merge fullstack_dynamic_deployemt -- batch 3
diff --git a/genomix/HyracksCodeFormatProfile.xml b/genomix/HyracksCodeFormatProfile.xml
new file mode 100644
index 0000000..2cde66d
--- /dev/null
+++ b/genomix/HyracksCodeFormatProfile.xml
@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<profiles version="11">
+<profile kind="CodeFormatterProfile" name="HyracksCodeFormatProfile" version="11">
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.disabling_tag" value="@formatter:off"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.use_on_off_tags" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression" value="80"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.source" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_line_comments" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.align_type_members_on_columns" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.lineSplit" value="120"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.enabling_tag" value="@formatter:on"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.assertIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.char" value="space"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_method_declaration" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.enumIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.compact_else_if" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.size" value="4"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_empty_lines" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.compiler.compliance" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_binary_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode" value="enabled"/>
+<setting id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_label" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant" value="48"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="9999"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_import_groups" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.wrap_before_binary_operator" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.join_lines_in_comments" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_html" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_source_code" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.targetPlatform" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_header" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_block_comments" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants" value="49"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line" value="false"/>
+</profile>
+</profiles>
diff --git a/genomix/genomix-data/.classpath b/genomix/genomix-data/.classpath
new file mode 100644
index 0000000..e43402f
--- /dev/null
+++ b/genomix/genomix-data/.classpath
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="src" output="target/classes" path="src/main/java">
+		<attributes>
+			<attribute name="optional" value="true"/>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
+		<attributes>
+			<attribute name="optional" value="true"/>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="output" path="target/classes"/>
+</classpath>
diff --git a/genomix/genomix-data/.project b/genomix/genomix-data/.project
new file mode 100644
index 0000000..f22376e
--- /dev/null
+++ b/genomix/genomix-data/.project
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>genomix-data</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.m2e.core.maven2Builder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+		<nature>org.eclipse.m2e.core.maven2Nature</nature>
+	</natures>
+</projectDescription>
diff --git a/genomix/genomix-data/.settings/org.eclipse.core.resources.prefs b/genomix/genomix-data/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 0000000..609d3ca
--- /dev/null
+++ b/genomix/genomix-data/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,4 @@
+eclipse.preferences.version=1
+encoding//src/main/resources=UTF-8
+encoding//src/test/resources=UTF-8
+encoding/<project>=UTF-8
diff --git a/genomix/genomix-data/.settings/org.eclipse.jdt.core.prefs b/genomix/genomix-data/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..ec4300d
--- /dev/null
+++ b/genomix/genomix-data/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,5 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
+org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.7
diff --git a/genomix/genomix-data/.settings/org.eclipse.m2e.core.prefs b/genomix/genomix-data/.settings/org.eclipse.m2e.core.prefs
new file mode 100644
index 0000000..f897a7f
--- /dev/null
+++ b/genomix/genomix-data/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
diff --git a/genomix/genomix-data/HyracksCodeFormatProfile.xml b/genomix/genomix-data/HyracksCodeFormatProfile.xml
new file mode 100644
index 0000000..2cde66d
--- /dev/null
+++ b/genomix/genomix-data/HyracksCodeFormatProfile.xml
@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<profiles version="11">
+<profile kind="CodeFormatterProfile" name="HyracksCodeFormatProfile" version="11">
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.disabling_tag" value="@formatter:off"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.use_on_off_tags" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression" value="80"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.source" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_line_comments" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.align_type_members_on_columns" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.lineSplit" value="120"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.enabling_tag" value="@formatter:on"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.assertIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.char" value="space"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_method_declaration" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.enumIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.compact_else_if" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.size" value="4"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_empty_lines" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.compiler.compliance" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_binary_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode" value="enabled"/>
+<setting id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_label" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant" value="48"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="9999"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_import_groups" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.wrap_before_binary_operator" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.join_lines_in_comments" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_html" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_source_code" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.targetPlatform" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_header" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_block_comments" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants" value="49"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line" value="false"/>
+</profile>
+</profiles>
diff --git a/genomix/genomix-data/pom.xml b/genomix/genomix-data/pom.xml
new file mode 100644
index 0000000..5ce538f
--- /dev/null
+++ b/genomix/genomix-data/pom.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<artifactId>genomix-data</artifactId>
+	<name>genomix-data</name>
+
+	<parent>
+		<groupId>edu.uci.ics.hyracks</groupId>
+		<artifactId>genomix</artifactId>
+		<version>0.2.4-SNAPSHOT</version>
+	</parent>
+
+	<properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+	</properties>
+
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<version>2.0.2</version>
+				<configuration>
+					<source>1.7</source>
+					<target>1.7</target>
+					<fork>true</fork>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+
+
+	<dependencies>
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<version>4.8.1</version>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-core</artifactId>
+			<version>0.20.2</version>
+		</dependency>
+	</dependencies>
+</project>
diff --git a/genomix/genomix-data/src/main/assembly/binary-assembly.xml b/genomix/genomix-data/src/main/assembly/binary-assembly.xml
new file mode 100644
index 0000000..0500499
--- /dev/null
+++ b/genomix/genomix-data/src/main/assembly/binary-assembly.xml
@@ -0,0 +1,19 @@
+<assembly>
+  <id>binary-assembly</id>
+  <formats>
+    <format>zip</format>
+    <format>dir</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory>
+  <fileSets>
+    <fileSet>
+      <directory>target/appassembler/bin</directory>
+      <outputDirectory>bin</outputDirectory>
+      <fileMode>0755</fileMode>
+    </fileSet>
+    <fileSet>
+      <directory>target/appassembler/lib</directory>
+      <outputDirectory>lib</outputDirectory>
+    </fileSet>
+  </fileSets>
+</assembly>
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/Kmer.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/Kmer.java
new file mode 100644
index 0000000..21fee2b
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/Kmer.java
@@ -0,0 +1,298 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.type;
+
+public class Kmer {
+ 
+	public final static byte[] GENE_SYMBOL = { 'A', 'C', 'G', 'T' };
+
+	public final static class GENE_CODE {
+
+		/**
+		 * Make sure these 4 ids match the index of the corresponding
+		 * character in {@link Kmer#GENE_SYMBOL}.
+		 */
+		public static final byte A = 0;
+		public static final byte C = 1;
+		public static final byte G = 2;
+		public static final byte T = 3;
+
+		public static byte getCodeFromSymbol(byte ch) {
+			byte r = 0;
+			switch (ch) {
+			case 'A':
+			case 'a':
+				r = A;
+				break;
+			case 'C':
+			case 'c':
+				r = C;
+				break;
+			case 'G':
+			case 'g':
+				r = G;
+				break;
+			case 'T':
+			case 't':
+				r = T;
+				break;
+			}
+			return r;
+		}
+
+		public static byte getSymbolFromCode(byte code) {
+			if (code > 3) {
+				return '!';
+			}
+			return GENE_SYMBOL[code];
+		}
+
+		public static byte getAdjBit(byte t) {
+			byte r = 0;
+			switch (t) {
+			case 'A':
+			case 'a':
+				r = 1 << A;
+				break;
+			case 'C':
+			case 'c':
+				r = 1 << C;
+				break;
+			case 'G':
+			case 'g':
+				r = 1 << G;
+				break;
+			case 'T':
+			case 't':
+				r = 1 << T;
+				break;
+			}
+			return r;
+		}
+
+		/**
+		 * Used for path merge: when merging a kmer with its next neighbor,
+		 * the adjacency bitmap {@code t} must contain exactly one set bit.
+		 * 
+		 * @param t the neighbor code in the adjacency bitmap
+		 * @return the gene code, or -1 if {@code t} is not a single-neighbor bitmap
+		 */
+		public static byte getGeneCodeFromBitMap(byte t) {
+			switch (t) {
+			case 1 << A:
+				return A;
+			case 1 << C:
+				return C;
+			case 1 << G:
+				return G;
+			case 1 << T:
+				return T;
+			}
+			return -1;
+		}
+
+		public static byte mergePreNextAdj(byte pre, byte next) {
+			return (byte) (pre << 4 | (next & 0x0f));
+		}
+
+		public static String getSymbolFromBitMap(byte code) {
+			int left = (code >> 4) & 0x0F;
+			int right = code & 0x0F;
+			StringBuilder str = new StringBuilder();
+			for (int i = A; i <= T; i++) {
+				if ((left & (1 << i)) != 0) {
+					str.append((char) GENE_SYMBOL[i]);
+				}
+			}
+			str.append('|');
+			for (int i = A; i <= T; i++) {
+				if ((right & (1 << i)) != 0) {
+					str.append((char) GENE_SYMBOL[i]);
+				}
+			}
+			return str.toString();
+		}
+	}
+
+	public static String recoverKmerFrom(int k, byte[] keyData, int keyStart,
+			int keyLength) {
+		StringBuilder strKmer = new StringBuilder();
+		int byteId = keyStart + keyLength - 1;
+		byte currentbyte = keyData[byteId];
+		for (int geneCount = 0; geneCount < k; geneCount++) {
+			if (geneCount % 4 == 0 && geneCount > 0) {
+				currentbyte = keyData[--byteId];
+			}
+			strKmer.append((char) GENE_SYMBOL[(currentbyte >> ((geneCount % 4) * 2)) & 0x03]);
+		}
+		return strKmer.toString();
+	}
+
+	public static int getByteNumFromK(int k) {
+		int x = k / 4;
+		if (k % 4 != 0) {
+			x += 1;
+		}
+		return x;
+	}
+
+	/**
+	 * Compress a kmer into a bytes array, e.g. AATAG compresses to [0x000G, 0xATAA].
+	 * 
+	 * @param k
+	 *            kmer length
+	 * @param array
+	 *            input array of gene symbols
+	 * @param start
+	 *            start position in the input array
+	 * @return initialized kmer bytes array
+	 */
+	public static byte[] compressKmer(int k, byte[] array, int start) {
+		final int byteNum = getByteNumFromK(k);
+		byte[] bytes = new byte[byteNum];
+
+		byte l = 0;
+		int bytecount = 0;
+		int bcount = byteNum - 1;
+		for (int i = start; i < start + k; i++) {
+			byte code = GENE_CODE.getCodeFromSymbol(array[i]);
+			l |= (byte) (code << bytecount);
+			bytecount += 2;
+			if (bytecount == 8) {
+				bytes[bcount--] = l;
+				l = 0;
+				bytecount = 0;
+			}
+		}
+		if (bcount >= 0) {
+			bytes[0] = l;
+		}
+		return bytes;
+	}
+
+	/**
+	 * Shift the kmer forward to accept a new input symbol.
+	 * 
+	 * @param k
+	 *            kmer length
+	 * @param kmer
+	 *            kmer bytes array, modified in place
+	 * @param c
+	 *            new gene character to append
+	 * @return the shifted-out gene, encoded as an adjacency bit
+	 */
+	public static byte moveKmer(int k, byte[] kmer, byte c) {
+		int byteNum = kmer.length;
+		byte output = (byte) (kmer[byteNum - 1] & 0x03);
+		for (int i = byteNum - 1; i > 0; i--) {
+			byte in = (byte) (kmer[i - 1] & 0x03);
+			kmer[i] = (byte) (((kmer[i] >>> 2) & 0x3f) | (in << 6));
+		}
+		int pos = ((k - 1) % 4) << 1;
+		byte code = (byte) (GENE_CODE.getCodeFromSymbol(c) << pos);
+		kmer[0] = (byte) (((kmer[0] >>> 2) & 0x3f) | code);
+		return (byte) (1 << output);
+	}
+
+	public static byte reverseKmerByte(byte k) {
+		int x = (((k >> 2) & 0x33) | ((k << 2) & 0xcc));
+		return (byte) (((x >> 4) & 0x0f) | ((x << 4) & 0xf0));
+	}
+
+	public static byte[] reverseKmer(int k, byte[] kmer) {
+		byte[] reverseKmer = new byte[kmer.length];
+
+		int curPosAtKmer = ((k - 1) % 4) << 1;
+		int curByteAtKmer = 0;
+
+		int curPosAtReverse = 0;
+		int curByteAtReverse = reverseKmer.length - 1;
+		reverseKmer[curByteAtReverse] = 0;
+		for (int i = 0; i < k; i++) {
+			byte gene = (byte) ((kmer[curByteAtKmer] >> curPosAtKmer) & 0x03);
+			reverseKmer[curByteAtReverse] |= gene << curPosAtReverse;
+			curPosAtReverse += 2;
+			if (curPosAtReverse >= 8) {
+				curPosAtReverse = 0;
+				reverseKmer[--curByteAtReverse] = 0;
+			}
+			curPosAtKmer -= 2;
+			if (curPosAtKmer < 0) {
+				curPosAtKmer = 6;
+				curByteAtKmer++;
+			}
+		}
+
+		return reverseKmer;
+	}
+
+	/**
+	 * Compress a kmer in reverse order into a bytes array, e.g. AATAG compresses to
+	 * [0x000A, 0xATAG].
+	 * 
+	 * @param k
+	 *            kmer length
+	 * @param array
+	 *            input array of gene symbols
+	 * @param start
+	 *            start position in the input array
+	 * @return initialized kmer bytes array
+	 */
+	public static byte[] compressKmerReverse(int k, byte[] array, int start) {
+		final int byteNum = getByteNumFromK(k);
+		byte[] bytes = new byte[byteNum];
+
+		byte l = 0;
+		int bytecount = 0;
+		int bcount = byteNum - 1;
+		for (int i = start + k - 1; i >= start; i--) {
+			byte code = GENE_CODE.getCodeFromSymbol(array[i]);
+			l |= (byte) (code << bytecount);
+			bytecount += 2;
+			if (bytecount == 8) {
+				bytes[bcount--] = l;
+				l = 0;
+				bytecount = 0;
+			}
+		}
+		if (bcount >= 0) {
+			bytes[0] = l;
+		}
+		return bytes;
+	}
+
+	/**
+	 * Shift the reverse-order kmer to accept a new input symbol.
+	 * 
+	 * @param k
+	 *            kmer length
+	 * @param kmer
+	 *            kmer bytes array, modified in place
+	 * @param c
+	 *            new gene character to add
+	 * @return the shifted-out gene, encoded as an adjacency bit
+	 */
+	public static byte moveKmerReverse(int k, byte[] kmer, byte c) {
+		int pos = ((k - 1) % 4) << 1;
+		byte output = (byte) ((kmer[0] >> pos) & 0x03);
+		for (int i = 0; i < kmer.length - 1; i++) {
+			byte in = (byte) ((kmer[i + 1] >> 6) & 0x03);
+			kmer[i] = (byte) ((kmer[i] << 2) | in);
+		}
+		// (k%4) * 2
+		if (k % 4 != 0) {
+			kmer[0] &= (1 << ((k % 4) << 1)) - 1;
+		}
+		kmer[kmer.length - 1] = (byte) ((kmer[kmer.length - 1] << 2) | GENE_CODE
+				.getCodeFromSymbol(c));
+		return (byte) (1 << output);
+	}
+
+}
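A minimal usage sketch for the Kmer helpers above (hypothetical, not part of this patch; the class name and input values are illustrative): it packs a short read into the 2-bit-per-symbol representation, recovers the text, and slides the window by one symbol.

import edu.uci.ics.genomix.type.Kmer;

public class KmerPackingSketch {
    public static void main(String[] args) {
        byte[] read = { 'A', 'A', 'T', 'A', 'G' };
        int k = 5;
        // pack 5 symbols into ceil(5/4) = 2 bytes, 2 bits per symbol
        byte[] kmer = Kmer.compressKmer(k, read, 0);
        System.out.println(Kmer.recoverKmerFrom(k, kmer, 0, kmer.length)); // AATAG
        // slide the window forward by one symbol; the return value is the
        // shifted-out gene encoded as an adjacency bit
        byte out = Kmer.moveKmer(k, kmer, (byte) 'C');
        System.out.println(Kmer.recoverKmerFrom(k, kmer, 0, kmer.length)); // ATAGC
        System.out.println(out == Kmer.GENE_CODE.getAdjBit((byte) 'A'));   // true
    }
}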
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
new file mode 100644
index 0000000..1d41167
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.type;
+
+import java.io.IOException;
+import java.io.DataInput;
+import java.io.DataOutput;
+import org.apache.hadoop.io.BinaryComparable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+
+public class KmerBytesWritable extends BinaryComparable implements
+		WritableComparable<BinaryComparable> {
+	private static final int LENGTH_BYTES = 4;
+	private static final byte[] EMPTY_BYTES = {};
+	private byte size;
+	private byte[] bytes;
+
+	public KmerBytesWritable() {
+		this(EMPTY_BYTES);
+	}
+
+	public KmerBytesWritable(byte[] bytes) {
+		this.bytes = bytes;
+		this.size = (byte) bytes.length;
+	}
+
+	@Override
+	public byte[] getBytes() {
+		return bytes;
+	}
+
+	@Deprecated
+	public byte[] get() {
+		return getBytes();
+	}
+
+	@Override
+	public int getLength() {
+		return (int) size;
+	}
+
+	@Deprecated
+	public int getSize() {
+		return getLength();
+	}
+
+	public void setSize(byte size) {
+		if ((int) size > getCapacity()) {
+			setCapacity((byte) (size * 3 / 2));
+		}
+		this.size = size;
+	}
+
+	public int getCapacity() {
+		return bytes.length;
+	}
+
+	public void setCapacity(byte new_cap) {
+		if (new_cap != getCapacity()) {
+			byte[] new_data = new byte[new_cap];
+			if (new_cap < size) {
+				size = new_cap;
+			}
+			if (size != 0) {
+				System.arraycopy(bytes, 0, new_data, 0, size);
+			}
+			bytes = new_data;
+		}
+	}
+
+	public void set(KmerBytesWritable newData) {
+		set(newData.bytes, (byte) 0, newData.size);
+	}
+
+	public void set(byte[] newData, byte offset, byte length) {
+		setSize((byte) 0);
+		setSize(length);
+		System.arraycopy(newData, offset, bytes, 0, size);
+	}
+
+	public void readFields(DataInput in) throws IOException {
+		setSize((byte) 0); // clear the old data
+		setSize(in.readByte());
+		in.readFully(bytes, 0, size);
+	}
+
+	@Override
+	public void write(DataOutput out) throws IOException {
+		out.writeByte(size);
+		out.write(bytes, 0, size);
+	}
+
+	@Override
+	public int hashCode() {
+		return super.hashCode();
+	}
+
+	@Override
+	public boolean equals(Object right_obj) {
+		if (right_obj instanceof KmerBytesWritable)
+			return super.equals(right_obj);
+		return false;
+	}
+
+	@Override
+	public String toString() {
+		StringBuffer sb = new StringBuffer(3 * size);
+		for (int idx = 0; idx < (int) size; idx++) {
+			// if not the first, put a blank separator in
+			if (idx != 0) {
+				sb.append(' ');
+			}
+			String num = Integer.toHexString(0xff & bytes[idx]);
+			// if it is only one digit, add a leading 0.
+			if (num.length() < 2) {
+				sb.append('0');
+			}
+			sb.append(num);
+		}
+		return sb.toString();
+	}
+
+	public static class Comparator extends WritableComparator {
+		public Comparator() {
+			super(KmerBytesWritable.class);
+		}
+
+		public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+			return compareBytes(b1, s1 + LENGTH_BYTES, l1 - LENGTH_BYTES, b2,
+					s2 + LENGTH_BYTES, l2 - LENGTH_BYTES);
+		}
+	}
+
+	static { // register this comparator
+		WritableComparator.define(KmerBytesWritable.class, new Comparator());
+	}
+
+}
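A hypothetical round-trip sketch (not part of this patch; class name is illustrative) for KmerBytesWritable: the serialized form is a one-byte length prefix followed by the packed kmer bytes, so a write/readFields pair restores the kmer.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import edu.uci.ics.genomix.type.Kmer;
import edu.uci.ics.genomix.type.KmerBytesWritable;

public class KmerWritableRoundTrip {
    public static void main(String[] args) throws IOException {
        byte[] packed = Kmer.compressKmer(5, new byte[] { 'A', 'A', 'T', 'A', 'G' }, 0);
        KmerBytesWritable written = new KmerBytesWritable(packed);

        // serialize: one length byte followed by the packed kmer bytes
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        written.write(new DataOutputStream(buffer));

        // deserialize into a fresh instance and recover the symbols
        KmerBytesWritable read = new KmerBytesWritable();
        read.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        System.out.println(Kmer.recoverKmerFrom(5, read.getBytes(), 0, read.getLength())); // AATAG
    }
}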
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerCountValue.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerCountValue.java
new file mode 100644
index 0000000..87eaa87
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerCountValue.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.type;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+
+public class KmerCountValue implements Writable {
+	private byte adjBitMap;
+	private byte count;
+
+	public KmerCountValue(byte bitmap, byte count) {
+		set(bitmap, count);
+	}
+
+	public KmerCountValue() {
+		adjBitMap = 0;
+		count = 0;
+	}
+
+	@Override
+	public void readFields(DataInput arg0) throws IOException {
+		adjBitMap = arg0.readByte();
+		count = arg0.readByte();
+	}
+
+	@Override
+	public void write(DataOutput arg0) throws IOException {
+		arg0.writeByte(adjBitMap);
+		arg0.writeByte(count);
+	}
+
+	@Override
+	public String toString() {
+		return Kmer.GENE_CODE.getSymbolFromBitMap(adjBitMap) + '\t'
+				+ String.valueOf(count);
+	}
+
+	public void set(byte bitmap, byte count) {
+		this.adjBitMap = bitmap;
+		this.count = count;
+	}
+
+	public byte getAdjBitMap() {
+		return adjBitMap;
+	}
+
+	public void setAdjBitMap(byte adjBitMap) {
+		this.adjBitMap = adjBitMap;
+	}
+
+	public byte getCount() {
+		return count;
+	}
+}
\ No newline at end of file
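A hypothetical sketch (not part of this patch; class name and count are illustrative) of how KmerCountValue pairs an adjacency bitmap with a coverage count; Kmer.GENE_CODE.mergePreNextAdj keeps predecessor letters in the high nibble and successor letters in the low nibble.

import edu.uci.ics.genomix.type.Kmer;
import edu.uci.ics.genomix.type.KmerCountValue;

public class KmerCountValueSketch {
    public static void main(String[] args) {
        // predecessor 'A' in the high nibble, successor 'C' in the low nibble
        byte pre = Kmer.GENE_CODE.getAdjBit((byte) 'A');
        byte next = Kmer.GENE_CODE.getAdjBit((byte) 'C');
        byte adj = Kmer.GENE_CODE.mergePreNextAdj(pre, next); // 0x12

        KmerCountValue value = new KmerCountValue(adj, (byte) 3);
        System.out.println(value); // prints "A|C", a tab, then the count 3
    }
}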
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java
new file mode 100644
index 0000000..1c8c46e
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java
@@ -0,0 +1,222 @@
+package edu.uci.ics.genomix.type;
+
+import java.util.Arrays;
+
+public class KmerUtil {
+
+	public static int countNumberOfBitSet(int i) {
+		int c = 0;
+		for (; i != 0; c++) {
+			i &= i - 1;
+		}
+		return c;
+	}
+
+	public static int inDegree(byte bitmap) {
+		return countNumberOfBitSet((bitmap >> 4) & 0x0f);
+	}
+
+	public static int outDegree(byte bitmap) {
+		return countNumberOfBitSet(bitmap & 0x0f);
+	}
+
+	/**
+	 * Get the last kmer from a kmer-chain,
+	 * e.g. if kmerChain is AAGCTA and k = 5, it returns AGCTA.
+	 * 
+	 * @param k length of the kmer to extract
+	 * @param kInChain length of the kmer-chain
+	 * @param kmerChain bytes array of the kmer-chain
+	 * @return last kmer bytes array, or null if k > kInChain
+	 */
+	public static byte[] getLastKmerFromChain(int k, int kInChain,
+			byte[] kmerChain, int offset, int length) {
+		if (k > kInChain) {
+			return null;
+		}
+		if (k == kInChain) {
+			return kmerChain.clone();
+		}
+		int byteNum = Kmer.getByteNumFromK(k);
+		byte[] kmer = new byte[byteNum];
+
+		/** from end to start */
+		int byteInChain = length - 1 - (kInChain - k) / 4;
+		int posInByteOfChain = ((kInChain - k) % 4) << 1; // *2
+		int byteInKmer = byteNum - 1;
+		for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
+			kmer[byteInKmer] = (byte) ((0xff & kmerChain[offset + byteInChain]) >> posInByteOfChain);
+			kmer[byteInKmer] |= ((kmerChain[offset + byteInChain - 1] << (8 - posInByteOfChain)));
+		}
+
+		/** last kmer byte */
+		if (byteInKmer == 0) {
+			kmer[0] = (byte) ((kmerChain[offset] & 0xff) >> posInByteOfChain);
+		}
+		return kmer;
+	}
+
+	/**
+	 * Get the first kmer from a kmer-chain,
+	 * e.g. if kmerChain is AAGCTA and k = 5, it returns AAGCT.
+	 * 
+	 * @param k length of the kmer to extract
+	 * @param kInChain length of the kmer-chain
+	 * @param kmerChain bytes array of the kmer-chain
+	 * @return first kmer bytes array, or null if k > kInChain
+	 */
+	public static byte[] getFirstKmerFromChain(int k, int kInChain,
+			byte[] kmerChain, int offset, int length) {
+		if (k > kInChain) {
+			return null;
+		}
+		if (k == kInChain) {
+			return kmerChain.clone();
+		}
+		int byteNum = Kmer.getByteNumFromK(k);
+		byte[] kmer = new byte[byteNum];
+
+		int i = 1;
+		for (; i < kmer.length; i++) {
+			kmer[kmer.length - i] = kmerChain[offset + length - i];
+		}
+		int posInByteOfChain = (k % 4) << 1; // *2
+		if (posInByteOfChain == 0) {
+			kmer[0] = kmerChain[offset + length - i];
+		} else {
+			kmer[0] = (byte) (kmerChain[offset + length - i] & ((1 << posInByteOfChain) - 1));
+		}
+		return kmer;
+	}
+
+	/**
+	 * Merge a kmer with its next neighbor given in gene-code format.
+	 * The k of the new kmer increases by 1,
+	 * e.g. AAGCT merged with A => AAGCTA.
+	 * @param k : input k of the kmer
+	 * @param kmer : input bytes of the kmer
+	 * @param nextCode : next neighbor in gene-code format
+	 * @return the merged kmer; its k is k+1
+	 */
+	public static byte[] mergeKmerWithNextCode(int k, byte[] kmer, int offset, int length, byte nextCode) {
+		int byteNum = length;
+		if (k % 4 == 0) {
+			byteNum++;
+		}
+		byte[] mergedKmer = new byte[byteNum];
+		for (int i = 1; i <= length; i++) {
+			mergedKmer[mergedKmer.length - i] = kmer[offset + length - i];
+		}
+		if (mergedKmer.length > length) {
+			mergedKmer[0] = (byte) (nextCode & 0x3);
+		} else {
+			mergedKmer[0] = (byte) (kmer[offset] | ((nextCode & 0x3) << ((k % 4) << 1)));
+		}
+		return mergedKmer;
+	}
+
+	/**
+	 * Merge a kmer with its previous neighbor given in gene-code format.
+	 * The k of the new kmer increases by 1,
+	 * e.g. AAGCT merged with A => AAAGCT.
+	 * @param k : input k of the kmer
+	 * @param kmer : input bytes of the kmer
+	 * @param preCode : previous neighbor in gene-code format
+	 * @return the merged kmer; its k is k+1
+	 */
+	public static byte[] mergeKmerWithPreCode(int k, byte[] kmer, int offset, int length, byte preCode) {
+		int byteNum = length;
+		byte[] mergedKmer = null;
+		int byteInMergedKmer = 0;
+		if (k % 4 == 0) {
+			byteNum++;
+			mergedKmer = new byte[byteNum];
+			mergedKmer[0] = (byte) ((kmer[offset] >> 6) & 0x3);
+			byteInMergedKmer++;
+		} else {
+			mergedKmer = new byte[byteNum];
+		}
+		for (int i = 0; i < length - 1; i++, byteInMergedKmer++) {
+			mergedKmer[byteInMergedKmer] = (byte) ((kmer[offset + i] << 2) | ((kmer[offset + i + 1] >> 6) & 0x3));
+		}
+		mergedKmer[byteInMergedKmer] = (byte) ((kmer[offset + length - 1] << 2) | (preCode & 0x3));
+		return mergedKmer;
+	}
+
+	/**
+	 * Merge two kmers into one kmer,
+	 * e.g. ACTA + ACCGT => ACTAACCGT.
+	 * @param preK : k of the previous kmer
+	 * @param kmerPre : bytes array of the previous kmer
+	 * @param nextK : k of the next kmer
+	 * @param kmerNext : bytes array of the next kmer
+	 * @return merged kmer; the new k is preK + nextK
+	 */
+	public static byte[] mergeTwoKmer(int preK, byte[] kmerPre, int offsetPre, int lengthPre, int nextK,
+			byte[] kmerNext, int offsetNext, int lengthNext) {
+		int byteNum = Kmer.getByteNumFromK(preK + nextK);
+		byte[] mergedKmer = new byte[byteNum];
+		int i = 1;
+		for (; i <= lengthPre; i++) {
+			mergedKmer[byteNum - i] = kmerPre[offsetPre + lengthPre - i];
+		}
+		if (i > 1) {
+			i--;
+		}
+		if (preK % 4 == 0) {
+			for (int j = 1; j <= lengthNext; j++) {
+				mergedKmer[byteNum - i - j] = kmerNext[offsetNext + lengthNext - j];
+			}
+		} else {
+			int posNeedToMove = ((preK % 4) << 1);
+			mergedKmer[byteNum - i] |= kmerNext[offsetNext + lengthNext - 1] << posNeedToMove;
+			for (int j = 1; j < lengthNext; j++) {
+				mergedKmer[byteNum - i - j] = (byte) (((kmerNext[offsetNext + lengthNext
+						- j] & 0xff) >> (8 - posNeedToMove)) | (kmerNext[offsetNext + lengthNext
+						- j - 1] << posNeedToMove));
+			}
+			if (nextK % 4 == 0 || (nextK % 4) * 2 + posNeedToMove > 8) {
+				mergedKmer[0] = (byte) ((0xff & kmerNext[offsetNext]) >> (8 - posNeedToMove));
+			}
+		}
+		return mergedKmer;
+	}
+	
+	/**
+	 * Safely shift the kmer forward without changing the input kmer,
+	 * e.g. AGCGC shifted with T => GCGCT.
+	 * @param k : kmer length
+	 * @param kmer : input kmer
+	 * @param afterCode : input gene code
+	 * @return a newly created kmer shifted by afterCode; k does not change
+	 */
+	public static byte[] shiftKmerWithNextCode(int k, final byte[] kmer, int offset, int length, byte afterCode){
+		byte[] shifted = Arrays.copyOfRange(kmer, offset, offset+length);
+		Kmer.moveKmer(k, shifted, Kmer.GENE_CODE.getSymbolFromCode(afterCode));
+		return shifted;
+	}
+	
+	/**
+	 * Safely shift the kmer backward without changing the input kmer,
+	 * e.g. AGCGC shifted with T => TAGCG.
+	 * @param k : kmer length
+	 * @param kmer : input kmer
+	 * @param preCode : input gene code
+	 * @return a newly created kmer shifted by preCode; k does not change
+	 */
+	public static byte[] shiftKmerWithPreCode(int k, final byte[] kmer, int offset, int length, byte preCode){
+		byte[] shifted = Arrays.copyOfRange(kmer, offset, offset+length);
+		Kmer.moveKmerReverse(k, shifted, Kmer.GENE_CODE.getSymbolFromCode(preCode));
+		return shifted;
+	}
+
+	public static byte getGeneCodeAtPosition(int pos, int k, final byte[] kmer,
+			int offset, int length) {
+		if (pos >= k) {
+			return -1;
+		}
+		int posByte = pos / 4;
+		int shift = (pos  % 4) << 1;
+		return (byte) ((kmer[offset + length - 1 - posByte] >> shift) & 0x3);
+	}
+}
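A hypothetical sketch (not part of this patch; class name and inputs are illustrative) exercising the KmerUtil helpers above: degree counts from an adjacency bitmap, per-position gene lookup, and the non-destructive forward shift.

import edu.uci.ics.genomix.type.Kmer;
import edu.uci.ics.genomix.type.KmerUtil;

public class KmerUtilSketch {
    public static void main(String[] args) {
        // adjacency bitmap with one predecessor ('A') and one successor ('C')
        byte adj = Kmer.GENE_CODE.mergePreNextAdj(
                Kmer.GENE_CODE.getAdjBit((byte) 'A'),
                Kmer.GENE_CODE.getAdjBit((byte) 'C'));
        System.out.println(KmerUtil.inDegree(adj));  // 1
        System.out.println(KmerUtil.outDegree(adj)); // 1

        byte[] kmer = Kmer.compressKmer(5, new byte[] { 'A', 'A', 'T', 'A', 'G' }, 0);
        // read the symbol at position 4 of AATAG
        byte code = KmerUtil.getGeneCodeAtPosition(4, 5, kmer, 0, kmer.length);
        System.out.println((char) Kmer.GENE_CODE.getSymbolFromCode(code)); // G

        // shift forward without touching the original kmer
        byte[] shifted = KmerUtil.shiftKmerWithNextCode(5, kmer, 0, kmer.length, Kmer.GENE_CODE.C);
        System.out.println(Kmer.recoverKmerFrom(5, shifted, 0, shifted.length)); // ATAGC
        System.out.println(Kmer.recoverKmerFrom(5, kmer, 0, kmer.length));       // AATAG
    }
}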
diff --git a/genomix/genomix-data/src/main/resources/conf/cluster.properties b/genomix/genomix-data/src/main/resources/conf/cluster.properties
new file mode 100644
index 0000000..eabd81b
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/conf/cluster.properties
@@ -0,0 +1,40 @@
+#The CC port for Hyracks clients
+CC_CLIENTPORT=3099
+
+#The CC port for Hyracks cluster management
+CC_CLUSTERPORT=1099
+
+#The directory of hyracks binaries
+HYRACKS_HOME="../../../../hyracks"
+
+#The tmp directory for cc to install jars
+CCTMP_DIR=/tmp/t1
+
+#The tmp directory for nc to install jars
+NCTMP_DIR=/tmp/t2
+
+#The directory to put cc logs
+CCLOGS_DIR=$CCTMP_DIR/logs
+
+#The directory to put nc logs
+NCLOGS_DIR=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS="/tmp/t3,/tmp/t4"
+
+#The JAVA_HOME
+JAVA_HOME=$JAVA_HOME
+
+#Add HADOOP_HOME to the CLASSPATH
+CLASSPATH="${HADOOP_HOME}:${CLASSPATH}:."
+
+#The frame size of the internal dataflow engine
+FRAME_SIZE=65536
+
+#CC JAVA_OPTS
+CCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx10g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/genomix/genomix-data/src/main/resources/conf/debugnc.properties b/genomix/genomix-data/src/main/resources/conf/debugnc.properties
new file mode 100644
index 0000000..27afa26
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/conf/debugnc.properties
@@ -0,0 +1,12 @@
+#The tmp directory for nc to install jars
+NCTMP_DIR2=/tmp/t-1
+
+#The directory to put nc logs
+NCLOGS_DIR2=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS2="/tmp/t-2,/tmp/t-3"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS2="-Xdebug -Xrunjdwp:transport=dt_socket,address=7003,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/genomix/genomix-data/src/main/resources/conf/master b/genomix/genomix-data/src/main/resources/conf/master
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/conf/master
@@ -0,0 +1 @@
+localhost
diff --git a/genomix/genomix-data/src/main/resources/conf/slaves b/genomix/genomix-data/src/main/resources/conf/slaves
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/conf/slaves
@@ -0,0 +1 @@
+localhost
diff --git a/genomix/genomix-data/src/main/resources/scripts/genomix b/genomix/genomix-data/src/main/resources/scripts/genomix
new file mode 100644
index 0000000..bdd7f20
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/genomix
@@ -0,0 +1,113 @@
+#!/bin/sh
+# ----------------------------------------------------------------------------
+#  Copyright 2001-2006 The Apache Software Foundation.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+# ----------------------------------------------------------------------------
+#
+#   Copyright (c) 2001-2006 The Apache Software Foundation.  All rights
+#   reserved.
+
+
+# resolve links - $0 may be a softlink
+PRG="$0"
+
+while [ -h "$PRG" ]; do
+  ls=`ls -ld "$PRG"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '/.*' > /dev/null; then
+    PRG="$link"
+  else
+    PRG=`dirname "$PRG"`/"$link"
+  fi
+done
+
+PRGDIR=`dirname "$PRG"`
+BASEDIR=`cd "$PRGDIR/.." >/dev/null; pwd`
+
+
+
+# OS specific support.  $var _must_ be set to either true or false.
+cygwin=false;
+darwin=false;
+case "`uname`" in
+  CYGWIN*) cygwin=true ;;
+  Darwin*) darwin=true
+           if [ -z "$JAVA_VERSION" ] ; then
+             JAVA_VERSION="CurrentJDK"
+           else
+             echo "Using Java version: $JAVA_VERSION"
+           fi
+           if [ -z "$JAVA_HOME" ] ; then
+             JAVA_HOME=/System/Library/Frameworks/JavaVM.framework/Versions/${JAVA_VERSION}/Home
+           fi
+           ;;
+esac
+
+if [ -z "$JAVA_HOME" ] ; then
+  if [ -r /etc/gentoo-release ] ; then
+    JAVA_HOME=`java-config --jre-home`
+  fi
+fi
+
+# For Cygwin, ensure paths are in UNIX format before anything is touched
+if $cygwin ; then
+  [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
+  [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
+fi
+
+# If a specific java binary isn't specified search for the standard 'java' binary
+if [ -z "$JAVACMD" ] ; then
+  if [ -n "$JAVA_HOME"  ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+      # IBM's JDK on AIX uses strange locations for the executables
+      JAVACMD="$JAVA_HOME/jre/sh/java"
+    else
+      JAVACMD="$JAVA_HOME/bin/java"
+    fi
+  else
+    JAVACMD=`which java`
+  fi
+fi
+
+if [ ! -x "$JAVACMD" ] ; then
+  echo "Error: JAVA_HOME is not defined correctly." 1>&2
+  echo "  We cannot execute $JAVACMD" 1>&2
+  exit 1
+fi
+
+if [ -z "$REPO" ]
+then
+  REPO="$BASEDIR"/lib
+fi
+
+CLASSPATH=$CLASSPATH_PREFIX:"$BASEDIR"/etc:"$REPO"/hyracks-dataflow-std-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-api-0.2.3-SNAPSHOT.jar:"$REPO"/json-20090211.jar:"$REPO"/httpclient-4.1-alpha2.jar:"$REPO"/httpcore-4.1-beta1.jar:"$REPO"/commons-logging-1.1.1.jar:"$REPO"/commons-codec-1.4.jar:"$REPO"/args4j-2.0.12.jar:"$REPO"/commons-lang3-3.1.jar:"$REPO"/hyracks-dataflow-common-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-data-std-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-control-cc-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-control-common-0.2.3-SNAPSHOT.jar:"$REPO"/jetty-server-8.0.0.RC0.jar:"$REPO"/servlet-api-3.0.20100224.jar:"$REPO"/jetty-continuation-8.0.0.RC0.jar:"$REPO"/jetty-http-8.0.0.RC0.jar:"$REPO"/jetty-io-8.0.0.RC0.jar:"$REPO"/jetty-webapp-8.0.0.RC0.jar:"$REPO"/jetty-xml-8.0.0.RC0.jar:"$REPO"/jetty-util-8.0.0.RC0.jar:"$REPO"/jetty-servlet-8.0.0.RC0.jar:"$REPO"/jetty-security-8.0.0.RC0.jar:"$REPO"/wicket-core-1.5.2.jar:"$REPO"/wicket-util-1.5.2.jar:"$REPO"/wicket-request-1.5.2.jar:"$REPO"/slf4j-api-1.6.1.jar:"$REPO"/slf4j-jcl-1.6.3.jar:"$REPO"/hyracks-control-nc-0.2.3-SNAPSHOT.jar:"$REPO"/dcache-client-0.0.1.jar:"$REPO"/jetty-client-8.0.0.M0.jar:"$REPO"/hyracks-net-0.2.3-SNAPSHOT.jar:"$REPO"/commons-io-1.3.1.jar:"$REPO"/hyracks-ipc-0.2.3-SNAPSHOT.jar:"$REPO"/genomix-0.2.3-SNAPSHOT.pom
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin; then
+  [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
+  [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
+  [ -n "$HOME" ] && HOME=`cygpath --path --windows "$HOME"`
+  [ -n "$BASEDIR" ] && BASEDIR=`cygpath --path --windows "$BASEDIR"`
+  [ -n "$REPO" ] && REPO=`cygpath --path --windows "$REPO"`
+fi
+
+exec "$JAVACMD" $JAVA_OPTS  \
+  -classpath "$CLASSPATH" \
+  -Dapp.name="genomix" \
+  -Dapp.pid="$$" \
+  -Dapp.repo="$REPO" \
+  -Dapp.home="$BASEDIR" \
+  -Dbasedir="$BASEDIR" \
+  edu.uci.ics.genomix.driver.Driver \
+  "$@"
diff --git a/genomix/genomix-data/src/main/resources/scripts/genomix.bat b/genomix/genomix-data/src/main/resources/scripts/genomix.bat
new file mode 100644
index 0000000..1bd2098
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/genomix.bat
@@ -0,0 +1,108 @@
+@REM ----------------------------------------------------------------------------
+@REM  Copyright 2001-2006 The Apache Software Foundation.
+@REM
+@REM  Licensed under the Apache License, Version 2.0 (the "License");
+@REM  you may not use this file except in compliance with the License.
+@REM  You may obtain a copy of the License at
+@REM
+@REM       http://www.apache.org/licenses/LICENSE-2.0
+@REM
+@REM  Unless required by applicable law or agreed to in writing, software
+@REM  distributed under the License is distributed on an "AS IS" BASIS,
+@REM  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@REM  See the License for the specific language governing permissions and
+@REM  limitations under the License.
+@REM ----------------------------------------------------------------------------
+@REM
+@REM   Copyright (c) 2001-2006 The Apache Software Foundation.  All rights
+@REM   reserved.
+
+@echo off
+
+set ERROR_CODE=0
+
+:init
+@REM Decide how to startup depending on the version of windows
+
+@REM -- Win98ME
+if NOT "%OS%"=="Windows_NT" goto Win9xArg
+
+@REM set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" @setlocal
+
+@REM -- 4NT shell
+if "%eval[2+2]" == "4" goto 4NTArgs
+
+@REM -- Regular WinNT shell
+set CMD_LINE_ARGS=%*
+goto WinNTGetScriptDir
+
+@REM The 4NT Shell from jp software
+:4NTArgs
+set CMD_LINE_ARGS=%$
+goto WinNTGetScriptDir
+
+:Win9xArg
+@REM Slurp the command line arguments.  This loop allows for an unlimited number
+@REM of arguments (up to the command line limit, anyway).
+set CMD_LINE_ARGS=
+:Win9xApp
+if %1a==a goto Win9xGetScriptDir
+set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1
+shift
+goto Win9xApp
+
+:Win9xGetScriptDir
+set SAVEDIR=%CD%
+%0\
+cd %0\..\.. 
+set BASEDIR=%CD%
+cd %SAVEDIR%
+set SAVE_DIR=
+goto repoSetup
+
+:WinNTGetScriptDir
+set BASEDIR=%~dp0\..
+
+:repoSetup
+
+
+if "%JAVACMD%"=="" set JAVACMD=java
+
+if "%REPO%"=="" set REPO=%BASEDIR%\lib
+
+set CLASSPATH="%BASEDIR%"\etc;"%REPO%"\hyracks-dataflow-std-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-api-0.2.3-SNAPSHOT.jar;"%REPO%"\json-20090211.jar;"%REPO%"\httpclient-4.1-alpha2.jar;"%REPO%"\httpcore-4.1-beta1.jar;"%REPO%"\commons-logging-1.1.1.jar;"%REPO%"\commons-codec-1.4.jar;"%REPO%"\args4j-2.0.12.jar;"%REPO%"\commons-lang3-3.1.jar;"%REPO%"\hyracks-dataflow-common-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-data-std-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-control-cc-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-control-common-0.2.3-SNAPSHOT.jar;"%REPO%"\jetty-server-8.0.0.RC0.jar;"%REPO%"\servlet-api-3.0.20100224.jar;"%REPO%"\jetty-continuation-8.0.0.RC0.jar;"%REPO%"\jetty-http-8.0.0.RC0.jar;"%REPO%"\jetty-io-8.0.0.RC0.jar;"%REPO%"\jetty-webapp-8.0.0.RC0.jar;"%REPO%"\jetty-xml-8.0.0.RC0.jar;"%REPO%"\jetty-util-8.0.0.RC0.jar;"%REPO%"\jetty-servlet-8.0.0.RC0.jar;"%REPO%"\jetty-security-8.0.0.RC0.jar;"%REPO%"\wicket-core-1.5.2.jar;"%REPO%"\wicket-util-1.5.2.jar;"%REPO%"\wicket-request-1.5.2.jar;"%REPO%"\slf4j-api-1.6.1.jar;"%REPO%"\slf4j-jcl-1.6.3.jar;"%REPO%"\hyracks-control-nc-0.2.3-SNAPSHOT.jar;"%REPO%"\dcache-client-0.0.1.jar;"%REPO%"\jetty-client-8.0.0.M0.jar;"%REPO%"\hyracks-net-0.2.3-SNAPSHOT.jar;"%REPO%"\commons-io-1.3.1.jar;"%REPO%"\hyracks-ipc-0.2.3-SNAPSHOT.jar;"%REPO%"\genomix-0.2.3-SNAPSHOT.pom
+goto endInit
+
+@REM Reaching here means variables are defined and arguments have been captured
+:endInit
+
+%JAVACMD% %JAVA_OPTS%  -classpath %CLASSPATH_PREFIX%;%CLASSPATH% -Dapp.name="genomix" -Dapp.repo="%REPO%" -Dapp.home="%BASEDIR%" -Dbasedir="%BASEDIR%" edu.uci.ics.genomix.driver.Driver %CMD_LINE_ARGS%
+if ERRORLEVEL 1 goto error
+goto end
+
+:error
+if "%OS%"=="Windows_NT" @endlocal
+set ERROR_CODE=%ERRORLEVEL%
+
+:end
+@REM set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" goto endNT
+
+@REM For old DOS remove the set variables from ENV - we assume they were not set
+@REM before we started - at least we don't leave any baggage around
+set CMD_LINE_ARGS=
+goto postExec
+
+:endNT
+@REM If error code is set to 1 then the endlocal was done already in :error.
+if %ERROR_CODE% EQU 0 @endlocal
+
+
+:postExec
+
+if "%FORCE_EXIT_ON_ERROR%" == "on" (
+  if %ERROR_CODE% NEQ 0 exit %ERROR_CODE%
+)
+
+exit /B %ERROR_CODE%
diff --git a/genomix/genomix-data/src/main/resources/scripts/getip.sh b/genomix/genomix-data/src/main/resources/scripts/getip.sh
new file mode 100644
index 0000000..e0cdf73
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/getip.sh
@@ -0,0 +1,21 @@
+#get the OS
+OS_NAME=`uname -a|awk '{print $1}'`
+LINUX_OS='Linux'
+
+if [ $OS_NAME = $LINUX_OS ];
+then
+        #Get IP Address
+        IPADDR=`/sbin/ifconfig eth0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+	if [ "$IPADDR" = "" ]
+        then
+		IPADDR=`/sbin/ifconfig lo | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+        fi 
+else
+        IPADDR=`/sbin/ifconfig en1 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+	if [ "$IPADDR" = "" ]
+        then
+                IPADDR=`/sbin/ifconfig lo0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+        fi
+
+fi
+echo $IPADDR
diff --git a/genomix/genomix-data/src/main/resources/scripts/startAllNCs.sh b/genomix/genomix-data/src/main/resources/scripts/startAllNCs.sh
new file mode 100644
index 0000000..5e38c40
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/startAllNCs.sh
@@ -0,0 +1,6 @@
+GENOMIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+   ssh $i "cd ${GENOMIX_PATH}; bin/startnc.sh"
+done
diff --git a/genomix/genomix-data/src/main/resources/scripts/startCluster.sh b/genomix/genomix-data/src/main/resources/scripts/startCluster.sh
new file mode 100755
index 0000000..4727764
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/startCluster.sh
@@ -0,0 +1,19 @@
+bin/startcc.sh
+sleep 5
+bin/startAllNCs.sh
+
+. conf/cluster.properties
+# do we need to specify the version somewhere?
+hyrackcmd=`ls ${HYRACKS_HOME}/hyracks-cli/target/hyracks-cli-*-binary-assembly/bin/hyrackscli`
+# find zip file
+appzip=`ls $PWD/../genomix-*-binary-assembly.zip`
+
+[ -f $hyrackcmd ] || { echo "Hyracks commandline is missing"; exit -1;}
+[ -f $appzip ] || { echo "Genomix binary-assembly.zip is missing"; exit -1;}
+
+CCHOST_NAME=`cat conf/master`
+
+IPADDR=`bin/getip.sh`
+echo "connect to \"${IPADDR}:${CC_CLIENTPORT}\"; create application genomix \"$appzip\";" | $hyrackcmd 
+echo ""
+
diff --git a/genomix/genomix-data/src/main/resources/scripts/startDebugNc.sh b/genomix/genomix-data/src/main/resources/scripts/startDebugNc.sh
new file mode 100644
index 0000000..fe6cf27
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/startDebugNc.sh
@@ -0,0 +1,50 @@
+hostname
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+. conf/debugnc.properties
+
+#Clean up temp dir
+
+rm -rf $NCTMP_DIR2
+mkdir $NCTMP_DIR2
+
+#Clean up log dir
+rm -rf $NCLOGS_DIR2
+mkdir $NCLOGS_DIR2
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS2 | tr "," "\n")
+for io_dir in $io_dirs
+do
+	rm -rf $io_dir
+	mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+#Get OS
+IPADDR=`bin/getip.sh`
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+NODEID=${NODEID}2
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS2
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR2
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR  -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS2}" &> $NCLOGS_DIR2/$NODEID.log &
diff --git a/genomix/genomix-data/src/main/resources/scripts/startcc.sh b/genomix/genomix-data/src/main/resources/scripts/startcc.sh
new file mode 100644
index 0000000..fe2551d
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/startcc.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+hostname
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CCHOST=`bin/getip.sh`
+
+#Remove the temp dir
+rm -rf $CCTMP_DIR
+mkdir $CCTMP_DIR
+
+#Remove the logs dir
+rm -rf $CCLOGS_DIR
+mkdir $CCLOGS_DIR
+
+#Export JAVA_HOME and JAVA_OPTS
+export JAVA_HOME=$JAVA_HOME
+export JAVA_OPTS=$CCJAVA_OPTS
+
+#Launch hyracks cc script
+chmod -R 755 $HYRACKS_HOME
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyrackscc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 3 &> $CCLOGS_DIR/cc.log &
diff --git a/genomix/genomix-data/src/main/resources/scripts/startnc.sh b/genomix/genomix-data/src/main/resources/scripts/startnc.sh
new file mode 100644
index 0000000..6e0f90e
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/startnc.sh
@@ -0,0 +1,49 @@
+hostname
+
+MY_NAME=`hostname`
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Clean up temp dir
+
+rm -rf $NCTMP_DIR
+mkdir $NCTMP_DIR
+
+#Clean up log dir
+rm -rf $NCLOGS_DIR
+mkdir $NCLOGS_DIR
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+	rm -rf $io_dir
+	mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+IPADDR=`bin/getip.sh`
+#echo $IPADDR
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR  -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS}" &> $NCLOGS_DIR/$NODEID.log &
diff --git a/genomix/genomix-data/src/main/resources/scripts/stopAllNCs.sh b/genomix/genomix-data/src/main/resources/scripts/stopAllNCs.sh
new file mode 100644
index 0000000..66ed866
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/stopAllNCs.sh
@@ -0,0 +1,6 @@
+GENOMIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+   ssh $i "cd ${GENOMIX_PATH}; bin/stopnc.sh"
+done
diff --git a/genomix/genomix-data/src/main/resources/scripts/stopCluster.sh b/genomix/genomix-data/src/main/resources/scripts/stopCluster.sh
new file mode 100644
index 0000000..4889934
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/stopCluster.sh
@@ -0,0 +1,3 @@
+bin/stopAllNCs.sh
+sleep 2
+bin/stopcc.sh
diff --git a/genomix/genomix-data/src/main/resources/scripts/stopcc.sh b/genomix/genomix-data/src/main/resources/scripts/stopcc.sh
new file mode 100644
index 0000000..1865054
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/stopcc.sh
@@ -0,0 +1,10 @@
+hostname
+. conf/cluster.properties
+
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep hyracks|awk '{print $2}'`
+echo $PID
+[ "$PID" != "" ] && kill -9 $PID
+
+#Clean up CC temp dir
+rm -rf $CCTMP_DIR/*
diff --git a/genomix/genomix-data/src/main/resources/scripts/stopnc.sh b/genomix/genomix-data/src/main/resources/scripts/stopnc.sh
new file mode 100644
index 0000000..3928bb7
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/stopnc.sh
@@ -0,0 +1,23 @@
+hostname
+. conf/cluster.properties
+
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+
+if [ "$PID" == "" ]; then
+  USERID=`id | sed 's/^uid=//;s/(.*$//'`
+  PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+fi
+
+echo $PID
+[ "$PID" != "" ] && kill -9 $PID
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+	rm -rf $io_dir/*
+done
+
+#Clean up NC temp dir
+rm -rf $NCTMP_DIR/*
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerTest.java
new file mode 100644
index 0000000..d9c1846
--- /dev/null
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerTest.java
@@ -0,0 +1,81 @@
+package edu.uci.ics.genomix.example.kmer;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+import edu.uci.ics.genomix.type.Kmer;
+
+public class KmerTest {
+	static byte[] array = { 'A', 'A', 'T', 'A', 'G', 'A', 'A', 'G' };
+	static int k = 7;
+	
+	@Test
+	public void TestCompressKmer() {
+		byte[] kmer = Kmer.compressKmer(k, array, 0);
+		String result = Kmer.recoverKmerFrom(k, kmer, 0, kmer.length);
+		Assert.assertEquals(result, "AATAGAA");
+		
+		kmer = Kmer.compressKmer(k, array, 1);
+		result = Kmer.recoverKmerFrom(k, kmer, 0, kmer.length);
+		Assert.assertEquals(result, "ATAGAAG");
+	}
+	
+	@Test
+	public void TestMoveKmer(){
+		byte[] kmer = Kmer.compressKmer(k, array, 0);
+		String result = Kmer.recoverKmerFrom(k, kmer, 0, kmer.length); 
+		Assert.assertEquals(result, "AATAGAA");
+		
+		for (int i = k; i < array.length-1; i++) {
+			Kmer.moveKmer(k, kmer, array[i]);
+			Assert.assertTrue(false);
+		}
+
+		byte out = Kmer.moveKmer(k, kmer, array[array.length - 1]);
+		Assert.assertEquals(out, Kmer.GENE_CODE.getAdjBit((byte) 'A'));
+		result = Kmer.recoverKmerFrom(k, kmer, 0, kmer.length);
+		Assert.assertEquals(result, "ATAGAAG");
+	}
+
+	
+	@Test
+	public void TestReverseKmer(){
+		byte[] kmer = Kmer.compressKmer(k, array, 0);
+		String result = Kmer.recoverKmerFrom(k, kmer, 0, kmer.length);
+		Assert.assertEquals(result, "AATAGAA");
+		byte[] reversed = Kmer.reverseKmer(k, kmer);
+		result = Kmer.recoverKmerFrom(k, reversed, 0, kmer.length);
+		Assert.assertEquals(result, "AAGATAA");
+	}
+	
+	@Test
+	public void TestCompressKmerReverse() {
+		byte[] kmer = Kmer.compressKmerReverse(k, array, 0);
+		String result = Kmer.recoverKmerFrom(k, kmer, 0, kmer.length);
+		Assert.assertEquals(result, "AAGATAA");
+		
+		kmer = Kmer.compressKmerReverse(k, array, 1);
+		result = Kmer.recoverKmerFrom(k, kmer, 0, kmer.length);
+		Assert.assertEquals(result, "GAAGATA");
+	}
+	
+	@Test
+	public void TestMoveKmerReverse(){
+		byte[] kmer = Kmer.compressKmerReverse(k, array, 0);
+		String result = Kmer.recoverKmerFrom(k, kmer, 0, kmer.length);
+		Assert.assertEquals(result, "AAGATAA");
+		
+		for (int i = k; i < array.length-1; i++) {
+			Kmer.moveKmerReverse(k, kmer, array[i]);
+			Assert.assertTrue(false);
+		}
+
+		byte out = Kmer.moveKmerReverse(k, kmer, array[array.length - 1]);
+		Assert.assertEquals(out, Kmer.GENE_CODE.getAdjBit((byte) 'A'));
+		result = Kmer.recoverKmerFrom(k, kmer, 0, kmer.length);
+		Assert.assertEquals(result, "GAAGATA");
+	}
+
+
+}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerUtilTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerUtilTest.java
new file mode 100644
index 0000000..854bb47
--- /dev/null
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerUtilTest.java
@@ -0,0 +1,138 @@
+package edu.uci.ics.genomix.example.kmer;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerUtil;
+
+public class KmerUtilTest {
+	static byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C','G','T'};
+	
+	@Test
+	public void TestDegree(){
+		Assert.assertTrue(KmerUtil.inDegree((byte) 0xff) == 4); 
+		Assert.assertTrue(KmerUtil.outDegree((byte) 0xff) == 4);
+		Assert.assertTrue(KmerUtil.inDegree((byte) 0x3f) == 2);
+		Assert.assertTrue(KmerUtil.outDegree((byte) 0x01) == 1);
+		Assert.assertTrue(KmerUtil.inDegree((byte) 0x01) == 0);
+	}
+	
+	@Test
+	public void TestGetLastKmer(){
+		byte[] kmerChain = Kmer.compressKmer(9, array, 0);
+		Assert.assertEquals("AGCTGACCG", Kmer.recoverKmerFrom(9, kmerChain, 0, kmerChain.length));
+		byte[] lastKmer ;
+		for(int i = 8; i>0 ; i--){
+			lastKmer = KmerUtil.getLastKmerFromChain(i, 9, kmerChain, 0, kmerChain.length);
+//			System.out.println(Kmer.recoverKmerFrom(i, lastKmer, 0, lastKmer.length));
+			Assert.assertEquals("AGCTGACCG".substring(9-i), Kmer.recoverKmerFrom(i, lastKmer, 0, lastKmer.length));
+		}
+	}
+	
+	@Test
+	public void TestMergeNext(){
+		byte[] kmer = Kmer.compressKmer(9, array, 0);
+		String text = "AGCTGACCG";
+		Assert.assertEquals(text, Kmer.recoverKmerFrom(9, kmer, 0, kmer.length));
+		for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
+			kmer = KmerUtil.mergeKmerWithNextCode(9+x, kmer, 0, kmer.length, x);
+//			System.out.println(Kmer.recoverKmerFrom(9+x+1, kmer, 0, kmer.length));
+			text = text + (char)Kmer.GENE_SYMBOL[x];
+			Assert.assertEquals(text, Kmer.recoverKmerFrom(9+x+1, kmer, 0, kmer.length));
+		}
+		for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
+			kmer = KmerUtil.mergeKmerWithNextCode(13+x, kmer,0, kmer.length, x);
+//			System.out.println(Kmer.recoverKmerFrom(13+x+1, kmer, 0, kmer.length));
+			text = text + (char)Kmer.GENE_SYMBOL[x];
+			Assert.assertEquals(text, Kmer.recoverKmerFrom(13+x+1, kmer, 0, kmer.length));
+		}
+	}
+	
+	@Test
+	public void TestMergePre(){
+		byte[] kmer = Kmer.compressKmer(9, array, 0);
+		String text = "AGCTGACCG";
+		Assert.assertEquals(text, Kmer.recoverKmerFrom(9, kmer, 0, kmer.length));
+		for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
+			kmer = KmerUtil.mergeKmerWithPreCode(9+x, kmer, 0, kmer.length,x);
+//			System.out.println(Kmer.recoverKmerFrom(9+x+1, kmer, 0, kmer.length));
+			text = (char)Kmer.GENE_SYMBOL[x] + text;
+			Assert.assertEquals(text , Kmer.recoverKmerFrom(9+x+1, kmer, 0, kmer.length));
+		}
+		for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
+			kmer = KmerUtil.mergeKmerWithPreCode(13+x, kmer,0, kmer.length, x);
+//			System.out.println(Kmer.recoverKmerFrom(13+x+1, kmer, 0, kmer.length));
+			text = (char)Kmer.GENE_SYMBOL[x] + text;
+			Assert.assertEquals(text , Kmer.recoverKmerFrom(13+x+1, kmer, 0, kmer.length));
+		}
+	}
+	
+	@Test
+	public void TestMergeTwoKmer() {
+		byte[] kmer1 = Kmer.compressKmer(9, array, 0);
+		String text1 = "AGCTGACCG";
+		byte[] kmer2 = Kmer.compressKmer(9, array, 1);
+		String text2 = "GCTGACCGT";
+		Assert.assertEquals(text1, Kmer.recoverKmerFrom(9, kmer1, 0, kmer1.length));
+		Assert.assertEquals(text2, Kmer.recoverKmerFrom(9, kmer2, 0, kmer2.length));
+
+		byte[] merged = KmerUtil.mergeTwoKmer(9, kmer1, 0, kmer1.length, 9, kmer2, 0, kmer2.length);
+		Assert.assertEquals(text1 + text2, Kmer.recoverKmerFrom(9 + 9, merged, 0, merged.length));
+
+		byte[] kmer3 = Kmer.compressKmer(3, array, 1);
+		String text3 = "GCT";
+		Assert.assertEquals(text3, Kmer.recoverKmerFrom(3, kmer3, 0, kmer3.length));
+		merged = KmerUtil.mergeTwoKmer(9, kmer1, 0, kmer1.length, 3, kmer3, 0, kmer3.length);
+		Assert.assertEquals(text1 + text3, Kmer.recoverKmerFrom(9 + 3, merged, 0, merged.length));
+		merged = KmerUtil.mergeTwoKmer(3, kmer3, 0, kmer3.length, 9, kmer1, 0, kmer1.length);
+		Assert.assertEquals(text3 + text1, Kmer.recoverKmerFrom(9 + 3, merged, 0, merged.length));
+
+		byte[] kmer4 = Kmer.compressKmer(8, array, 0);
+		String text4 = "AGCTGACC";
+		Assert.assertEquals(text4, Kmer.recoverKmerFrom(8, kmer4, 0, kmer4.length));
+		merged = KmerUtil.mergeTwoKmer(8, kmer4, 0, kmer4.length, 3, kmer3, 0, kmer3.length);
+		Assert.assertEquals(text4 + text3, Kmer.recoverKmerFrom(8 + 3, merged, 0, merged.length));
+
+		byte[] kmer5 = Kmer.compressKmer(7, array, 0);
+		String text5 = "AGCTGAC";
+		byte[] kmer6 = Kmer.compressKmer(9, array, 1);
+		String text6 = "GCTGACCGT";
+		merged = KmerUtil.mergeTwoKmer(7, kmer5, 0, kmer5.length, 9, kmer6, 0, kmer6.length);
+		Assert.assertEquals(text5 + text6, Kmer.recoverKmerFrom(7 + 9, merged, 0, merged.length));
+
+		byte[] kmer7 = Kmer.compressKmer(6, array, 1);
+		String text7 = "GCTGAC";
+		merged = KmerUtil.mergeTwoKmer(7, kmer5, 0, kmer5.length, 6, kmer7, 0, kmer7.length);
+		Assert.assertEquals(text5 + text7, Kmer.recoverKmerFrom(7 + 6, merged, 0, merged.length));
+
+		byte[] kmer8 = Kmer.compressKmer(4, array, 1);
+		String text8 = "GCTG";
+		merged = KmerUtil.mergeTwoKmer(7, kmer5, 0, kmer5.length, 4, kmer8, 0, kmer8.length);
+		Assert.assertEquals(text5 + text8, Kmer.recoverKmerFrom(7 + 4, merged, 0, merged.length));
+	}
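+
+	// shiftKmerWithNextCode / shiftKmerWithPreCode slide the fixed-size window by one
+	// base; both are expected to return a fresh array, which is why the original kmer
+	// is re-asserted as unchanged after each call.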
+	@Test
+	public void TestShift() {
+		byte[] kmer = Kmer.compressKmer(9, array, 0);
+		String text = "AGCTGACCG";
+		Assert.assertEquals(text, Kmer.recoverKmerFrom(9, kmer, 0, kmer.length));
+
+		byte[] kmerForward = KmerUtil.shiftKmerWithNextCode(9, kmer, 0, kmer.length, Kmer.GENE_CODE.A);
+		Assert.assertEquals(text, Kmer.recoverKmerFrom(9, kmer, 0, kmer.length));
+		Assert.assertEquals("GCTGACCGA", Kmer.recoverKmerFrom(9, kmerForward, 0, kmerForward.length));
+		byte[] kmerBackward = KmerUtil.shiftKmerWithPreCode(9, kmer, 0, kmer.length, Kmer.GENE_CODE.C);
+		Assert.assertEquals(text, Kmer.recoverKmerFrom(9, kmer, 0, kmer.length));
+		Assert.assertEquals("CAGCTGACC", Kmer.recoverKmerFrom(9, kmerBackward, 0, kmerBackward.length));
+	}
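+
+	// getGeneCodeAtPosition(i, 9, ...) reads the 2-bit code of base i; mapping it back
+	// through GENE_CODE.getSymbolFromCode() should reproduce the original character.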
+	@Test
+	public void TestGetGene() {
+		byte[] kmer = Kmer.compressKmer(9, array, 0);
+		String text = "AGCTGACCG";
+		for (int i = 0; i < 9; i++) {
+			Assert.assertEquals(text.charAt(i),
+					(char) (Kmer.GENE_CODE.getSymbolFromCode(KmerUtil.getGeneCodeAtPosition(i, 9, kmer, 0, kmer.length))));
+		}
+	}
+}
diff --git a/genomix/genomix-data/src/test/resources/data/0/text.txt b/genomix/genomix-data/src/test/resources/data/0/text.txt
new file mode 100755
index 0000000..f63a141
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/data/0/text.txt
@@ -0,0 +1,4 @@
+@625E1AAXX100810:1:100:10000:10271/1
+AATAGAAG
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
diff --git a/genomix/genomix-data/src/test/resources/data/webmap/text.txt b/genomix/genomix-data/src/test/resources/data/webmap/text.txt
new file mode 100755
index 0000000..f63a141
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/data/webmap/text.txt
@@ -0,0 +1,4 @@
+@625E1AAXX100810:1:100:10000:10271/1
+AATAGAAG
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
diff --git a/genomix/genomix-data/src/test/resources/expected/result2 b/genomix/genomix-data/src/test/resources/expected/result2
new file mode 100755
index 0000000..5e76458
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/expected/result2
@@ -0,0 +1,4 @@
+AATAG	|A	1
+AGAAG	T|	1
+ATAGA	A|A	1
+TAGAA	A|G	1
diff --git a/genomix/genomix-data/src/test/resources/hadoop/conf/core-site.xml b/genomix/genomix-data/src/test/resources/hadoop/conf/core-site.xml
new file mode 100644
index 0000000..47dfac5
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/hadoop/conf/core-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
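+<!-- Mini-cluster defaults for the tests: a fixed local NameNode address and a
+     /tmp scratch directory (both presumably chosen just for the unit tests). -->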
+<property>
+    <name>fs.default.name</name>
+    <value>hdfs://127.0.0.1:31888</value>
+</property>
+<property>
+    <name>hadoop.tmp.dir</name>
+    <value>/tmp/hadoop</value>
+</property>
+
+
+</configuration>
diff --git a/genomix/genomix-data/src/test/resources/hadoop/conf/hdfs-site.xml b/genomix/genomix-data/src/test/resources/hadoop/conf/hdfs-site.xml
new file mode 100644
index 0000000..8d29b1d
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/hadoop/conf/hdfs-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
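+<!-- Single-replica HDFS with 64 KB blocks (default is 64 MB); the small block size
+     is presumably meant to make even tiny test inputs span several blocks. -->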
+<property>
+    <name>dfs.replication</name>
+    <value>1</value>
+</property>
+
+<property>
+    <name>dfs.block.size</name>
+    <value>65536</value>
+</property>
+
+</configuration>
diff --git a/genomix/genomix-data/src/test/resources/hadoop/conf/log4j.properties b/genomix/genomix-data/src/test/resources/hadoop/conf/log4j.properties
new file mode 100755
index 0000000..d5e6004
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/hadoop/conf/log4j.properties
@@ -0,0 +1,94 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=FATAL,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=FATAL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this 
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
diff --git a/genomix/genomix-data/src/test/resources/hadoop/conf/mapred-site.xml b/genomix/genomix-data/src/test/resources/hadoop/conf/mapred-site.xml
new file mode 100644
index 0000000..39b6505
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/hadoop/conf/mapred-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
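+  <!-- Local JobTracker port, generous task slots, and a 2 KB max split size,
+       presumably so that even small test files are broken into many map tasks. -->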
+  <property>
+    <name>mapred.job.tracker</name>
+    <value>localhost:29007</value>
+  </property>
+  <property>
+    <name>mapred.tasktracker.map.tasks.maximum</name>
+    <value>20</value>
+  </property>
+  <property>
+    <name>mapred.tasktracker.reduce.tasks.maximum</name>
+    <value>20</value>
+  </property>
+  <property>
+    <name>mapred.max.split.size</name>
+    <value>2048</value>
+  </property>
+
+</configuration>
diff --git a/genomix/genomix-hadoop/actual1/conf.xml b/genomix/genomix-hadoop/actual1/conf.xml
new file mode 100644
index 0000000..506913d
--- /dev/null
+++ b/genomix/genomix-hadoop/actual1/conf.xml
@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
+<property><name>dfs.https.need.client.auth</name><value>false</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>dfs.namenode.logging.level</name><value>info</value></property>
+<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>fs.default.name</name><value>hdfs://localhost:61115</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>dfs.namenode.handler.count</name><value>10</value></property>
+<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>dfs.safemode.extension</name><value>0</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
+<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>dfs.block.size</name><value>67108864</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
+<property><name>dfs.permissions</name><value>true</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
+<property><name>dfs.replication.max</name><value>512</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
+<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
+<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>dfs.max.objects</name><value>0</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>dfs.client.block.write.retries</name><value>3</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>dfs.https.enable</name><value>false</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>dfs.df.interval</name><value>60000</value></property>
+<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
+<property><name>dfs.support.append</name><value>false</value></property>
+<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>dfs.replication.min</name><value>1</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
+<property><name>dfs.http.address</name><value>localhost:61116</value></property>
+<property><name>dfs.heartbeat.interval</name><value>3</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>dfs.replication.interval</name><value>3</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>dfs.replication</name><value>2</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>dfs.access.time.precision</name><value>3600000</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>dfs.datanode.handler.count</name><value>3</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>dfs.replication.considerLoad</name><value>true</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+</configuration>
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/actual1/result1/.part-00000.crc b/genomix/genomix-hadoop/actual1/result1/.part-00000.crc
new file mode 100644
index 0000000..3422e04
--- /dev/null
+++ b/genomix/genomix-hadoop/actual1/result1/.part-00000.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/actual1/result1/part-00000 b/genomix/genomix-hadoop/actual1/result1/part-00000
new file mode 100755
index 0000000..c21f5f6
--- /dev/null
+++ b/genomix/genomix-hadoop/actual1/result1/part-00000
Binary files differ
diff --git a/genomix/genomix-hadoop/actual2/conf.xml b/genomix/genomix-hadoop/actual2/conf.xml
new file mode 100644
index 0000000..ff11b9e
--- /dev/null
+++ b/genomix/genomix-hadoop/actual2/conf.xml
@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
+<property><name>dfs.https.need.client.auth</name><value>false</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>dfs.namenode.logging.level</name><value>info</value></property>
+<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>fs.default.name</name><value>hdfs://localhost:61195</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>dfs.namenode.handler.count</name><value>10</value></property>
+<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>dfs.safemode.extension</name><value>0</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
+<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>dfs.block.size</name><value>67108864</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
+<property><name>dfs.permissions</name><value>true</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
+<property><name>dfs.replication.max</name><value>512</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
+<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
+<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>dfs.max.objects</name><value>0</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>dfs.client.block.write.retries</name><value>3</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>dfs.https.enable</name><value>false</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>dfs.df.interval</name><value>60000</value></property>
+<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
+<property><name>dfs.support.append</name><value>false</value></property>
+<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>dfs.replication.min</name><value>1</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
+<property><name>dfs.http.address</name><value>localhost:61196</value></property>
+<property><name>dfs.heartbeat.interval</name><value>3</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>dfs.replication.interval</name><value>3</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>dfs.replication</name><value>2</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>dfs.access.time.precision</name><value>3600000</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>dfs.datanode.handler.count</name><value>3</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>dfs.replication.considerLoad</name><value>true</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+</configuration>
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/actual2/result2/.part-00000.crc b/genomix/genomix-hadoop/actual2/result2/.part-00000.crc
new file mode 100644
index 0000000..3f8c2c5
--- /dev/null
+++ b/genomix/genomix-hadoop/actual2/result2/.part-00000.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/actual2/result2/part-00000 b/genomix/genomix-hadoop/actual2/result2/part-00000
new file mode 100755
index 0000000..ea3e875
--- /dev/null
+++ b/genomix/genomix-hadoop/actual2/result2/part-00000
Binary files differ
diff --git a/genomix/genomix-hadoop/actual3/complete2/.complete2-r-00000.crc b/genomix/genomix-hadoop/actual3/complete2/.complete2-r-00000.crc
new file mode 100644
index 0000000..b0b2753
--- /dev/null
+++ b/genomix/genomix-hadoop/actual3/complete2/.complete2-r-00000.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/actual3/complete2/complete2-r-00000 b/genomix/genomix-hadoop/actual3/complete2/complete2-r-00000
new file mode 100755
index 0000000..d3d3667
--- /dev/null
+++ b/genomix/genomix-hadoop/actual3/complete2/complete2-r-00000
Binary files differ
diff --git a/genomix/genomix-hadoop/actual3/conf.xml b/genomix/genomix-hadoop/actual3/conf.xml
new file mode 100644
index 0000000..16a0edc
--- /dev/null
+++ b/genomix/genomix-hadoop/actual3/conf.xml
@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
+<property><name>dfs.https.need.client.auth</name><value>false</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>dfs.namenode.logging.level</name><value>info</value></property>
+<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>fs.default.name</name><value>hdfs://localhost:62106</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>dfs.namenode.handler.count</name><value>10</value></property>
+<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>dfs.safemode.extension</name><value>0</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
+<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>dfs.block.size</name><value>67108864</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
+<property><name>dfs.permissions</name><value>true</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
+<property><name>dfs.replication.max</name><value>512</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
+<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
+<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>dfs.max.objects</name><value>0</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>dfs.client.block.write.retries</name><value>3</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>dfs.https.enable</name><value>false</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>dfs.df.interval</name><value>60000</value></property>
+<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
+<property><name>dfs.support.append</name><value>false</value></property>
+<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>dfs.replication.min</name><value>1</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
+<property><name>dfs.http.address</name><value>localhost:62107</value></property>
+<property><name>dfs.heartbeat.interval</name><value>3</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>dfs.replication.interval</name><value>3</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>dfs.replication</name><value>2</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>dfs.access.time.precision</name><value>3600000</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>dfs.datanode.handler.count</name><value>3</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>dfs.replication.considerLoad</name><value>true</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+</configuration>
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/actual5/conf.xml b/genomix/genomix-hadoop/actual5/conf.xml
new file mode 100644
index 0000000..d19b061
--- /dev/null
+++ b/genomix/genomix-hadoop/actual5/conf.xml
@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
+<property><name>dfs.https.need.client.auth</name><value>false</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>dfs.namenode.logging.level</name><value>info</value></property>
+<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>fs.default.name</name><value>hdfs://localhost:58289</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>dfs.namenode.handler.count</name><value>10</value></property>
+<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>dfs.safemode.extension</name><value>0</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
+<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>dfs.block.size</name><value>67108864</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
+<property><name>dfs.permissions</name><value>true</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
+<property><name>dfs.replication.max</name><value>512</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
+<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
+<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>dfs.max.objects</name><value>0</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>dfs.client.block.write.retries</name><value>3</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>dfs.https.enable</name><value>false</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>dfs.df.interval</name><value>60000</value></property>
+<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
+<property><name>dfs.support.append</name><value>false</value></property>
+<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>dfs.replication.min</name><value>1</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
+<property><name>dfs.http.address</name><value>localhost:58290</value></property>
+<property><name>dfs.heartbeat.interval</name><value>3</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>dfs.replication.interval</name><value>3</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>dfs.replication</name><value>2</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>dfs.access.time.precision</name><value>3600000</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>dfs.datanode.handler.count</name><value>3</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>dfs.replication.considerLoad</name><value>true</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+</configuration>
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/actual5/result5/.part-00000.crc b/genomix/genomix-hadoop/actual5/result5/.part-00000.crc
new file mode 100644
index 0000000..dafaae3
--- /dev/null
+++ b/genomix/genomix-hadoop/actual5/result5/.part-00000.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/actual5/result5/part-00000 b/genomix/genomix-hadoop/actual5/result5/part-00000
new file mode 100755
index 0000000..deeff28
--- /dev/null
+++ b/genomix/genomix-hadoop/actual5/result5/part-00000
Binary files differ
diff --git a/genomix/genomix-hadoop/compare/result1/comparesource.txt b/genomix/genomix-hadoop/compare/result1/comparesource.txt
new file mode 100644
index 0000000..ba52008
--- /dev/null
+++ b/genomix/genomix-hadoop/compare/result1/comparesource.txt
@@ -0,0 +1,8 @@
+GCA	ACT|T	3
+AGC	|A	1
+CGC	T|AT	2
+TGC	|A	1
+ATC	C|G	2
+TCG	A|C	2
+CAT	G|C	2
+GCT	C|	1
diff --git a/genomix/genomix-hadoop/compare/result2/comparesource.txt b/genomix/genomix-hadoop/compare/result2/comparesource.txt
new file mode 100644
index 0000000..db55a38
--- /dev/null
+++ b/genomix/genomix-hadoop/compare/result2/comparesource.txt
@@ -0,0 +1,8 @@
+GCA	-72
+AGC	1
+CGC	-119
+TGC	1
+ATC	36
+TCG	18
+CAT	66
+GCT	32
diff --git a/genomix/genomix-hadoop/compare/result3/comparesource.txt b/genomix/genomix-hadoop/compare/result3/comparesource.txt
new file mode 100644
index 0000000..5f9dd78
--- /dev/null
+++ b/genomix/genomix-hadoop/compare/result3/comparesource.txt
@@ -0,0 +1 @@
+02 71	66	1
diff --git a/genomix/genomix-hadoop/compare/result5/comparesource.txt b/genomix/genomix-hadoop/compare/result5/comparesource.txt
new file mode 100644
index 0000000..6f4bd5e
--- /dev/null
+++ b/genomix/genomix-hadoop/compare/result5/comparesource.txt
@@ -0,0 +1,3 @@
+GCA	AT|T	2
+AGC	|A	1
+TGC	|A	1
diff --git a/genomix/genomix-hadoop/data/webmap/Test.txt b/genomix/genomix-hadoop/data/webmap/Test.txt
new file mode 100755
index 0000000..6d02b25
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/Test.txt
@@ -0,0 +1,10 @@
+@625E1AAXX100810:1:100:10000:10271/1
+AGCATCGCA
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
+@625E1AAXX100810:1:100:10000:10271/1
+TGCATCGCT
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
+
+
diff --git a/genomix/genomix-hadoop/data/webmap/text.txt b/genomix/genomix-hadoop/data/webmap/text.txt
new file mode 100755
index 0000000..c6cd7fe
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/text.txt
@@ -0,0 +1,4 @@
+@625E1AAXX100810:1:100:10000:10271/1
+AATAGAAGATCGAT
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
diff --git a/genomix/genomix-hadoop/expected/result1 b/genomix/genomix-hadoop/expected/result1
new file mode 100644
index 0000000..ba52008
--- /dev/null
+++ b/genomix/genomix-hadoop/expected/result1
@@ -0,0 +1,8 @@
+GCA	ACT|T	3
+AGC	|A	1
+CGC	T|AT	2
+TGC	|A	1
+ATC	C|G	2
+TCG	A|C	2
+CAT	G|C	2
+GCT	C|	1
diff --git a/genomix/genomix-hadoop/expected/result2 b/genomix/genomix-hadoop/expected/result2
new file mode 100755
index 0000000..db55a38
--- /dev/null
+++ b/genomix/genomix-hadoop/expected/result2
@@ -0,0 +1,8 @@
+GCA	-72
+AGC	1
+CGC	-119
+TGC	1
+ATC	36
+TCG	18
+CAT	66
+GCT	32
diff --git a/genomix/genomix-hadoop/expected/result3 b/genomix/genomix-hadoop/expected/result3
new file mode 100644
index 0000000..5f9dd78
--- /dev/null
+++ b/genomix/genomix-hadoop/expected/result3
@@ -0,0 +1 @@
+02 71	66	1
diff --git a/genomix/genomix-hadoop/pom.xml b/genomix/genomix-hadoop/pom.xml
new file mode 100755
index 0000000..195ce7f
--- /dev/null
+++ b/genomix/genomix-hadoop/pom.xml
@@ -0,0 +1,159 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<artifactId>genomix-hadoop</artifactId>
+	<name>genomix-hadoop</name>
+
+	<parent>
+		<groupId>edu.uci.ics.hyracks</groupId>
+		<artifactId>genomix</artifactId>
+		<version>0.2.4-SNAPSHOT</version>
+	</parent>
+
+	<properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+	</properties>
+	
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<version>2.0.2</version>
+				<configuration>
+					<source>1.6</source>
+					<target>1.6</target>
+				</configuration>
+			</plugin>
+			<plugin>
+				<artifactId>maven-assembly-plugin</artifactId>
+				<configuration>
+					<descriptorRefs>
+						<descriptorRef>jar-with-dependencies</descriptorRef>
+					</descriptorRefs>
+				</configuration>
+				<executions>
+					<execution>
+						<id>make-my-jar-with-dependencies</id>
+						<phase>package</phase>
+						<goals>
+							<goal>single</goal>
+						</goals>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<groupId>org.codehaus.mojo</groupId>
+				<artifactId>appassembler-maven-plugin</artifactId>
+				<executions>
+					<execution>
+						<configuration>
+							<programs>
+								<program>
+									<mainClass>edu.uci.ics.maxclique.Driver</mainClass>
+									<name>maxclique</name>
+								</program>
+							</programs>
+							<repositoryLayout>flat</repositoryLayout>
+							<repositoryName>lib</repositoryName>
+						</configuration>
+						<phase>package</phase>
+						<goals>
+							<goal>assemble</goal>
+						</goals>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-surefire-plugin</artifactId>
+				<version>2.7.2</version>
+				<configuration>
+					<forkMode>pertest</forkMode>
+					<argLine>-enableassertions -Xmx512m -XX:MaxPermSize=300m
+						-Dfile.encoding=UTF-8
+						-Djava.util.logging.config.file=src/test/resources/logging.properties</argLine>
+					<includes>
+						<include>**/*TestSuite.java</include>
+						<include>**/*Test.java</include>
+					</includes>
+				</configuration>
+			</plugin>
+			<plugin>
+				<artifactId>maven-clean-plugin</artifactId>
+				<configuration>
+					<filesets>
+						<fileset>
+							<directory>.</directory>
+							<includes>
+								<include>teststore*</include>
+								<include>edu*</include>
+								<include>build*</include>
+								<include>log*</include>
+								<include>ClusterController*</include>
+							</includes>
+						</fileset>
+					</filesets>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+	
+	<dependencies>
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<version>4.8.1</version>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-core</artifactId>
+			<version>0.20.2</version>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-test</artifactId>
+			<version>0.20.2</version>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>com.kenai.nbpwr</groupId>
+			<artifactId>org-apache-commons-io</artifactId>
+			<version>1.3.1-201002241208</version>
+			<type>nbm</type>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.slf4j</groupId>
+			<artifactId>slf4j-jcl</artifactId>
+			<version>1.6.3</version>
+		</dependency>
+		<dependency>
+			<groupId>org.slf4j</groupId>
+			<artifactId>slf4j-api</artifactId>
+			<version>1.6.3</version>
+		</dependency>
+		<dependency>
+			<groupId>args4j</groupId>
+			<artifactId>args4j</artifactId>
+			<version>2.0.16</version>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>genomix-data</artifactId>
+			<version>0.2.4-SNAPSHOT</version>
+			<type>jar</type>
+			<scope>compile</scope>
+		</dependency>
+	</dependencies>
+</project>
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingDriver.java
new file mode 100644
index 0000000..132f6e0
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingDriver.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.gbresultschecking;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+@SuppressWarnings("deprecation")
+public class ResultsCheckingDriver {
+    private static class Options {
+        @Option(name = "-inputpath1", usage = "the input path", required = true)
+        public String inputPath1;
+
+        @Option(name = "-inputpath2", usage = "the input path", required = true)
+        public String inputPath2;
+
+        @Option(name = "-outputpath", usage = "the output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+        public int numReducers;
+
+        @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+        public int sizeKmer;
+
+    }
+
+    public void run(String inputPath1, String inputPath2, String outputPath, int numReducers, int sizeKmer,
+            String defaultConfPath) throws IOException {
+
+        JobConf conf = new JobConf(ResultsCheckingDriver.class);
+
+        conf.setInt("sizeKmer", sizeKmer);
+
+        if (defaultConfPath != null) {
+            conf.addResource(new Path(defaultConfPath));
+        }
+
+        conf.setJobName("Results Checking");
+        conf.setMapperClass(ResultsCheckingMapper.class);
+        conf.setReducerClass(ResultsCheckingReducer.class);
+
+        conf.setMapOutputKeyClass(Text.class);
+        conf.setMapOutputValueClass(Text.class);
+
+        conf.setInputFormat(SequenceFileInputFormat.class);
+        conf.setOutputFormat(TextOutputFormat.class);
+
+        conf.setOutputKeyClass(Text.class);
+        conf.setOutputValueClass(Text.class);
+
+        Path[] inputList = new Path[2];
+        inputList[0] = new Path(inputPath1);
+        inputList[1] = new Path(inputPath2);
+
+        FileInputFormat.setInputPaths(conf, inputList);
+        FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+        conf.setNumReduceTasks(numReducers);
+
+        FileSystem dfs = FileSystem.get(conf);
+        dfs.delete(new Path(outputPath), true);
+        JobClient.runJob(conf);
+    }
+
+    public static void main(String[] args) throws Exception {
+        Options options = new Options();
+        CmdLineParser parser = new CmdLineParser(options);
+        parser.parseArgument(args);
+        ResultsCheckingDriver driver = new ResultsCheckingDriver();
+        driver.run(options.inputPath1, options.inputPath2, options.outputPath, options.numReducers, options.sizeKmer,
+                null);
+    }
+
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingMapper.java
new file mode 100644
index 0000000..fe56e1a
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingMapper.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.gbresultschecking;
+
+import java.io.IOException;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+@SuppressWarnings({ "unused", "deprecation" })
+public class ResultsCheckingMapper extends MapReduceBase implements Mapper<BytesWritable, KmerCountValue, Text, Text> {
+    BytesWritable valWriter = new BytesWritable();
+    private final static IntWritable one = new IntWritable(1);
+    public static Text textkey = new Text();
+    public static Text textvalue = new Text();
+    public static String INPUT_PATH;
+    public static int KMER_SIZE;
+
+    public void configure(JobConf job) {
+        KMER_SIZE = job.getInt("sizeKmer", 0);
+    }
+
+    @Override
+    public void map(BytesWritable key, KmerCountValue value, OutputCollector<Text, Text> output, Reporter reporter)
+            throws IOException {
+
+        FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
+        String filename = fileSplit.getPath().getName();
+        byte[] bkey = key.getBytes();
+        textkey.set(Kmer.recoverKmerFrom(KMER_SIZE, key.getBytes(), 0, key.getLength()) + "\t" + value.toString());
+        textvalue.set(filename);
+        output.collect(textkey, textvalue);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingReducer.java
new file mode 100644
index 0000000..6f02136
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingReducer.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.gbresultschecking;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+@SuppressWarnings("deprecation")
+public class ResultsCheckingReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
+
+    public static Text textkey = new Text();
+    public static Text textvalue = new Text();
+
+    @Override
+    public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
+            throws IOException {
+        textkey.set(key);
+        textvalue.set(values.next());
+        // emit the record only when it came from exactly one of the two inputs, i.e. the results disagree on it
+        if (!values.hasNext()) {
+            output.collect(textkey, textvalue);
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
new file mode 100755
index 0000000..5d39928
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.graphbuilding;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+/**
+ * This class implements the combiner operator of the MapReduce model
+ */
+@SuppressWarnings("deprecation")
+public class GenomixCombiner extends MapReduceBase implements
+        Reducer<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
+    public KmerCountValue vaWriter = new KmerCountValue();
+
+    @Override
+    public void reduce(BytesWritable key, Iterator<KmerCountValue> values,
+            OutputCollector<BytesWritable, KmerCountValue> output, Reporter reporter) throws IOException {
+        byte groupByAdjList = 0;
+        int count = 0;
+        byte bytCount = 0;
+        while (values.hasNext()) {
+            // merge all duplicate records for this kmer: OR the adjacency bitmaps, sum the counts
+            KmerCountValue geneValue = values.next();
+            groupByAdjList = (byte) (groupByAdjList | geneValue.getAdjBitMap());
+            count = count + (int) geneValue.getCount();
+        }
+        // clamp the accumulated count to the signed-byte maximum (127)
+        if (count >= 127)
+            bytCount = (byte) 127;
+        else
+            bytCount = (byte) count;
+        vaWriter.set(groupByAdjList, bytCount);
+        output.collect(key, vaWriter);
+    }
+}
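
The combiner above (and the GenomixReducer later in this patch, which shares the same logic) folds all records for a kmer into one by OR-ing the adjacency bitmaps and summing the coverage counts, clamping the sum at 127 because the count travels in a signed byte. A minimal standalone sketch of that merge rule, using plain primitives instead of the project's KmerCountValue type (class and method names here are illustrative only, not part of the genomix API):

    // Sketch of the combiner/reducer merge rule with plain primitives.
    public final class AdjCountMerge {
        /** OR the adjacency bitmaps and saturate the summed count at the signed-byte maximum (127). */
        public static byte[] merge(byte adjA, int countA, byte adjB, int countB) {
            byte adj = (byte) (adjA | adjB);
            byte count = (byte) Math.min(countA + countB, Byte.MAX_VALUE);
            return new byte[] { adj, count };
        }

        public static void main(String[] args) {
            // One record saw only successor T (bit 8), another only predecessor A (bit 16):
            byte[] merged = merge((byte) 8, 1, (byte) 16, 1);
            System.out.println(merged[0] + " " + merged[1]); // prints "24 2"
        }
    }
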
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
new file mode 100755
index 0000000..b90ab23
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.graphbuilding;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+/**
+ * This class implements the driver that launches the MapReduce job for graph building
+ */
+@SuppressWarnings("deprecation")
+public class GenomixDriver {
+    private static class Options {
+        @Option(name = "-inputpath", usage = "the input path", required = true)
+        public String inputPath;
+
+        @Option(name = "-outputpath", usage = "the output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+        public int numReducers;
+
+        @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+        public int sizeKmer;
+    }
+
+    public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
+            throws IOException {
+
+        JobConf conf = new JobConf(GenomixDriver.class);
+        conf.setInt("sizeKmer", sizeKmer);
+
+        if (defaultConfPath != null) {
+            conf.addResource(new Path(defaultConfPath));
+        }
+
+        conf.setJobName("Genomix Graph Building");
+        conf.setMapperClass(GenomixMapper.class);
+        conf.setReducerClass(GenomixReducer.class);
+        conf.setCombinerClass(GenomixCombiner.class);
+
+        conf.setMapOutputKeyClass(BytesWritable.class);
+        conf.setMapOutputValueClass(KmerCountValue.class);
+
+        conf.setInputFormat(TextInputFormat.class);
+        conf.setOutputFormat(SequenceFileOutputFormat.class);
+        conf.setOutputKeyClass(BytesWritable.class);
+        conf.setOutputValueClass(KmerCountValue.class);
+        FileInputFormat.setInputPaths(conf, new Path(inputPath));
+        FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+        conf.setNumReduceTasks(numReducers);
+
+        FileSystem dfs = FileSystem.get(conf);
+        dfs.delete(new Path(outputPath), true);
+        JobClient.runJob(conf);
+    }
+
+    public static void main(String[] args) throws Exception {
+        Options options = new Options();
+        CmdLineParser parser = new CmdLineParser(options);
+        parser.parseArgument(args);
+        GenomixDriver driver = new GenomixDriver();
+        driver.run(options.inputPath, options.outputPath, options.numReducers, options.sizeKmer, null);
+    }
+
+}
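
The driver exposes the same parameters through its run(...) method as through the args4j options above, so the job can also be launched programmatically. A minimal sketch, assuming the small FASTQ-style file added under data/webmap in this patch as input and an arbitrary output directory name:

    import edu.uci.ics.graphbuilding.GenomixDriver;

    // Sketch only: launches the graph-building job with k = 3 and a single reducer.
    public class BuildGraphExample {
        public static void main(String[] args) throws Exception {
            GenomixDriver driver = new GenomixDriver();
            // run(inputPath, outputPath, numReducers, sizeKmer, defaultConfPath); passing null skips the extra config file
            driver.run("genomix/genomix-hadoop/data/webmap/Test.txt", "graphbuild-out", 1, 3, null);
        }
    }
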
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
new file mode 100755
index 0000000..cd8b7e3
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.graphbuilding;
+
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.Kmer.GENE_CODE;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+/**
+ * This class implements the mapper operator of the MapReduce model
+ */
+@SuppressWarnings("deprecation")
+public class GenomixMapper extends MapReduceBase implements
+        Mapper<LongWritable, Text, BytesWritable, KmerCountValue> {
+
+    public class CurrenByte {
+        public byte curByte;
+        public byte preMarker;
+    }
+
+    public static int KMER_SIZE;
+    public KmerCountValue outputAdjList = new KmerCountValue();
+    public BytesWritable outputKmer = new BytesWritable();
+
+    @Override
+    public void configure(JobConf job) {
+        KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
+    }
+
+    /* successor node (low nibble)
+      A 00000001 1
+      C 00000010 2
+      G 00000100 4
+      T 00001000 8
+      predecessor node (high nibble)
+      A 00010000 16
+      C 00100000 32
+      G 01000000 64
+      T 10000000 128*/
+    @Override
+    public void map(LongWritable key, Text value, OutputCollector<BytesWritable, KmerCountValue> output,
+            Reporter reporter) throws IOException {
+        /* 2-bit base encoding: A 00
+           C 01
+           G 10
+           T 11 */
+        String geneLine = value.toString(); // one raw sequence line from the input
+        Pattern genePattern = Pattern.compile("[AGCT]+");
+        Matcher geneMatcher = genePattern.matcher(geneLine);
+        boolean isValid = geneMatcher.matches();
+        if (isValid) {
+            /** first kmer */
+            byte count = 1;
+            byte[] array = geneLine.getBytes();
+            byte[] kmer = Kmer.compressKmer(KMER_SIZE, array, 0);
+            byte pre = 0;
+            byte next = GENE_CODE.getAdjBit(array[KMER_SIZE]);
+            byte adj = GENE_CODE.mergePreNextAdj(pre, next);
+            outputAdjList.set(adj, count);
+            outputKmer.set(kmer, 0, kmer.length);
+            output.collect(outputKmer, outputAdjList);
+            /** middle kmer */
+            for (int i = KMER_SIZE; i < array.length - 1; i++) {
+                pre = Kmer.moveKmer(KMER_SIZE, kmer, array[i]);
+                next = GENE_CODE.getAdjBit(array[i + 1]);
+                adj = GENE_CODE.mergePreNextAdj(pre, next);
+                outputAdjList.set(adj, count);
+                outputKmer.set(kmer, 0, kmer.length);
+                output.collect(outputKmer, outputAdjList);
+            }
+            /** last kmer */
+            pre = Kmer.moveKmer(KMER_SIZE, kmer, array[array.length - 1]);
+            next = 0;
+            adj = GENE_CODE.mergePreNextAdj(pre, next);
+            outputAdjList.set(adj, count);
+            outputKmer.set(kmer, 0, kmer.length);
+            output.collect(outputKmer, outputAdjList);
+        }
+    }
+}
\ No newline at end of file
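
A small worked example of the adjacency-byte layout described in the mapper's comment block (successor bases in the low nibble, predecessor bases in the high nibble). The helper methods here are illustrative only and are not part of the genomix API:

    // Sketch only: recomputes one adjacency byte by hand.
    public final class AdjBitExample {
        // successor bits: A=1, C=2, G=4, T=8; predecessor bits: A=16, C=32, G=64, T=128
        static byte succ(char b) { return (byte) (b == 'A' ? 1 : b == 'C' ? 2 : b == 'G' ? 4 : 8); }
        static byte pred(char b) { return (byte) (succ(b) << 4); }

        public static void main(String[] args) {
            // In the read AGCATCGCA with k = 3, the first occurrence of kmer GCA is preceded by A
            // and followed by T, so its adjacency byte is 00011000 = 24.
            byte adj = (byte) (pred('A') | succ('T'));
            System.out.println(adj); // prints 24
        }
    }
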
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
new file mode 100755
index 0000000..676d6f1
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.graphbuilding;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+/**
+ * This class implements the reducer operator of the MapReduce model
+ */
+@SuppressWarnings("deprecation")
+public class GenomixReducer extends MapReduceBase implements
+        Reducer<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
+    KmerCountValue valWriter = new KmerCountValue();
+    static enum MyCounters { NUM_RECORDS };
+    @Override
+    public void reduce(BytesWritable key, Iterator<KmerCountValue> values,
+            OutputCollector<BytesWritable, KmerCountValue> output, Reporter reporter) throws IOException {
+        byte groupByAdjList = 0;
+        int count = 0;
+        byte bytCount = 0;
+        while (values.hasNext()) {
+            // merge all duplicate records for this kmer: OR the adjacency bitmaps, sum the counts
+            KmerCountValue geneValue = values.next();
+            groupByAdjList = (byte) (groupByAdjList | geneValue.getAdjBitMap());
+            count = count + (int) geneValue.getCount();
+        }
+        // clamp the accumulated count to the signed-byte maximum (127)
+        if (count >= 127)
+            bytCount = (byte) 127;
+        else
+            bytCount = (byte) count;
+        valWriter.set(groupByAdjList, bytCount);
+        output.collect(key, valWriter);
+        reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterDriver.java
new file mode 100644
index 0000000..b3a6102
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterDriver.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.graphcountfilter;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+@SuppressWarnings("deprecation")
+public class CountFilterDriver {
+    private static class Options {
+        @Option(name = "-inputpath", usage = "the input path", required = true)
+        public String inputPath;
+
+        @Option(name = "-outputpath", usage = "the output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+        public int numReducers;
+
+        @Option(name = "-count-threshold", usage = "the threshold of count", required = true)
+        public int countThreshold;
+    }
+   
+    public void run(String inputPath, String outputPath, int numReducers, int countThreshold, String defaultConfPath)
+            throws IOException {
+
+        JobConf conf = new JobConf(CountFilterDriver.class);
+        conf.setInt("countThreshold", countThreshold);
+
+        if (defaultConfPath != null) {
+            conf.addResource(new Path(defaultConfPath));
+        }
+
+        conf.setJobName("Count Filter");
+        conf.setMapperClass(CountFilterMapper.class);
+        conf.setReducerClass(CountFilterReducer.class);
+        conf.setCombinerClass(CountFilterReducer.class);
+
+        conf.setMapOutputKeyClass(BytesWritable.class);
+        conf.setMapOutputValueClass(ByteWritable.class);
+
+        conf.setInputFormat(SequenceFileInputFormat.class);
+        conf.setOutputFormat(SequenceFileOutputFormat.class);
+        
+        conf.setOutputKeyClass(BytesWritable.class);
+        conf.setOutputValueClass(ByteWritable.class);
+        
+        FileInputFormat.setInputPaths(conf, new Path(inputPath));
+        FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+        conf.setNumReduceTasks(numReducers);
+
+        FileSystem dfs = FileSystem.get(conf);
+        dfs.delete(new Path(outputPath), true);
+        JobClient.runJob(conf);
+    }
+
+    public static void main(String[] args) throws Exception {
+        Options options = new Options();
+        CmdLineParser parser = new CmdLineParser(options);
+        parser.parseArgument(args);
+        CountFilterDriver driver = new CountFilterDriver();
+        driver.run(options.inputPath, options.outputPath, options.numReducers, options.countThreshold, null);
+    }
+
+}
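
The count filter consumes the sequence files written by the graph builder and keeps only kmers whose coverage count reaches the threshold. A sketch of chaining it onto the output of the previous stage, with a threshold of 1 and the directory names chosen purely for illustration:

    import edu.uci.ics.graphcountfilter.CountFilterDriver;

    // Sketch only: filters the graph-builder output by coverage count.
    public class FilterExample {
        public static void main(String[] args) throws Exception {
            CountFilterDriver driver = new CountFilterDriver();
            // run(inputPath, outputPath, numReducers, countThreshold, defaultConfPath)
            driver.run("graphbuild-out", "countfilter-out", 1, 1, null);
        }
    }
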
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterMapper.java
new file mode 100644
index 0000000..80557e9
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterMapper.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.graphcountfilter;
+
+import java.io.IOException;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+
+@SuppressWarnings({ "unused", "deprecation" })
+public class CountFilterMapper extends MapReduceBase implements
+        Mapper<BytesWritable, KmerCountValue, BytesWritable, ByteWritable> {
+    public static int THRESHOLD;
+    public BytesWritable outputKmer = new BytesWritable();
+    public ByteWritable outputAdjList = new ByteWritable();
+    @Override
+    public void configure(JobConf job) {
+        THRESHOLD = Integer.parseInt(job.get("countThreshold"));
+    }
+    public void map(BytesWritable key, KmerCountValue value, OutputCollector<BytesWritable, ByteWritable> output,
+            Reporter reporter) throws IOException {
+        if(value.getCount() >= THRESHOLD){
+            outputKmer.set(key);
+            outputAdjList.set(value.getAdjBitMap());
+            output.collect(outputKmer, outputAdjList);
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterReducer.java
new file mode 100644
index 0000000..c692336
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterReducer.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.graphcountfilter;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+@SuppressWarnings("deprecation")
+public class CountFilterReducer extends MapReduceBase implements
+        Reducer<BytesWritable, ByteWritable, BytesWritable, ByteWritable> {
+    @Override
+    public void reduce(BytesWritable key, Iterator<ByteWritable> values,
+            OutputCollector<BytesWritable, ByteWritable> output, Reporter reporter) throws IOException {
+        output.collect(key, values.next()); //Output the Pair
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathDriver.java
new file mode 100644
index 0000000..5025a7b
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathDriver.java
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+@SuppressWarnings("deprecation")
+public class MergePathDriver {
+    
+    private static class Options {
+        @Option(name = "-inputpath", usage = "the input path", required = true)
+        public String inputPath;
+
+        @Option(name = "-outputpath", usage = "the output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
+        public String mergeResultPath;
+        
+        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+        public int numReducers;
+
+        @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+        public int sizeKmer;
+        
+        @Option(name = "-merge-rounds", usage = "the number of merge rounds", required = true)
+        public int mergeRound;
+
+    }
+
+
+    public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath)
+            throws IOException{
+
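+        // Stage 1: a single job (SNodeInitialMapper/Reducer) builds the initial path-starting-points table in <inputPath>-step1.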
+        JobConf conf = new JobConf(MergePathDriver.class);
+        conf.setInt("sizeKmer", sizeKmer);
+        
+        if (defaultConfPath != null) {
+            conf.addResource(new Path(defaultConfPath));
+        }
+        conf.setJobName("Initial Path-Starting-Points Table");
+        conf.setMapperClass(SNodeInitialMapper.class); 
+        conf.setReducerClass(SNodeInitialReducer.class);
+        
+        conf.setMapOutputKeyClass(BytesWritable.class);
+        conf.setMapOutputValueClass(MergePathValueWritable.class);
+        
+        conf.setInputFormat(SequenceFileInputFormat.class);
+        conf.setOutputFormat(SequenceFileOutputFormat.class);
+        
+        conf.setOutputKeyClass(BytesWritable.class);
+        conf.setOutputValueClass(MergePathValueWritable.class);
+        
+        FileInputFormat.setInputPaths(conf, new Path(inputPath));
+        FileOutputFormat.setOutputPath(conf, new Path(inputPath + "-step1"));
+        conf.setNumReduceTasks(numReducers);
+        FileSystem dfs = FileSystem.get(conf);
+        dfs.delete(new Path(inputPath + "-step1"), true);
+        JobClient.runJob(conf);
+/*----------------------------------------------------------------------*/
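+        // Stage 2: iterative path merging. Each round reads <inputPath>-step1, writes still-growing paths to the
+        // "uncomplete<i>" named output (renamed back to <inputPath>-step1 for the next round) and finished paths
+        // to "complete<i>", which is moved under mergeResultPath.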
+        for (int iMerge = 0; iMerge < mergeRound; iMerge++) {
+        
+            conf = new JobConf(MergePathDriver.class);
+            conf.setInt("sizeKmer", sizeKmer);
+            conf.setInt("iMerge", iMerge);
+            
+            if (defaultConfPath != null) {
+                conf.addResource(new Path(defaultConfPath));
+            }
+            conf.setJobName("Path Merge");
+            
+            conf.setMapperClass(MergePathMapper.class);
+            conf.setReducerClass(MergePathReducer.class);
+            
+            conf.setMapOutputKeyClass(BytesWritable.class);
+            conf.setMapOutputValueClass(MergePathValueWritable.class);
+            
+            conf.setInputFormat(SequenceFileInputFormat.class);
+            conf.setOutputFormat(MultipleSequenceFileOutputFormat.class);
+            
+            String uncomplete = "uncomplete" + iMerge;
+            String complete = "complete" + iMerge;
+           
+            MultipleOutputs.addNamedOutput(conf, uncomplete,
+                    MergePathMultiSeqOutputFormat.class, BytesWritable.class,
+                    MergePathValueWritable.class);
+
+            MultipleOutputs.addNamedOutput(conf, complete,
+                    MergePathMultiSeqOutputFormat.class, BytesWritable.class,
+                    MergePathValueWritable.class);
+            
+            conf.setOutputKeyClass(BytesWritable.class);
+            conf.setOutputValueClass(MergePathValueWritable.class);
+            
+            FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+            FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+            conf.setNumReduceTasks(numReducers);
+            dfs.delete(new Path(outputPath), true);
+            JobClient.runJob(conf);
+            dfs.delete(new Path(inputPath + "-step1"), true);
+            dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
+            dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        Options options = new Options();
+        CmdLineParser parser = new CmdLineParser(options);
+        parser.parseArgument(args);
+        MergePathDriver driver = new MergePathDriver();
+        driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers, options.sizeKmer, options.mergeRound, null);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java
new file mode 100644
index 0000000..1d772b2
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.IOException;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerUtil;
+import edu.uci.ics.genomix.type.Kmer.GENE_CODE;
+
+@SuppressWarnings("deprecation")
+public class MergePathMapper extends MapReduceBase implements
+        Mapper<BytesWritable, MergePathValueWritable, BytesWritable, MergePathValueWritable> {
+    public static int KMER_SIZE;
+    public BytesWritable outputKmer = new BytesWritable();
+    public MergePathValueWritable outputAdjList = new MergePathValueWritable();
+
+
+    public void configure(JobConf job) {
+        KMER_SIZE = job.getInt("sizeKmer", 0);
+    }
+
+    @Override
+    public void map(BytesWritable key, MergePathValueWritable value,
+            OutputCollector<BytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+
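+        // The adjacency bitmap packs predecessor edges in the high nibble and successor edges in the low nibble,
+        // as the 0xF0/0x0F masks below imply.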
+        byte precursor = (byte) 0xF0;
+        byte succeed = (byte) 0x0F;
+        byte adjBitMap = value.getAdjBitMap();
+        byte bitFlag = value.getFlag();
+        precursor = (byte) (precursor & adjBitMap);
+        precursor = (byte) ((precursor & 0xff) >> 4);
+        succeed = (byte) (succeed & adjBitMap);
+
+        byte[] kmerValue = key.getBytes();
+        int kmerLength = key.getLength();
+        if (bitFlag == 1) {
+            byte succeedCode = GENE_CODE.getGeneCodeFromBitMap(succeed);
+            int originalByteNum = Kmer.getByteNumFromK(KMER_SIZE);
+            byte[] tmpKmer = KmerUtil.getLastKmerFromChain(KMER_SIZE, value.getKmerSize(), kmerValue, 0, kmerLength);
+            byte[] newKmer = KmerUtil.shiftKmerWithNextCode(KMER_SIZE, tmpKmer, 0, tmpKmer.length, succeedCode);
+            outputKmer.set(newKmer, 0, originalByteNum);
+
+            int mergeByteNum = Kmer.getByteNumFromK(value.getKmerSize() - (KMER_SIZE - 1));
+            byte[] mergeKmer = KmerUtil.getFirstKmerFromChain(value.getKmerSize() - (KMER_SIZE - 1),
+                    value.getKmerSize(), kmerValue, 0, kmerLength);
+            outputAdjList.set(mergeKmer, 0, mergeByteNum, adjBitMap, bitFlag, value.getKmerSize() - (KMER_SIZE - 1));
+            output.collect(outputKmer, outputAdjList);
+        } else {
+            outputKmer.set(key);
+            outputAdjList.set(value);
+            output.collect(outputKmer, outputAdjList);
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiSeqOutputFormat.java
new file mode 100644
index 0000000..64fbb91
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiSeqOutputFormat.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.File;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+
+
+public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<BytesWritable, MergePathValueWritable>{
+    @Override
+    protected String generateLeafFileName(String name) {
+        // route each output file into a sub-directory named after the prefix before the first '-', e.g. "complete0/complete0-r-00000"
+        String[] names = name.split("-");
+        return names[0] + File.separator + name;
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiTextOutputFormat.java
new file mode 100644
index 0000000..29d3b68
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiTextOutputFormat.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.File;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
+
+public class MergePathMultiTextOutputFormat extends MultipleTextOutputFormat<Text, Text>{
+    @Override
+    protected String generateLeafFileName(String name) {
+        // route each output file into a sub-directory named after the prefix before the first '-'
+        String[] names = name.split("-");
+        return names[0] + File.separator + name;
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java
new file mode 100644
index 0000000..2397a98
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerUtil;
+
+@SuppressWarnings("deprecation")
+public class MergePathReducer extends MapReduceBase implements
+        Reducer<BytesWritable, MergePathValueWritable, BytesWritable, MergePathValueWritable> {
+    public BytesWritable outputKmer = new BytesWritable();
+    public static int KMER_SIZE;
+    public MergePathValueWritable outputAdjList = new MergePathValueWritable();
+    MultipleOutputs mos = null;
+    public static int I_MERGE;
+
+    public void configure(JobConf job) {
+        mos = new MultipleOutputs(job);
+        I_MERGE = Integer.parseInt(job.get("iMerge"));
+        KMER_SIZE = job.getInt("sizeKmer", 0);
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public void reduce(BytesWritable key, Iterator<MergePathValueWritable> values,
+            OutputCollector<BytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+        outputAdjList = values.next();
+
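+        // Two values under the same key mean a path record met its successor node here: merge the path's
+        // k-mer with the key, splice the successor's outgoing edges into the adjacency map, and keep the
+        // result in the "uncomplete" output for another round.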
+        if (values.hasNext()) {
+
+            byte[] keyBytes = key.getBytes();
+            int keyLength = key.getLength();
+            if (outputAdjList.getFlag() == 1) {
+                byte adjBitMap = outputAdjList.getAdjBitMap();
+                byte bitFlag = outputAdjList.getFlag();
+                int kmerSize = outputAdjList.getKmerSize();
+                int mergeByteNum = Kmer.getByteNumFromK(KMER_SIZE + kmerSize);
+                byte[] valueBytes = outputAdjList.getBytes();
+                int valueLength = outputAdjList.getLength();
+                
+                byte[] mergeKmer = KmerUtil.mergeTwoKmer(outputAdjList.getKmerSize(), valueBytes, 0, valueLength,
+                        KMER_SIZE, keyBytes, 0, keyLength);
+                outputKmer.set(mergeKmer, 0, mergeByteNum);
+
+                outputAdjList = values.next();
+                byte nextAdj = outputAdjList.getAdjBitMap();
+                byte succeed = (byte) 0x0F;
+                succeed = (byte) (succeed & nextAdj);
+                adjBitMap = (byte) (adjBitMap & 0xF0);
+                adjBitMap = (byte) (adjBitMap | succeed);
+                outputAdjList.set(null, 0, 0, adjBitMap, bitFlag, KMER_SIZE + kmerSize);
+
+                mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputAdjList);
+            } else {
+                byte nextAdj = outputAdjList.getAdjBitMap();
+                byte succeed = (byte) 0x0F;
+                succeed = (byte) (succeed & nextAdj);
+                outputAdjList = values.next();
+                byte adjBitMap = outputAdjList.getAdjBitMap();
+                byte flag = outputAdjList.getFlag();
+                int kmerSize = outputAdjList.getKmerSize();
+                int mergeByteNum = Kmer.getByteNumFromK(KMER_SIZE + kmerSize);
+                byte[] valueBytes = outputAdjList.getBytes();
+                int valueLength = outputAdjList.getLength();
+                byte[] mergeKmer = KmerUtil.mergeTwoKmer(outputAdjList.getKmerSize(), valueBytes, 0, valueLength,
+                        KMER_SIZE, keyBytes, 0, keyLength);
+                outputKmer.set(mergeKmer, 0, mergeByteNum);
+
+                adjBitMap = (byte) (adjBitMap & 0xF0);
+                adjBitMap = (byte) (adjBitMap | succeed);
+                outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE + kmerSize);
+
+                mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputAdjList);
+            }
+        } else {
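+            // Only one value for this key: a record whose flag is set is a finished path and goes to the
+            // "complete" output; everything else passes through to "uncomplete".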
+            byte[] keyBytes = key.getBytes();
+            int keyLength = key.getLength();
+            if (outputAdjList.getFlag() != 0) {
+                byte adjBitMap = outputAdjList.getAdjBitMap();
+                byte flag = outputAdjList.getFlag();
+                int kmerSize = outputAdjList.getKmerSize();
+                int mergeByteNum = Kmer.getByteNumFromK(KMER_SIZE - 1 + kmerSize);
+                byte[] tmpKmer = KmerUtil.getFirstKmerFromChain(KMER_SIZE - 1, KMER_SIZE, keyBytes,0,keyLength);
+                byte[] valueBytes = outputAdjList.getBytes();
+                int valueLength = outputAdjList.getLength();
+                byte[] mergeKmer = KmerUtil.mergeTwoKmer(outputAdjList.getKmerSize(), valueBytes, 0, valueLength,
+                        KMER_SIZE - 1, tmpKmer, 0, tmpKmer.length);
+                outputKmer.set(mergeKmer, 0, mergeByteNum);
+                outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE + kmerSize);
+                mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputAdjList);
+            } else
+                mos.getCollector("uncomplete" + I_MERGE, reporter).collect(key, outputAdjList);
+        }
+    }
+
+    public void close() throws IOException {
+        mos.close();
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathValueWritable.java
new file mode 100644
index 0000000..f1dee39
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathValueWritable.java
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.IOException;
+import java.io.DataInput;
+import java.io.DataOutput;
+import org.apache.hadoop.io.BinaryComparable;
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.type.Kmer;
+
+public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
+
+    private static final byte[] EMPTY_BYTES = {};
+    private int size;
+    private byte[] bytes;
+
+    private byte adjBitMap;
+    private byte flag;
+    private int kmerSize;
+
+    public MergePathValueWritable() {
+        this((byte) 0, (byte) 0, (byte) 0, EMPTY_BYTES);
+    }
+
+    public MergePathValueWritable(byte adjBitMap, byte flag, byte kmerSize, byte[] bytes) {
+        this.adjBitMap = adjBitMap;
+        this.flag = flag;
+        this.kmerSize = kmerSize;
+
+        this.bytes = bytes;
+        this.size = bytes.length;
+    }
+
+    public void setSize(int size) {
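+        // Grow the backing array by roughly 1.5x whenever the requested size exceeds the current capacity.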
+        if (size > getCapacity()) {
+            setCapacity(size * 3 / 2);
+        }
+        this.size = size;
+    }
+
+    public int getCapacity() {
+        return bytes.length;
+    }
+
+    public void setCapacity(int new_cap) {
+        if (new_cap != getCapacity()) {
+            byte[] new_data = new byte[new_cap];
+            if (new_cap < size) {
+                size = new_cap;
+            }
+            if (size != 0) {
+                System.arraycopy(bytes, 0, new_data, 0, size);
+            }
+            bytes = new_data;
+        }
+    }
+
+    public void set(MergePathValueWritable newData) {
+        set(newData.bytes, 0, newData.size, newData.adjBitMap, newData.flag, newData.kmerSize);
+    }
+
+    public void set(byte[] newData, int offset, int length, byte adjBitMap, byte flag, int kmerSize) {
+        setSize(0);
+        if (length != 0) {
+            setSize(length);
+            System.arraycopy(newData, offset, bytes, 0, size);
+        }
+        this.adjBitMap = adjBitMap;
+        this.flag = flag;
+        this.kmerSize = kmerSize;
+    }
+
+    @Override
+    public void readFields(DataInput arg0) throws IOException {
+        setSize(0); // clear the old data
+        setSize(arg0.readInt());
+        if (size != 0) {
+            arg0.readFully(bytes, 0, size);
+        }
+        adjBitMap = arg0.readByte();
+        flag = arg0.readByte();
+        kmerSize = arg0.readInt();
+    }
+
+    @Override
+    public void write(DataOutput arg0) throws IOException {
+        arg0.writeInt(size);
+        arg0.write(bytes, 0, size);
+        arg0.writeByte(adjBitMap);
+        arg0.writeByte(flag);
+        arg0.writeInt(kmerSize);
+    }
+
+    @Override
+    public byte[] getBytes() {
+        return bytes;
+    }
+
+    @Override
+    public int getLength() {
+        return size;
+    }
+
+    public byte getAdjBitMap() {
+        return this.adjBitMap;
+    }
+
+    public byte getFlag() {
+        return this.flag;
+    }
+
+    public int getKmerSize() {
+        return this.kmerSize;
+    }
+
+    public String toString() {
+        StringBuffer sb = new StringBuffer(3 * size);
+        for (int idx = 0; idx < size; idx++) {
+            // if not the first, put a blank separator in
+            if (idx != 0) {
+                sb.append(' ');
+            }
+            String num = Integer.toHexString(0xff & bytes[idx]);
+            // if it is only one digit, add a leading 0.
+            if (num.length() < 2) {
+                sb.append('0');
+            }
+            sb.append(num);
+        }
+        return Kmer.GENE_CODE.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag) + '\t' + sb.toString();
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java
new file mode 100644
index 0000000..2e25c0d
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.IOException;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerUtil;
+
+@SuppressWarnings("deprecation")
+public class SNodeInitialMapper extends MapReduceBase implements
+        Mapper<BytesWritable, ByteWritable, BytesWritable, MergePathValueWritable> {
+
+    public static int KMER_SIZE;
+    public BytesWritable outputKmer = new BytesWritable();
+    public MergePathValueWritable outputAdjList = new MergePathValueWritable();
+
+    public void configure(JobConf job) {
+        KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
+    }
+
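+    /**
+     * Returns false only when the 4-bit adjacency nibble has exactly one bit set (1, 2, 4 or 8),
+     * i.e. exactly one neighbour; returns true for zero or multiple neighbours.
+     */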
+    boolean measureDegree(byte adjacent) {
+        boolean result = true;
+        switch (adjacent) {
+            case 0:
+                result = true;
+                break;
+            case 1:
+                result = false;
+                break;
+            case 2:
+                result = false;
+                break;
+            case 3:
+                result = true;
+                break;
+            case 4:
+                result = false;
+                break;
+            case 5:
+                result = true;
+                break;
+            case 6:
+                result = true;
+                break;
+            case 7:
+                result = true;
+                break;
+            case 8:
+                result = false;
+                break;
+            case 9:
+                result = true;
+                break;
+            case 10:
+                result = true;
+                break;
+            case 11:
+                result = true;
+                break;
+            case 12:
+                result = true;
+                break;
+            case 13:
+                result = true;
+                break;
+            case 14:
+                result = true;
+                break;
+            case 15:
+                result = true;
+                break;
+        }
+        return result;
+    }
+
+    @Override
+    public void map(BytesWritable key, ByteWritable value,
+            OutputCollector<BytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+
+        byte precursor = (byte) 0xF0;
+        byte succeed = (byte) 0x0F;
+        byte adjBitMap = value.get();
+        byte flag = (byte) 0;
+        precursor = (byte) (precursor & adjBitMap);
+        precursor = (byte) ((precursor & 0xff) >> 4);
+        succeed = (byte) (succeed & adjBitMap);
+        boolean inDegree = measureDegree(precursor);
+        boolean outDegree = measureDegree(succeed);
+        byte initial = 0;
+        byte[] kmerValue = key.getBytes();
+        int kmerLength = key.getLength();
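+        // inDegree/outDegree are true when the corresponding nibble does NOT encode exactly one neighbour.
+        // A node with a single successor but without a single predecessor is shifted onto that successor and
+        // emitted with flag 2; a plain 1-in/1-out path node is emitted with flag 0.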
+        if (inDegree && !outDegree) {
+            flag = (byte) 2;
+            switch (succeed) {
+                case 1:
+                    initial = (byte) 0x00;
+                    break;
+                case 2:
+                    initial = (byte) 0x01;
+                    break;
+                case 4:
+                    initial = (byte) 0x02;
+                    break;
+                case 8:
+                    initial = (byte) 0x03;
+                    break;
+            }
+            byte[] newKmer = KmerUtil.shiftKmerWithNextCode(KMER_SIZE, kmerValue, 0, kmerLength, initial);
+            outputKmer.set(newKmer, 0, kmerValue.length);
+            adjBitMap = (byte) (adjBitMap & 0xF0);
+            outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE);
+            output.collect(outputKmer, outputAdjList);
+        }
+        if (!inDegree && !outDegree) {
+            outputKmer.set(key);
+            outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE);
+            output.collect(outputKmer, outputAdjList);
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialReducer.java
new file mode 100644
index 0000000..44a0af2
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialReducer.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+@SuppressWarnings("deprecation")
+public class SNodeInitialReducer extends MapReduceBase implements
+        Reducer<BytesWritable, MergePathValueWritable, BytesWritable, MergePathValueWritable> {
+    public BytesWritable outputKmer = new BytesWritable();
+    public MergePathValueWritable outputAdjList = new MergePathValueWritable();
+
+    @Override
+    public void reduce(BytesWritable key, Iterator<MergePathValueWritable> values,
+            OutputCollector<BytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+        outputAdjList = values.next();
+        outputKmer.set(key);
+        if (values.hasNext()) {
+            if (outputAdjList.getFlag() != 2) {
+                byte adjBitMap = outputAdjList.getAdjBitMap();
+                int kmerSize = outputAdjList.getKmerSize();
+                byte bitFlag = 1;
+                outputAdjList.set(null, 0, 0, adjBitMap, bitFlag, kmerSize);
+                output.collect(outputKmer, outputAdjList);
+                
+            } else {
+                boolean flag = false;
+                while (values.hasNext()) {
+                    outputAdjList = values.next();
+                    if (outputAdjList.getFlag() != 2) {
+                        flag = true;
+                        break;
+                    }
+                }
+                if (flag) {
+                    byte adjBitMap = outputAdjList.getAdjBitMap();
+                    int kmerSize = outputAdjList.getKmerSize();
+                    byte bitFlag = 1;
+                    outputAdjList.set(null, 0, 0, adjBitMap, bitFlag, kmerSize);
+                    output.collect(outputKmer, outputAdjList);
+                }
+            }
+        }
+        else {
+            output.collect(outputKmer, outputAdjList);
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatDriver.java
new file mode 100644
index 0000000..efe1589
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatDriver.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.statistics;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+@SuppressWarnings("deprecation")
+public class GenomixStatDriver {
+    private static class Options {
+        @Option(name = "-inputpath", usage = "the input path", required = true)
+        public String inputPath;
+
+        @Option(name = "-outputpath", usage = "the output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+        public int numReducers;
+
+    }
+   
+    public void run(String inputPath, String outputPath, int numReducers, String defaultConfPath)
+            throws IOException {
+
+        JobConf conf = new JobConf(GenomixStatDriver.class);
+
+        if (defaultConfPath != null) {
+            conf.addResource(new Path(defaultConfPath));
+        }
+
+        conf.setJobName("Genomix Statistics");
+        conf.setMapperClass(GenomixStatMapper.class);
+        conf.setReducerClass(GenomixStatReducer.class);
+        conf.setCombinerClass(GenomixStatReducer.class);
+
+        conf.setMapOutputKeyClass(BytesWritable.class);
+        conf.setMapOutputValueClass(KmerCountValue.class);
+
+        conf.setInputFormat(SequenceFileInputFormat.class);
+        conf.setOutputFormat(SequenceFileOutputFormat.class);
+        
+        conf.setOutputKeyClass(BytesWritable.class);
+        conf.setOutputValueClass(KmerCountValue.class);
+        
+        FileInputFormat.setInputPaths(conf, new Path(inputPath));
+        FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+        conf.setNumReduceTasks(numReducers);
+
+        FileSystem dfs = FileSystem.get(conf);
+        dfs.delete(new Path(outputPath), true);
+        JobClient.runJob(conf);
+    }
+
+    public static void main(String[] args) throws Exception {
+        Options options = new Options();
+        CmdLineParser parser = new CmdLineParser(options);
+        parser.parseArgument(args);
+        GenomixStatDriver driver = new GenomixStatDriver();
+        driver.run(options.inputPath, options.outputPath, options.numReducers, null);
+    }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatMapper.java
new file mode 100644
index 0000000..c5feefe
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatMapper.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.statistics;
+
+import java.io.IOException;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+@SuppressWarnings({ "unused", "deprecation" })
+public class GenomixStatMapper extends MapReduceBase implements
+        Mapper<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
+    
+    boolean measureDegree(byte adjacent) {
+        boolean result = true;
+        switch (adjacent) {
+            case 0:
+                result = true;
+                break;
+            case 1:
+                result = false;
+                break;
+            case 2:
+                result = false;
+                break;
+            case 3:
+                result = true;
+                break;
+            case 4:
+                result = false;
+                break;
+            case 5:
+                result = true;
+                break;
+            case 6:
+                result = true;
+                break;
+            case 7:
+                result = true;
+                break;
+            case 8:
+                result = false;
+                break;
+            case 9:
+                result = true;
+                break;
+            case 10:
+                result = true;
+                break;
+            case 11:
+                result = true;
+                break;
+            case 12:
+                result = true;
+                break;
+            case 13:
+                result = true;
+                break;
+            case 14:
+                result = true;
+                break;
+            case 15:
+                result = true;
+                break;
+        }
+        return result;
+    }
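+    // Keep only k-mers whose predecessor nibble does not encode exactly one neighbour while the
+    // successor nibble does (see measureDegree); all other k-mers are dropped before counting.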
+    @Override
+    public void map(BytesWritable key, KmerCountValue value, OutputCollector<BytesWritable, KmerCountValue> output,
+            Reporter reporter) throws IOException {
+        byte precursor = (byte) 0xF0;
+        byte succeed = (byte) 0x0F;
+        byte adj = value.getAdjBitMap();
+        precursor = (byte) (precursor & adj);
+        precursor = (byte) ((precursor & 0xff) >> 4);
+        succeed = (byte) (succeed & adj);
+        boolean inDegree = measureDegree(precursor);
+        boolean outDegree = measureDegree(succeed);
+        if (inDegree && !outDegree) {
+            output.collect(key, value);
+        }
+    }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatReducer.java
new file mode 100644
index 0000000..ea9a915
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatReducer.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.statistics;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+@SuppressWarnings("deprecation")
+public class GenomixStatReducer extends MapReduceBase implements
+        Reducer<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
+    static enum MyCounters { NUM_RECORDS };
+    KmerCountValue valWriter = new KmerCountValue();
+    @Override
+    public void reduce(BytesWritable key, Iterator<KmerCountValue> values,
+            OutputCollector<BytesWritable, KmerCountValue> output, Reporter reporter) throws IOException {
+        reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
+        valWriter = values.next();
+        output.collect(key, valWriter); 
+    }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/gbresultschecking/ResultsCheckingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/gbresultschecking/ResultsCheckingTest.java
new file mode 100644
index 0000000..72e9b45
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/gbresultschecking/ResultsCheckingTest.java
@@ -0,0 +1,76 @@
+package edu.uci.ics.gbresultschecking;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.junit.Test;
+
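+/**
+ * Runs ResultsCheckingDriver on an in-process MiniDFS/MiniMR cluster over two sample part files
+ * and copies the /result4 output from HDFS into the local "actual4" directory.
+ */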
+@SuppressWarnings("deprecation")
+public class ResultsCheckingTest {
+    private static final String ACTUAL_RESULT_DIR = "actual4";
+    private JobConf conf = new JobConf();
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private static final String DATA_PATH1 = "ResultsCheckingData" + "/part-00000";
+    private static final String DATA_PATH2 = "ResultsCheckingData" + "/part-00001";
+    private static final String HDFS_PATH1 = "/webmap1";
+    private static final String HDFS_PATH2 = "/webmap2";
+    private static final String RESULT_PATH = "/result4";
+    private static final int COUNT_REDUCER = 4;
+    private static final int SIZE_KMER = 3;
+    private MiniDFSCluster dfsCluster;
+    private MiniMRCluster mrCluster;
+    private FileSystem dfs;
+
+    @Test
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        startHadoop();
+        ResultsCheckingDriver tldriver = new ResultsCheckingDriver();
+        tldriver.run(HDFS_PATH1, HDFS_PATH2, RESULT_PATH, COUNT_REDUCER, SIZE_KMER, HADOOP_CONF_PATH);
+        dumpResult();
+        cleanupHadoop();
+
+    }
+    private void startHadoop() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+        dfs = dfsCluster.getFileSystem();
+        mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+        Path src = new Path(DATA_PATH1);
+        Path dest = new Path(HDFS_PATH1 + "/");
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
+        src = new Path(DATA_PATH2);
+        dest = new Path(HDFS_PATH2 + "/");
+        dfs.copyFromLocalFile(src, dest);
+        
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        conf.writeXml(confOutput);
+        confOutput.flush();
+        confOutput.close();
+    }
+
+    private void cleanupHadoop() throws IOException {
+        mrCluster.shutdown();
+        dfsCluster.shutdown();
+    }
+
+    private void dumpResult() throws IOException {
+        Path src = new Path(RESULT_PATH);
+        Path dest = new Path(ACTUAL_RESULT_DIR + "/");
+        dfs.copyToLocalFile(src, dest);
+    }
+}
+
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
new file mode 100755
index 0000000..6bd3bd5
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
@@ -0,0 +1,122 @@
+package edu.uci.ics.graphbuilding;
+
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerCountValue;
+import edu.uci.ics.utils.TestUtils;
+/**
+ * This class tests the correctness of the graph-building program.
+ */
+@SuppressWarnings("deprecation")
+public class GraphBuildingTest {
+
+    private static final String ACTUAL_RESULT_DIR = "actual1";
+    private static final String COMPARE_DIR = "compare";
+    private JobConf conf = new JobConf();
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private static final String DATA_PATH = "data/webmap/Test.txt";
+    private static final String HDFS_PATH = "/webmap";
+    private static final String RESULT_PATH = "/result1";
+    private static final String EXPECTED_PATH = "expected/result1";
+    private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH + "/comparesource.txt";
+    private static final int COUNT_REDUCER = 4;
+    private static final int SIZE_KMER = 3;
+    
+    private MiniDFSCluster dfsCluster;
+    private MiniMRCluster mrCluster;
+    private FileSystem dfs;
+
+    @SuppressWarnings("resource")
+    @Test
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        startHadoop();
+
+        // run graph transformation tests
+        GenomixDriver tldriver = new GenomixDriver();
+        tldriver.run(HDFS_PATH, RESULT_PATH, COUNT_REDUCER, SIZE_KMER, HADOOP_CONF_PATH);
+
+        SequenceFile.Reader reader = null;
+        Path path = new Path(RESULT_PATH + "/part-00000");
+        reader = new SequenceFile.Reader(dfs, path, conf); 
+        BytesWritable key = (BytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+        KmerCountValue value = (KmerCountValue) ReflectionUtils.newInstance(reader.getValueClass(), conf);
+        File filePathTo = new File(TEST_SOURCE_DIR);
+        BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+        
+        while (reader.next(key, value)) {
+            bw.write(Kmer.recoverKmerFrom(SIZE_KMER, key.getBytes(), 0, key.getLength()) + "\t" + value.toString());
+            bw.newLine();
+        }
+        bw.close();
+
+        dumpResult();
+        TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
+
+        cleanupHadoop();
+
+    }
+
+    private void startHadoop() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+        dfs = dfsCluster.getFileSystem();
+        mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+        Path src = new Path(DATA_PATH);
+        Path dest = new Path(HDFS_PATH + "/");
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
+
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        conf.writeXml(confOutput);
+        confOutput.flush();
+        confOutput.close();
+    }
+
+    private void cleanupHadoop() throws IOException {
+        mrCluster.shutdown();
+        dfsCluster.shutdown();
+    }
+
+    private void dumpResult() throws IOException {
+        Path src = new Path(RESULT_PATH);
+        Path dest = new Path(ACTUAL_RESULT_DIR);
+        dfs.copyToLocalFile(src, dest);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphcountfilter/CountFilterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphcountfilter/CountFilterTest.java
new file mode 100644
index 0000000..4bf0be7
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphcountfilter/CountFilterTest.java
@@ -0,0 +1,102 @@
+package edu.uci.ics.graphcountfilter;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Test;
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.utils.TestUtils;
+
+
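+/**
+ * Runs CountFilterDriver on an in-process MiniDFS/MiniMR cluster, using the graph-building output
+ * (actual1/result1/part-00000) as input, and compares the dumped result with expected/result2.
+ */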
+@SuppressWarnings("deprecation")
+public class CountFilterTest {
+    private static final String ACTUAL_RESULT_DIR = "actual2";
+    private static final String COMPARE_DIR = "compare";
+    private JobConf conf = new JobConf();
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private static final String DATA_PATH = "actual1" + "/result1" + "/part-00000";
+    private static final String HDFS_PATH = "/webmap";
+    private static final String RESULT_PATH = "/result2";
+    private static final String EXPECTED_PATH = "expected/result2";
+    private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH + "/comparesource.txt";
+    private static final int COUNT_REDUCER = 4;
+    private static final int SIZE_KMER = 3;
+    private MiniDFSCluster dfsCluster;
+    private MiniMRCluster mrCluster;
+    private FileSystem dfs;
+
+    @SuppressWarnings("resource")
+    @Test
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        startHadoop();
+
+        // run graph transformation tests
+        CountFilterDriver tldriver = new CountFilterDriver();
+        tldriver.run(HDFS_PATH, RESULT_PATH, COUNT_REDUCER, 1, HADOOP_CONF_PATH);
+
+        SequenceFile.Reader reader = null;
+        Path path = new Path(RESULT_PATH + "/part-00000");
+        reader = new SequenceFile.Reader(dfs, path, conf);
+        BytesWritable key = (BytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+        ByteWritable value = (ByteWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
+        File filePathTo = new File(TEST_SOURCE_DIR);
+        BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+        while (reader.next(key, value)) {
+            bw.write(Kmer.recoverKmerFrom(SIZE_KMER, key.getBytes(), 0, key.getLength()) + "\t" + value.toString());
+            bw.newLine();
+        }
+        bw.close();
+
+        dumpResult();
+        TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
+
+        cleanupHadoop();
+
+    }
+    private void startHadoop() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+        dfs = dfsCluster.getFileSystem();
+        mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+        Path src = new Path(DATA_PATH);
+        Path dest = new Path(HDFS_PATH + "/");
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
+
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        conf.writeXml(confOutput);
+        confOutput.flush();
+        confOutput.close();
+    }
+
+    private void cleanupHadoop() throws IOException {
+        mrCluster.shutdown();
+        dfsCluster.shutdown();
+    }
+
+    private void dumpResult() throws IOException {
+        Path src = new Path(RESULT_PATH);
+        Path dest = new Path(ACTUAL_RESULT_DIR + "/");
+        dfs.copyToLocalFile(src, dest);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/pathmerging/MergePathTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/pathmerging/MergePathTest.java
new file mode 100644
index 0000000..7b8d285
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/pathmerging/MergePathTest.java
@@ -0,0 +1,102 @@
+package edu.uci.ics.pathmerging;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Test;
+import edu.uci.ics.utils.TestUtils;
+
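+/**
+ * Runs MergePathDriver for three merge rounds on an in-process MiniDFS/MiniMR cluster, using the
+ * count-filter output (actual2/result2/part-00000) as input, and compares the "complete2" output
+ * with expected/result3.
+ */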
+@SuppressWarnings("deprecation")
+public class MergePathTest {
+    private static final String ACTUAL_RESULT_DIR = "actual3";
+    private static final String COMPARE_DIR = "compare";
+    private JobConf conf = new JobConf();
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private static final String DATA_PATH = "actual2" + "/result2" + "/part-00000";
+    private static final String HDFS_PATH = "/webmap";
+    private static final String HDFA_PATH_DATA = "/webmapdata";
+    
+    private static final String RESULT_PATH = "/result3";
+    private static final String EXPECTED_PATH = "expected/result3";
+    private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH + "/comparesource.txt";
+    private static final int COUNT_REDUCER = 4;
+    private static final int SIZE_KMER = 3;
+
+    private MiniDFSCluster dfsCluster;
+    private MiniMRCluster mrCluster;
+    private FileSystem dfs;
+
+    @SuppressWarnings("resource")
+    @Test
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        startHadoop();
+
+        MergePathDriver tldriver = new MergePathDriver();
+        tldriver.run(HDFS_PATH, RESULT_PATH, HDFA_PATH_DATA, COUNT_REDUCER, SIZE_KMER, 3, HADOOP_CONF_PATH);
+
+        SequenceFile.Reader reader = null;
+        Path path = new Path(HDFA_PATH_DATA + "/complete2" + "/complete2-r-00000");
+        reader = new SequenceFile.Reader(dfs, path, conf);
+        BytesWritable key = (BytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+        MergePathValueWritable value = (MergePathValueWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
+        File filePathTo = new File(TEST_SOURCE_DIR);
+        BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+        while (reader.next(key, value)) {
+            bw.write(key.toString() + "\t" + value.getAdjBitMap() + "\t" + value.getFlag());
+            bw.newLine();
+        }
+        bw.close();
+
+        dumpResult();
+        TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
+
+        cleanupHadoop();
+
+    }
+    private void startHadoop() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+        dfs = dfsCluster.getFileSystem();
+        mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+        Path src = new Path(DATA_PATH);
+        Path dest = new Path(HDFS_PATH + "/");
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
+        Path data = new Path(HDFA_PATH_DATA + "/");
+        dfs.mkdirs(data);
+   
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        conf.writeXml(confOutput);
+        confOutput.flush();
+        confOutput.close();
+    }
+
+    private void cleanupHadoop() throws IOException {
+        mrCluster.shutdown();
+        dfsCluster.shutdown();
+    }
+
+    private void dumpResult() throws IOException {
+        Path src = new Path(HDFA_PATH_DATA + "/" + "complete2");
+        Path dest = new Path(ACTUAL_RESULT_DIR + "/");
+        dfs.copyToLocalFile(src, dest);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
new file mode 100755
index 0000000..015017a
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
@@ -0,0 +1,75 @@
+package edu.uci.ics.utils;
+
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+
+/**
+ * Utility methods for comparing test output against expected results in the genomix tests.
+ */
+public class TestUtils {
+    public static void compareWithResult(File expectedFile, File actualFile) throws Exception {
+        BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
+        BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
+        String lineExpected, lineActual;
+        int num = 1;
+        try {
+            while ((lineExpected = readerExpected.readLine()) != null) {
+                lineActual = readerActual.readLine();
+                // Assert.assertEquals(lineExpected, lineActual);
+                if (lineActual == null) {
+                    throw new Exception("Actual result changed at line " + num + ":\n< " + lineExpected + "\n> ");
+                }
+                if (!equalStrings(lineExpected, lineActual)) {
+                    throw new Exception("Result differs at line " + num + ":\n< " + lineExpected + "\n> "
+                            + lineActual);
+                }
+                ++num;
+            }
+            lineActual = readerActual.readLine();
+            if (lineActual != null) {
+                throw new Exception("Actual result changed at line " + num + ":\n< \n> " + lineActual);
+            }
+        } finally {
+            readerExpected.close();
+            readerActual.close();
+        }
+    }
+
+    private static boolean equalStrings(String s1, String s2) {
+        String[] rowsOne = s1.split("\t");
+        String[] rowsTwo = s2.split("\t");
+
+        if (rowsOne.length != rowsTwo.length)
+            return false;
+
+        for (int i = 0; i < rowsOne.length; i++) {
+            String row1 = rowsOne[i];
+            String row2 = rowsTwo[i];
+
+            if (row1.equals(row2))
+                continue;
+            else
+                return false;
+        }
+        return true;
+    }
+
+    public static void main(String[] args) throws Exception {
+        TestUtils TUtils = new TestUtils();
+    }
+}
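
compareWithResult above walks both files line by line, splits each line on tabs, and throws on the first row whose fields differ or when one file has extra rows. A minimal usage sketch; the wrapper class and file paths here are illustrative only:

    import java.io.File;
    import edu.uci.ics.utils.TestUtils;

    public class CompareExample {
        public static void main(String[] args) throws Exception {
            // Each row is tab-separated, e.g. "ATAGAAGATCGA\tA|T\t1".
            // The call throws an Exception naming the first mismatching line.
            TestUtils.compareWithResult(new File("expected/result3"),
                    new File("compare/result3/comparesource.txt"));
        }
    }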
diff --git a/genomix/genomix-hadoop/testactual/source.txt b/genomix/genomix-hadoop/testactual/source.txt
new file mode 100644
index 0000000..aa7a107
--- /dev/null
+++ b/genomix/genomix-hadoop/testactual/source.txt
@@ -0,0 +1,3 @@
+ATAGAAGATCGA	A|T	1
+AATAGAAGATCG	|A	1
+TAGAAGATCGAT	A|	1
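
Each row of this fixture is tab-separated: the k-mer, an adjacency field, and a count. A small parsing sketch; reading the adjacency field as "base before | base after" is an assumption about the format, not something stated in this patch:

    public class SourceRowExample {
        public static void main(String[] args) {
            String row = "ATAGAAGATCGA\tA|T\t1";
            String[] fields = row.split("\t");
            String kmer = fields[0];       // the k-mer string
            String adjacency = fields[1];  // "A|T" (assumed: preceding base | following base)
            int count = Integer.parseInt(fields[2]);
            System.out.println(kmer + " " + adjacency + " " + count);
        }
    }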
diff --git a/genomix/genomix-hyracks/HyracksCodeFormatProfile.xml b/genomix/genomix-hyracks/HyracksCodeFormatProfile.xml
new file mode 100644
index 0000000..733ca5c
--- /dev/null
+++ b/genomix/genomix-hyracks/HyracksCodeFormatProfile.xml
@@ -0,0 +1,784 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<profiles version="11">
+	<profile kind="CodeFormatterProfile" name="HyracksCodeFormatProfile"
+		version="11">
+		<setting
+			id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags"
+			value="insert" />
+		<setting id="org.eclipse.jdt.core.formatter.disabling_tag"
+			value="@formatter:off" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration"
+			value="end_of_line" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field"
+			value="0" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.formatter.use_on_off_tags"
+			value="false" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line"
+			value="false" />
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields"
+			value="16" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer"
+			value="16" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression"
+			value="80" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_binary_operator"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer"
+			value="end_of_line" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while"
+			value="insert" />
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package"
+			value="1" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters"
+			value="insert" />
+		<setting id="org.eclipse.jdt.core.formatter.continuation_indentation"
+			value="2" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation"
+			value="16" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk"
+			value="1" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_binary_operator"
+			value="insert" />
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package"
+			value="0" />
+		<setting id="org.eclipse.jdt.core.compiler.source" value="1.5" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.formatter.comment.format_line_comments"
+			value="false" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations"
+			value="insert" />
+		<setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call"
+			value="16" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type"
+			value="1" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.align_type_members_on_columns"
+			value="false" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation"
+			value="16" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment"
+			value="false" />
+		<setting id="org.eclipse.jdt.core.formatter.lineSplit" value="120" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration"
+			value="0" />
+		<setting id="org.eclipse.jdt.core.formatter.indentation.size"
+			value="4" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.formatter.enabling_tag"
+			value="@formatter:on" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration"
+			value="16" />
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment"
+			value="0" />
+		<setting id="org.eclipse.jdt.core.compiler.problem.assertIdentifier"
+			value="error" />
+		<setting id="org.eclipse.jdt.core.formatter.tabulation.char"
+			value="space" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body"
+			value="true" />
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method"
+			value="1" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line"
+			value="false" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration"
+			value="16" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration"
+			value="end_of_line" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_method_declaration"
+			value="0" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch"
+			value="end_of_line" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.compiler.problem.enumIdentifier"
+			value="error" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch"
+			value="true" />
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block"
+			value="end_of_line" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration"
+			value="end_of_line" />
+		<setting id="org.eclipse.jdt.core.formatter.compact_else_if"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant"
+			value="end_of_line" />
+		<setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.formatter.tabulation.size"
+			value="4" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration"
+			value="16" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator"
+			value="insert" />
+		<setting id="org.eclipse.jdt.core.formatter.indent_empty_lines"
+			value="false" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case"
+			value="end_of_line" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve"
+			value="1" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression"
+			value="16" />
+		<setting id="org.eclipse.jdt.core.compiler.compliance" value="1.5" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer"
+			value="2" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression"
+			value="16" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line"
+			value="false" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration"
+			value="insert" />
+		<setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line"
+			value="false" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration"
+			value="16" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_binary_expression"
+			value="16" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration"
+			value="end_of_line" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode"
+			value="enabled" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line"
+			value="false" />
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_label"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant"
+			value="48" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments"
+			value="true" />
+		<setting id="org.eclipse.jdt.core.formatter.comment.line_length"
+			value="9999" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.blank_lines_between_import_groups"
+			value="1" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration"
+			value="end_of_line" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body"
+			value="0" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.formatter.wrap_before_binary_operator"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations"
+			value="1" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration"
+			value="16" />
+		<setting id="org.eclipse.jdt.core.formatter.join_lines_in_comments"
+			value="false" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if"
+			value="16" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports"
+			value="1" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert"
+			value="insert" />
+		<setting id="org.eclipse.jdt.core.formatter.comment.format_html"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration"
+			value="16" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.formatter.comment.format_source_code"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration"
+			value="16" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer"
+			value="insert" />
+		<setting id="org.eclipse.jdt.core.compiler.codegen.targetPlatform"
+			value="1.5" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations"
+			value="false" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation"
+			value="0" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member"
+			value="insert" />
+		<setting id="org.eclipse.jdt.core.formatter.comment.format_header"
+			value="false" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.comment.format_block_comments"
+			value="false" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant"
+			value="do not insert" />
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants"
+			value="49" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration"
+			value="end_of_line" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries"
+			value="true" />
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports"
+			value="1" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header"
+			value="true" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for"
+			value="insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column"
+			value="false" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments"
+			value="do not insert" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column"
+			value="false" />
+		<setting
+			id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line"
+			value="false" />
+	</profile>
+</profiles>
diff --git a/genomix/genomix-hyracks/pom.xml b/genomix/genomix-hyracks/pom.xml
new file mode 100644
index 0000000..6d13bc9
--- /dev/null
+++ b/genomix/genomix-hyracks/pom.xml
@@ -0,0 +1,257 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<artifactId>genomix-hyracks</artifactId>
+	<name>genomix-hyracks</name>
+
+	<parent>
+		<groupId>edu.uci.ics.hyracks</groupId>
+		<artifactId>genomix</artifactId>
+		<version>0.2.4-SNAPSHOT</version>
+	</parent>
+
+	<properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+	</properties>
+
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<version>2.0.2</version>
+				<configuration>
+					<source>1.7</source>
+					<target>1.7</target>
+					<fork>true</fork>
+				</configuration>
+			</plugin>
+			<plugin>
+				<groupId>org.codehaus.mojo</groupId>
+				<artifactId>appassembler-maven-plugin</artifactId>
+				<executions>
+					<execution>
+						<configuration>
+							<programs>
+								<program>
+									<mainClass>edu.uci.ics.genomix.driver.Driver</mainClass>
+									<name>genomix</name>
+								</program>
+							</programs>
+							<repositoryLayout>flat</repositoryLayout>
+							<repositoryName>lib</repositoryName>
+						</configuration>
+						<phase>package</phase>
+						<goals>
+							<goal>assemble</goal>
+						</goals>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<artifactId>maven-assembly-plugin</artifactId>
+				<version>2.2-beta-5</version>
+				<configuration>
+					<descriptorRefs>
+						<descriptorRef>jar-with-dependencies</descriptorRef>
+					</descriptorRefs>
+				</configuration>
+				<executions>
+					<execution>
+						<configuration>
+							<descriptors>
+								<descriptor>src/main/assembly/binary-assembly.xml</descriptor>
+							</descriptors>
+						</configuration>
+						<phase>package</phase>
+						<goals>
+							<goal>attached</goal>
+						</goals>
+					</execution>
+					<execution>
+						<id>make-my-jar-with-dependencies</id>
+						<phase>package</phase>
+						<goals>
+							<goal>single</goal>
+						</goals>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-surefire-plugin</artifactId>
+				<version>2.7.2</version>
+				<configuration>
+					<forkMode>pertest</forkMode>
+					<argLine>-enableassertions -Xmx512m -XX:MaxPermSize=300m
+						-Dfile.encoding=UTF-8
+						-Djava.util.logging.config.file=src/test/resources/logging.properties</argLine>
+					<includes>
+						<include>**/*TestSuite.java</include>
+						<include>**/*Test.java</include>
+					</includes>
+				</configuration>
+			</plugin>
+			<plugin>
+				<artifactId>maven-clean-plugin</artifactId>
+				<configuration>
+					<filesets>
+						<fileset>
+							<directory>.</directory>
+							<includes>
+								<include>teststore*</include>
+								<include>edu*</include>
+								<include>actual*</include>
+								<include>build*</include>
+								<include>expect*</include>
+								<include>ClusterController*</include>
+							</includes>
+						</fileset>
+					</filesets>
+				</configuration>
+			</plugin>
+			<plugin>
+				<artifactId>maven-resources-plugin</artifactId>
+				<version>2.5</version>
+				<executions>
+					<execution>
+						<id>copy-scripts</id>
+						<!-- here the phase you need -->
+						<phase>package</phase>
+						<goals>
+							<goal>copy-resources</goal>
+						</goals>
+						<configuration>
+							<outputDirectory>target/appassembler/bin</outputDirectory>
+							<resources>
+								<resource>
+									<directory>src/main/resources/scripts</directory>
+								</resource>
+							</resources>
+						</configuration>
+					</execution>
+					<execution>
+						<id>copy-conf</id>
+						<!-- here the phase you need -->
+						<phase>package</phase>
+						<goals>
+							<goal>copy-resources</goal>
+						</goals>
+						<configuration>
+							<outputDirectory>target/appassembler/conf</outputDirectory>
+							<resources>
+								<resource>
+									<directory>src/main/resources/conf</directory>
+								</resource>
+							</resources>
+						</configuration>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-resources-plugin</artifactId>
+				<version>2.6</version>
+			</plugin>
+		</plugins>
+	</build>
+
+	<dependencies>
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<version>4.8.1</version>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-dataflow-std</artifactId>
+			<version>0.2.4-SNAPSHOT</version>
+			<type>jar</type>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-api</artifactId>
+			<version>0.2.4-SNAPSHOT</version>
+			<type>jar</type>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-dataflow-common</artifactId>
+			<version>0.2.4-SNAPSHOT</version>
+			<type>jar</type>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-data-std</artifactId>
+			<version>0.2.4-SNAPSHOT</version>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-control-cc</artifactId>
+			<version>0.2.4-SNAPSHOT</version>
+			<type>jar</type>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-control-nc</artifactId>
+			<version>0.2.4-SNAPSHOT</version>
+			<type>jar</type>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>com.kenai.nbpwr</groupId>
+			<artifactId>org-apache-commons-io</artifactId>
+			<version>1.3.1-201002241208</version>
+			<type>nbm</type>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks.examples</groupId>
+			<artifactId>hyracks-integration-tests</artifactId>
+			<version>0.2.4-SNAPSHOT</version>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-ipc</artifactId>
+			<version>0.2.4-SNAPSHOT</version>
+			<type>jar</type>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-core</artifactId>
+			<version>0.20.2</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-test</artifactId>
+			<version>0.20.2</version>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-hdfs-core</artifactId>
+			<version>0.2.4-SNAPSHOT</version>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-hdfs-core</artifactId>
+			<version>0.2.4-SNAPSHOT</version>
+			<type>test-jar</type>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>genomix-data</artifactId>
+			<version>0.2.4-SNAPSHOT</version>
+			<type>jar</type>
+			<scope>compile</scope>
+		</dependency>
+	</dependencies>
+</project>
diff --git a/genomix/genomix-hyracks/src/main/assembly/binary-assembly.xml b/genomix/genomix-hyracks/src/main/assembly/binary-assembly.xml
new file mode 100644
index 0000000..68d424a
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/assembly/binary-assembly.xml
@@ -0,0 +1,19 @@
+<assembly>
+	<id>binary-assembly</id>
+	<formats>
+		<format>zip</format>
+		<format>dir</format>
+	</formats>
+	<includeBaseDirectory>false</includeBaseDirectory>
+	<fileSets>
+		<fileSet>
+			<directory>target/appassembler/bin</directory>
+			<outputDirectory>bin</outputDirectory>
+			<fileMode>0755</fileMode>
+		</fileSet>
+		<fileSet>
+			<directory>target/appassembler/lib</directory>
+			<outputDirectory>lib</outputDirectory>
+		</fileSet>
+	</fileSets>
+</assembly>
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/ByteSerializerDeserializer.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/ByteSerializerDeserializer.java
new file mode 100644
index 0000000..98159c0
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/ByteSerializerDeserializer.java
@@ -0,0 +1,47 @@
+package edu.uci.ics.genomix.data.std.accessors;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+public class ByteSerializerDeserializer implements
+		ISerializerDeserializer<Byte> {
+
+	private static final long serialVersionUID = 1L;
+
+	public static final ByteSerializerDeserializer INSTANCE = new ByteSerializerDeserializer();
+
+	private ByteSerializerDeserializer() {
+	}
+
+	@Override
+	public Byte deserialize(DataInput in) throws HyracksDataException {
+		try {
+			return in.readByte();
+		} catch (IOException e) {
+			throw new HyracksDataException(e);
+		}
+	}
+
+	@Override
+	public void serialize(Byte instance, DataOutput out)
+			throws HyracksDataException {
+		try {
+			out.writeByte(instance.intValue());
+		} catch (IOException e) {
+			throw new HyracksDataException(e);
+		}
+	}
+
+	public static byte getByte(byte[] bytes, int offset) {
+		return bytes[offset];
+	}
+
+	public static void putByte(byte val, byte[] bytes, int offset) {
+		bytes[offset] = val;
+	}
+
+}
\ No newline at end of file
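
A quick round-trip sketch for the serializer above; the stream plumbing is illustrative only:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import edu.uci.ics.genomix.data.std.accessors.ByteSerializerDeserializer;

    public class ByteSerdeExample {
        public static void main(String[] args) throws Exception {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            // Write one byte through the shared singleton.
            ByteSerializerDeserializer.INSTANCE.serialize((byte) 42, new DataOutputStream(bos));
            // Read it back from the same bytes.
            Byte back = ByteSerializerDeserializer.INSTANCE.deserialize(
                    new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
            System.out.println(back); // 42
        }
    }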
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerBinaryHashFunctionFamily.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerBinaryHashFunctionFamily.java
new file mode 100644
index 0000000..e7aa481
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerBinaryHashFunctionFamily.java
@@ -0,0 +1,29 @@
+package edu.uci.ics.genomix.data.std.accessors;
+
+import edu.uci.ics.genomix.data.std.primitive.KmerPointable;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
+
+public class KmerBinaryHashFunctionFamily implements IBinaryHashFunctionFamily {
+	private static final long serialVersionUID = 1L;
+
+	@Override
+	public IBinaryHashFunction createBinaryHashFunction(final int seed) {
+
+		return new IBinaryHashFunction() {
+			private KmerPointable p = new KmerPointable();
+
+			@Override
+			public int hash(byte[] bytes, int offset, int length) {
+				if (length + offset >= bytes.length)
+					throw new IllegalStateException("out of bounds");
+				p.set(bytes, offset, length);
+				int hash = p.hash() * (seed + 1);
+				if (hash < 0) {
+					hash = -(hash + 1);
+				}
+				return hash;
+			}
+		};
+	}
+}
\ No newline at end of file
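
Each seed handed to createBinaryHashFunction above yields a different member of the family (the byte hash is scaled by seed + 1 and folded to a non-negative value), which lets Hyracks rehash keys across levels of its hash-based operators. A minimal sketch; note that the field must not run to the very end of the array or hash() throws:

    import edu.uci.ics.genomix.data.std.accessors.KmerBinaryHashFunctionFamily;
    import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;

    public class HashFamilyExample {
        public static void main(String[] args) {
            byte[] buf = new byte[] { 0x1b, 0x27, 0x4e, 0x00 }; // 3 payload bytes plus one spare
            KmerBinaryHashFunctionFamily family = new KmerBinaryHashFunctionFamily();
            IBinaryHashFunction h0 = family.createBinaryHashFunction(0);
            IBinaryHashFunction h1 = family.createBinaryHashFunction(1);
            // The same 3 bytes hashed under two different seeds.
            System.out.println(h0.hash(buf, 0, 3));
            System.out.println(h1.hash(buf, 0, 3));
        }
    }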
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerHashPartitioncomputerFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerHashPartitioncomputerFactory.java
new file mode 100644
index 0000000..231470a
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerHashPartitioncomputerFactory.java
@@ -0,0 +1,44 @@
+package edu.uci.ics.genomix.data.std.accessors;
+
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFactory;
+
+public class KmerHashPartitioncomputerFactory implements
+		ITuplePartitionComputerFactory {
+
+	private static final long serialVersionUID = 1L;
+
+	public static int hashBytes(byte[] bytes, int offset, int length) {
+		int hash = 1;
+		for (int i = offset; i < offset + length; i++)
+			hash = (31 * hash) + (int) bytes[i];
+		return hash;
+	}
+
+	@Override
+	public ITuplePartitionComputer createPartitioner() {
+		return new ITuplePartitionComputer() {
+			@Override
+			public int partition(IFrameTupleAccessor accessor, int tIndex,
+					int nParts) {
+				int startOffset = accessor.getTupleStartOffset(tIndex);
+				int fieldOffset = accessor.getFieldStartOffset(tIndex, 0);
+				int slotLength = accessor.getFieldSlotsLength();
+				int fieldLength = accessor.getFieldLength(tIndex, 0);
+
+				ByteBuffer buf = accessor.getBuffer();
+
+				int hash = hashBytes(buf.array(), startOffset + fieldOffset
+						+ slotLength, fieldLength);
+				if (hash < 0) {
+					hash = -(hash + 1);
+				}
+
+				return hash % nParts;
+			}
+		};
+	}
+}
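
The partitioner reads field 0 of each tuple, applies the 31-based polynomial hash above, folds negative values, and takes the result modulo the number of partitions. The same arithmetic in isolation, without the frame accessor:

    import edu.uci.ics.genomix.data.std.accessors.KmerHashPartitioncomputerFactory;

    public class PartitionExample {
        public static void main(String[] args) {
            byte[] kmer = new byte[] { 0x1b, 0x27 };
            int nParts = 4;
            int hash = KmerHashPartitioncomputerFactory.hashBytes(kmer, 0, kmer.length);
            if (hash < 0) {
                hash = -(hash + 1); // fold negative hashes into the non-negative range
            }
            System.out.println("partition = " + (hash % nParts));
        }
    }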

diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerNormarlizedComputerFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerNormarlizedComputerFactory.java
new file mode 100644
index 0000000..1ca90c2
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerNormarlizedComputerFactory.java
@@ -0,0 +1,23 @@
+package edu.uci.ics.genomix.data.std.accessors;
+
+import edu.uci.ics.genomix.data.std.primitive.KmerPointable;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class KmerNormarlizedComputerFactory implements
+		INormalizedKeyComputerFactory {
+	private static final long serialVersionUID = 8735044913496854551L;
+
+	@Override
+	public INormalizedKeyComputer createNormalizedKeyComputer() {
+		return new INormalizedKeyComputer() {
+			/**
+			 * Read one int from the kmer; this int must be order-consistent with the Kmer comparator.
+			 */
+			@Override
+			public int normalize(byte[] bytes, int start, int length) {
+				return KmerPointable.getIntReverse(bytes, start, length);
+			}
+		};
+	}
+}
\ No newline at end of file
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/primitive/KmerPointable.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/primitive/KmerPointable.java
new file mode 100644
index 0000000..ae07355
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/primitive/KmerPointable.java
@@ -0,0 +1,138 @@
+package edu.uci.ics.genomix.data.std.primitive;
+
+import edu.uci.ics.genomix.data.std.accessors.KmerHashPartitioncomputerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.data.std.api.AbstractPointable;
+import edu.uci.ics.hyracks.data.std.api.IComparable;
+import edu.uci.ics.hyracks.data.std.api.IHashable;
+import edu.uci.ics.hyracks.data.std.api.INumeric;
+import edu.uci.ics.hyracks.data.std.api.IPointable;
+import edu.uci.ics.hyracks.data.std.api.IPointableFactory;
+
+public final class KmerPointable extends AbstractPointable implements
+		IHashable, IComparable, INumeric {
+	public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
+		private static final long serialVersionUID = 1L;
+
+		@Override
+		public boolean isFixedLength() {
+			return false;
+		}
+
+		@Override
+		public int getFixedLength() {
+			return -1;
+		}
+	};
+
+	public static final IPointableFactory FACTORY = new IPointableFactory() {
+		private static final long serialVersionUID = 1L;
+
+		@Override
+		public IPointable createPointable() {
+			return new KmerPointable();
+		}
+
+		@Override
+		public ITypeTraits getTypeTraits() {
+			return TYPE_TRAITS;
+		}
+	};
+
+	public static short getShortReverse(byte[] bytes, int offset, int length) {
+		if (length < 2) {
+			return (short) (bytes[offset] & 0xff);
+		}
+		return (short) (((bytes[offset + length - 1] & 0xff) << 8) + (bytes[offset
+				+ length - 2] & 0xff));
+	}
+
+	public static int getIntReverse(byte[] bytes, int offset, int length) {
+		int shortValue = getShortReverse(bytes, offset, length) & 0xffff;
+
+		if (length < 3) {
+			return shortValue;
+		}
+		if (length == 3) {
+			return (((bytes[offset + 2] & 0xff) << 16)
+					+ ((bytes[offset + 1] & 0xff) << 8) + ((bytes[offset] & 0xff)));
+		}
+		return ((bytes[offset + length - 1] & 0xff) << 24)
+				+ ((bytes[offset + length - 2] & 0xff) << 16)
+				+ ((bytes[offset + length - 3] & 0xff) << 8)
+				+ ((bytes[offset + length - 4] & 0xff) << 0);
+	}
+
+	public static long getLongReverse(byte[] bytes, int offset, int length) {
+		if (length < 8) {
+			return ((long) getIntReverse(bytes, offset, length)) & 0x0ffffffffL;
+		}
+		return (((long) (bytes[offset + length - 1] & 0xff)) << 56)
+				+ (((long) (bytes[offset + length - 2] & 0xff)) << 48)
+				+ (((long) (bytes[offset + length - 3] & 0xff)) << 40)
+				+ (((long) (bytes[offset + length - 4] & 0xff)) << 32)
+				+ (((long) (bytes[offset + length - 5] & 0xff)) << 24)
+				+ (((long) (bytes[offset + length - 6] & 0xff)) << 16)
+				+ (((long) (bytes[offset + length - 7] & 0xff)) << 8)
+				+ (((long) (bytes[offset + length - 8] & 0xff)));
+	}
+
+	@Override
+	public int compareTo(IPointable pointer) {
+		return compareTo(pointer.getByteArray(), pointer.getStartOffset(),
+				pointer.getLength());
+	}
+
+	@Override
+	public int compareTo(byte[] bytes, int offset, int length) {
+
+		if (this.length != length) {
+			return this.length - length;
+		}
+		for (int i = length - 1; i >= 0; i--) {
+			int cmp = (this.bytes[this.start + i] & 0xff) - (bytes[offset + i] & 0xff);
+			if (cmp != 0) {
+				return cmp;
+			}
+		}
+
+		return 0;
+	}
+
+	@Override
+	public int hash() {
+		int hash = KmerHashPartitioncomputerFactory.hashBytes(bytes, start,
+				length);
+		return hash;
+	}
+
+	@Override
+	public byte byteValue() {
+		return bytes[start + length - 1];
+	}
+
+	@Override
+	public short shortValue() {
+		return getShortReverse(bytes, start, length);
+	}
+
+	@Override
+	public int intValue() {
+		return getIntReverse(bytes, start, length);
+	}
+
+	@Override
+	public long longValue() {
+		return getLongReverse(bytes, start, length);
+	}
+
+	@Override
+	public float floatValue() {
+		return Float.intBitsToFloat(intValue());
+	}
+
+	@Override
+	public double doubleValue() {
+		return Double.longBitsToDouble(longValue());
+	}
+}
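
The pointable orders k-mers first by field length and then byte-wise from the last byte down, and the getShortReverse/getIntReverse/getLongReverse accessors build its numeric views with the trailing byte as the most significant one, so a numeric prefix read from the end of the field follows the same byte order the comparator uses. A small sketch over a plain byte array:

    import edu.uci.ics.genomix.data.std.primitive.KmerPointable;

    public class KmerPointableExample {
        public static void main(String[] args) {
            byte[] frame = new byte[] { 0x1b, 0x27, 0x4e };
            KmerPointable p = new KmerPointable();
            p.set(frame, 0, frame.length); // point at the 3-byte field
            // Same length, so comparison starts at the last byte: 0x4e < 0x4f.
            System.out.println(p.compareTo(new byte[] { 0x1b, 0x27, 0x4f }, 0, 3) < 0); // true
            // Int view with the last byte as the most significant: prints 4e271b.
            System.out.println(Integer.toHexString(p.intValue()));
        }
    }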

diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/ConnectorPolicyAssignmentPolicy.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/ConnectorPolicyAssignmentPolicy.java
new file mode 100644
index 0000000..d3de2ba
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/ConnectorPolicyAssignmentPolicy.java
@@ -0,0 +1,30 @@
+package edu.uci.ics.genomix.dataflow;

+

+import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;

+import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicy;

+import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicyAssignmentPolicy;

+import edu.uci.ics.hyracks.api.dataflow.connectors.PipeliningConnectorPolicy;

+import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedPipeliningConnectorPolicy;

+import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;

+

+/**

+ * Connector policy assignment used by the pre-clustered group-by plan.

+ * 

+ */

+public class ConnectorPolicyAssignmentPolicy implements

+		IConnectorPolicyAssignmentPolicy {

+	private static final long serialVersionUID = 1L;

+	private IConnectorPolicy senderSideMaterializePolicy = new SendSideMaterializedPipeliningConnectorPolicy();

+	private IConnectorPolicy pipeliningPolicy = new PipeliningConnectorPolicy();

+

+	@Override

+	public IConnectorPolicy getConnectorPolicyAssignment(

+			IConnectorDescriptor c, int nProducers, int nConsumers,

+			int[] fanouts) {

+		if (c instanceof MToNPartitioningMergingConnectorDescriptor) {

+			return senderSideMaterializePolicy;

+		} else {

+			return pipeliningPolicy;

+		}

+	}

+}

diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/KMerSequenceWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/KMerSequenceWriterFactory.java
new file mode 100644
index 0000000..8c3f277
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/KMerSequenceWriterFactory.java
@@ -0,0 +1,88 @@
+package edu.uci.ics.genomix.dataflow;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
+import org.apache.hadoop.io.SequenceFile.Writer;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.genomix.type.KmerCountValue;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
+import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
+
+@SuppressWarnings("deprecation")
+public class KMerSequenceWriterFactory implements ITupleWriterFactory {
+
+	private static final long serialVersionUID = 1L;
+	private ConfFactory confFactory;
+
+	public KMerSequenceWriterFactory(JobConf conf) throws HyracksDataException {
+		this.confFactory = new ConfFactory(conf);
+	}
+
+	public class TupleWriter implements ITupleWriter {
+		public TupleWriter(ConfFactory cf) {
+			this.cf = cf;
+		}
+
+		ConfFactory cf;
+		Writer writer = null;
+
+		KmerCountValue reEnterCount = new KmerCountValue();
+		BytesWritable reEnterKey = new BytesWritable();
+
+		/**
+		 * Assumption: the output never modifies the source fields.
+		 */
+		@Override
+		public void write(DataOutput output, ITupleReference tuple)
+				throws HyracksDataException {
+			try {
+				byte[] kmer = tuple.getFieldData(0);
+				int keyStart = tuple.getFieldStart(0);
+				int keyLength = tuple.getFieldLength(0);
+
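+				// Tuple layout: field 0 = packed k-mer bytes,
+				// field 1 = adjacency bitmap (one byte), field 2 = count (one byte).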
+				byte bitmap = tuple.getFieldData(1)[tuple.getFieldStart(1)];
+				byte count = tuple.getFieldData(2)[tuple.getFieldStart(2)];
+				reEnterCount.set(bitmap, count);
+				reEnterKey.set(kmer, keyStart, keyLength);
+				writer.append(reEnterKey, reEnterCount);
+				// Note: appendRaw() output cannot be read back under Hadoop 0.20.2, so it is not used:
+				// writer.appendRaw(kmer, keyStart, keyLength, reEnterCount);
+			} catch (IOException e) {
+				throw new HyracksDataException(e);
+			}
+		}
+
+		@Override
+		public void open(DataOutput output) throws HyracksDataException {
+			try {
+				writer = SequenceFile.createWriter(cf.getConf(),
+						(FSDataOutputStream) output, BytesWritable.class,
+						KmerCountValue.class, CompressionType.NONE, null);
+			} catch (IOException e) {
+				throw new HyracksDataException(e);
+			}
+		}
+
+		@Override
+		public void close(DataOutput output) throws HyracksDataException {
+			// TODO Auto-generated method stub
+		}
+	}
+
+	@Override
+	public ITupleWriter getTupleWriter(IHyracksTaskContext ctx)
+			throws HyracksDataException {
+		return new TupleWriter(confFactory);
+	}
+
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/KMerTextWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/KMerTextWriterFactory.java
new file mode 100644
index 0000000..0975fd2
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/KMerTextWriterFactory.java
@@ -0,0 +1,64 @@
+package edu.uci.ics.genomix.dataflow;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
+
+public class KMerTextWriterFactory implements ITupleWriterFactory {
+
+	/**
+	 * 
+	 */
+	private static final long serialVersionUID = 1L;
+	private final int KMER;
+
+	public KMerTextWriterFactory(int kmer) {
+		KMER = kmer;
+	}
+
+	public class TupleWriter implements ITupleWriter {
+		@Override
+		public void write(DataOutput output, ITupleReference tuple)
+				throws HyracksDataException {
+			try {
+				output.write(Kmer.recoverKmerFrom(KMER,
+						tuple.getFieldData(0), tuple.getFieldStart(0),
+						tuple.getFieldLength(0)).getBytes());
+				output.writeByte('\t');
+				output.write(Kmer.GENE_CODE.getSymbolFromBitMap(tuple
+						.getFieldData(1)[tuple.getFieldStart(1)]).getBytes());
+				output.writeByte('\t');
+				output.write(String.valueOf((int) tuple
+						.getFieldData(2)[tuple.getFieldStart(2)]).getBytes());
+				output.writeByte('\n');
+			} catch (IOException e) {
+				throw new HyracksDataException(e);
+			}
+		}
+
+		@Override
+		public void open(DataOutput output) throws HyracksDataException {
+			// TODO Auto-generated method stub
+			
+		}
+
+		@Override
+		public void close(DataOutput output) throws HyracksDataException {
+			// TODO Auto-generated method stub
+		}
+	}
+
+	@Override
+	public ITupleWriter getTupleWriter(IHyracksTaskContext ctx)
+			throws HyracksDataException {
+		// TODO Auto-generated method stub
+		return new TupleWriter();
+	}
+
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
new file mode 100644
index 0000000..e5b7fa9
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
@@ -0,0 +1,139 @@
+package edu.uci.ics.genomix.dataflow;

+

+import java.nio.ByteBuffer;

+import java.util.regex.Matcher;

+import java.util.regex.Pattern;

+

+import org.apache.hadoop.io.LongWritable;

+import org.apache.hadoop.io.Text;

+

+import edu.uci.ics.genomix.data.std.accessors.ByteSerializerDeserializer;

+import edu.uci.ics.genomix.type.Kmer;

+import edu.uci.ics.genomix.type.Kmer.GENE_CODE;

+import edu.uci.ics.hyracks.api.comm.IFrameWriter;

+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;

+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;

+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;

+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;

+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;

+import edu.uci.ics.hyracks.hdfs.api.IKeyValueParser;

+import edu.uci.ics.hyracks.hdfs.api.IKeyValueParserFactory;

+

+public class ReadsKeyValueParserFactory implements

+		IKeyValueParserFactory<LongWritable, Text> {

+	private static final long serialVersionUID = 1L;

+

+	private int k;

+	private int byteNum;

+	private boolean bReversed;

+

+	public ReadsKeyValueParserFactory(int k, boolean bGenerateReversed) {

+		this.k = k;

+		byteNum = (byte) Math.ceil((double) k / 4.0);

+		bReversed = bGenerateReversed;

+	}

+

+	@Override

+	public IKeyValueParser<LongWritable, Text> createKeyValueParser(

+			final IHyracksTaskContext ctx) {

+		final ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(2);

+		final ByteBuffer outputBuffer = ctx.allocateFrame();

+		final FrameTupleAppender outputAppender = new FrameTupleAppender(

+				ctx.getFrameSize());

+		outputAppender.reset(outputBuffer, true);

+

+		return new IKeyValueParser<LongWritable, Text>() {

+

+			@Override

+			public void parse(LongWritable key, Text value, IFrameWriter writer)

+					throws HyracksDataException {

+				String geneLine = value.toString(); // the raw gene line of this read

+				Pattern genePattern = Pattern.compile("[AGCT]+");

+				Matcher geneMatcher = genePattern.matcher(geneLine);

+				boolean isValid = geneMatcher.matches();

+				if (isValid) {

+					SplitReads(geneLine.getBytes(), writer);

+				}

+			}

+

+			private void SplitReads(byte[] array, IFrameWriter writer) {
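+				// Slide a k-length window over the read; for each position emit
+				// one (k-mer, adjacency) tuple, where pre/next are the codes of
+				// the neighbouring bases.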

+				/** first kmer */

+				byte[] kmer = Kmer.compressKmer(k, array, 0);

+				byte pre = 0;

+				byte next = GENE_CODE.getAdjBit(array[k]);

+				InsertToFrame(kmer, pre, next, writer);

+

+				/** middle kmer */

+				for (int i = k; i < array.length - 1; i++) {

+					pre = Kmer.moveKmer(k, kmer, array[i]);

+					next = GENE_CODE.getAdjBit(array[i + 1]);

+					InsertToFrame(kmer, pre, next, writer);

+

+				}

+				/** last kmer */

+				pre = Kmer.moveKmer(k, kmer, array[array.length - 1]);

+				next = 0;

+				InsertToFrame(kmer, pre, next, writer);

+

+				if (bReversed) {

+					/** first kmer */

+					kmer = Kmer.compressKmerReverse(k, array, 0);

+					next = 0;

+					pre = GENE_CODE.getAdjBit(array[k]);

+					InsertToFrame(kmer, pre, next, writer);

+					/** middle kmer */

+					for (int i = k; i < array.length - 1; i++) {

+						next = Kmer.moveKmerReverse(k, kmer, array[i]);

+						pre = GENE_CODE.getAdjBit(array[i + 1]);

+						InsertToFrame(kmer, pre, next, writer);

+					}

+					/** last kmer */

+					next = Kmer.moveKmerReverse(k, kmer,

+							array[array.length - 1]);

+					pre = 0;

+					InsertToFrame(kmer, pre, next, writer);

+				}

+			}

+

+			private void InsertToFrame(byte[] kmer, byte pre, byte next,

+					IFrameWriter writer) {

+				try {

+					byte adj = GENE_CODE.mergePreNextAdj(pre, next);

+					tupleBuilder.reset();

+					tupleBuilder.addField(kmer, 0, byteNum);

+					tupleBuilder.addField(ByteSerializerDeserializer.INSTANCE,

+							adj);

+
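+					// If the tuple does not fit into the current frame, flush the
+					// frame and retry once; a second failure means a single tuple
+					// is larger than one frame.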

+					if (!outputAppender.append(

+							tupleBuilder.getFieldEndOffsets(),

+							tupleBuilder.getByteArray(), 0,

+							tupleBuilder.getSize())) {

+						FrameUtils.flushFrame(outputBuffer, writer);

+						outputAppender.reset(outputBuffer, true);

+						if (!outputAppender.append(

+								tupleBuilder.getFieldEndOffsets(),

+								tupleBuilder.getByteArray(), 0,

+								tupleBuilder.getSize())) {

+							throw new IllegalStateException(

+									"Failed to copy a record into a frame: the record size is too large.");

+						}

+					}

+				} catch (Exception e) {

+					throw new IllegalStateException(e);

+				}

+			}

+

+			@Override

+			public void open(IFrameWriter writer) throws HyracksDataException {

+				// TODO Auto-generated method stub

+

+			}

+

+			@Override

+			public void close(IFrameWriter writer) throws HyracksDataException {

+				FrameUtils.flushFrame(outputBuffer, writer);

+			}

+		};

+	}

+

+}
\ No newline at end of file
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/DistributedMergeLmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/DistributedMergeLmerAggregateFactory.java
new file mode 100644
index 0000000..62680ed
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/DistributedMergeLmerAggregateFactory.java
@@ -0,0 +1,140 @@
+package edu.uci.ics.genomix.dataflow.aggregators;

+

+import java.io.DataOutput;

+import java.io.IOException;

+

+import edu.uci.ics.genomix.data.std.accessors.ByteSerializerDeserializer;

+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;

+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;

+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;

+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;

+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;

+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;

+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;

+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;

+

+/**

+ * Global (merge) aggregator: ORs adjacency bitmaps and sums counts, capped at 127.

+ * 

+ */

+public class DistributedMergeLmerAggregateFactory implements

+		IAggregatorDescriptorFactory {

+	private static final long serialVersionUID = 1L;

+

+	public DistributedMergeLmerAggregateFactory() {

+	}

+

+	public class DistributeAggregatorDescriptor implements IAggregatorDescriptor {

+		private static final int MAX = 127;

+

+		@Override

+		public void reset() {

+		}

+

+		@Override

+		public void close() {

+			// TODO Auto-generated method stub

+

+		}

+

+		@Override

+		public AggregateState createAggregateStates() {

+			return new AggregateState(new Object() {

+			});

+		}

+

+		protected byte getField(IFrameTupleAccessor accessor, int tIndex,

+				int fieldId) {

+			int tupleOffset = accessor.getTupleStartOffset(tIndex);

+			int fieldStart = accessor.getFieldStartOffset(tIndex, fieldId);

+			int offset = tupleOffset + fieldStart + accessor.getFieldSlotsLength();

+			byte data = ByteSerializerDeserializer.getByte(accessor.getBuffer()

+					.array(), offset);

+			return data;

+		}

+		

+		@Override

+		public void init(ArrayTupleBuilder tupleBuilder,

+				IFrameTupleAccessor accessor, int tIndex, AggregateState state)

+				throws HyracksDataException {

+			byte bitmap = getField(accessor, tIndex, 1);

+			byte count = getField(accessor, tIndex, 2);

+

+			DataOutput fieldOutput = tupleBuilder.getDataOutput();

+			try {

+				fieldOutput.writeByte(bitmap);

+				tupleBuilder.addFieldEndOffset();

+				fieldOutput.writeByte(count);

+				tupleBuilder.addFieldEndOffset();

+			} catch (IOException e) {

+				throw new HyracksDataException(

+						"I/O exception when initializing the aggregator.");

+			}

+		}

+

+		@Override

+		public void aggregate(IFrameTupleAccessor accessor, int tIndex,

+				IFrameTupleAccessor stateAccessor, int stateTupleIndex,

+				AggregateState state) throws HyracksDataException {

+			byte bitmap = getField(accessor, tIndex, 1);

+			short count = getField(accessor, tIndex, 2);

+

+			int statetupleOffset = stateAccessor

+					.getTupleStartOffset(stateTupleIndex);

+			int bitfieldStart = stateAccessor.getFieldStartOffset(stateTupleIndex,

+					1);

+			int countfieldStart = stateAccessor.getFieldStartOffset(

+					stateTupleIndex, 2);

+			int bitoffset = statetupleOffset + stateAccessor.getFieldSlotsLength()

+					+ bitfieldStart;

+			int countoffset = statetupleOffset

+					+ stateAccessor.getFieldSlotsLength() + countfieldStart;

+

+			byte[] data = stateAccessor.getBuffer().array();

+
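+			// Merge in place: OR the adjacency bitmaps and add the counts,
+			// saturating at MAX (127) so the count still fits in a single byte.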

+			bitmap |= data[bitoffset];

+			count += data[countoffset];

+			if (count >= MAX) {

+				count = (byte) MAX;

+			}

+			data[bitoffset] = bitmap;

+			data[countoffset] = (byte) count;

+		}

+

+		@Override

+		public void outputPartialResult(ArrayTupleBuilder tupleBuilder,

+				IFrameTupleAccessor accessor, int tIndex, AggregateState state)

+				throws HyracksDataException {

+			byte bitmap = getField(accessor, tIndex, 1);

+			byte count = getField(accessor, tIndex, 2);

+			DataOutput fieldOutput = tupleBuilder.getDataOutput();

+			try {

+				fieldOutput.writeByte(bitmap);

+				tupleBuilder.addFieldEndOffset();

+				fieldOutput.writeByte(count);

+				tupleBuilder.addFieldEndOffset();

+			} catch (IOException e) {

+				throw new HyracksDataException(

+						"I/O exception when writing aggregation to the output buffer.");

+			}

+

+		}

+

+		@Override

+		public void outputFinalResult(ArrayTupleBuilder tupleBuilder,

+				IFrameTupleAccessor accessor, int tIndex, AggregateState state)

+				throws HyracksDataException {

+			outputPartialResult(tupleBuilder, accessor, tIndex, state);

+		}

+		

+	}

+

+	@Override

+	public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx,

+			RecordDescriptor inRecordDescriptor,

+			RecordDescriptor outRecordDescriptor, int[] keyFields,

+			int[] keyFieldsInPartialResults) throws HyracksDataException {

+		return new DistributeAggregatorDescriptor();

+	}

+

+}

diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/LocalAggregatorDescriptor.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/LocalAggregatorDescriptor.java
new file mode 100644
index 0000000..330f950
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/LocalAggregatorDescriptor.java
@@ -0,0 +1,116 @@
+package edu.uci.ics.genomix.dataflow.aggregators;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import edu.uci.ics.genomix.data.std.accessors.ByteSerializerDeserializer;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+
+public class LocalAggregatorDescriptor implements IAggregatorDescriptor {
+	private static final int MAX = 127;
+
+	@Override
+	public void reset() {
+	}
+
+	@Override
+	public void close() {
+		// TODO Auto-generated method stub
+
+	}
+
+	@Override
+	public AggregateState createAggregateStates() {
+		return new AggregateState(new Object() {
+		});
+	}
+
+	protected byte getField(IFrameTupleAccessor accessor, int tIndex,
+			int fieldId) {
+		int tupleOffset = accessor.getTupleStartOffset(tIndex);
+		int fieldStart = accessor.getFieldStartOffset(tIndex, fieldId);
+		int offset = tupleOffset + fieldStart + accessor.getFieldSlotsLength();
+		byte data = ByteSerializerDeserializer.getByte(accessor.getBuffer()
+				.array(), offset);
+		return data;
+	}
+
+	@Override
+	public void init(ArrayTupleBuilder tupleBuilder,
+			IFrameTupleAccessor accessor, int tIndex, AggregateState state)
+			throws HyracksDataException {
+		byte bitmap = getField(accessor, tIndex, 1);
+		byte count = 1;
+
+		DataOutput fieldOutput = tupleBuilder.getDataOutput();
+		try {
+			fieldOutput.writeByte(bitmap);
+			tupleBuilder.addFieldEndOffset();
+			fieldOutput.writeByte(count);
+			tupleBuilder.addFieldEndOffset();
+		} catch (IOException e) {
+			throw new HyracksDataException(
+					"I/O exception when initializing the aggregator.");
+		}
+	}
+
+	@Override
+	public void aggregate(IFrameTupleAccessor accessor, int tIndex,
+			IFrameTupleAccessor stateAccessor, int stateTupleIndex,
+			AggregateState state) throws HyracksDataException {
+		byte bitmap = getField(accessor, tIndex, 1);
+		short count = 1;
+
+		int statetupleOffset = stateAccessor
+				.getTupleStartOffset(stateTupleIndex);
+		int bitfieldStart = stateAccessor.getFieldStartOffset(stateTupleIndex,
+				1);
+		int countfieldStart = stateAccessor.getFieldStartOffset(
+				stateTupleIndex, 2);
+		int bitoffset = statetupleOffset + stateAccessor.getFieldSlotsLength()
+				+ bitfieldStart;
+		int countoffset = statetupleOffset
+				+ stateAccessor.getFieldSlotsLength() + countfieldStart;
+
+		byte[] data = stateAccessor.getBuffer().array();
+
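+		// Each incoming tuple contributes a count of 1; adjacency bitmaps are
+		// OR-ed and the running count saturates at MAX (127).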
+		bitmap |= data[bitoffset];
+		count += data[countoffset];
+		if (count >= MAX) {
+			count = (byte) MAX;
+		}
+		data[bitoffset] = bitmap;
+		data[countoffset] = (byte) count;
+	}
+
+	@Override
+	public void outputPartialResult(ArrayTupleBuilder tupleBuilder,
+			IFrameTupleAccessor accessor, int tIndex, AggregateState state)
+			throws HyracksDataException {
+		byte bitmap = getField(accessor, tIndex, 1);
+		byte count = getField(accessor, tIndex, 2);
+		DataOutput fieldOutput = tupleBuilder.getDataOutput();
+		try {
+			fieldOutput.writeByte(bitmap);
+			tupleBuilder.addFieldEndOffset();
+			fieldOutput.writeByte(count);
+			tupleBuilder.addFieldEndOffset();
+		} catch (IOException e) {
+			throw new HyracksDataException(
+					"I/O exception when writing aggregation to the output buffer.");
+		}
+
+	}
+
+	@Override
+	public void outputFinalResult(ArrayTupleBuilder tupleBuilder,
+			IFrameTupleAccessor accessor, int tIndex, AggregateState state)
+			throws HyracksDataException {
+		outputPartialResult(tupleBuilder, accessor, tIndex, state);
+	}
+
+};
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/MergeKmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/MergeKmerAggregateFactory.java
new file mode 100644
index 0000000..58ff8a2
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/MergeKmerAggregateFactory.java
@@ -0,0 +1,27 @@
+package edu.uci.ics.genomix.dataflow.aggregators;

+

+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;

+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;

+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;

+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;

+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;

+

+/**

+ * Local aggregator factory: counts k-mer occurrences within a partition.

+ * 

+ */

+public class MergeKmerAggregateFactory implements IAggregatorDescriptorFactory {

+	private static final long serialVersionUID = 1L;

+

+	public MergeKmerAggregateFactory() {

+	}

+

+	@Override

+	public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx,

+			RecordDescriptor inRecordDescriptor,

+			RecordDescriptor outRecordDescriptor, int[] keyFields,

+			int[] keyFieldsInPartialResults) throws HyracksDataException {

+		return new LocalAggregatorDescriptor();

+	}

+

+}

diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/driver/Driver.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/driver/Driver.java
new file mode 100644
index 0000000..27066a2
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/driver/Driver.java
@@ -0,0 +1,144 @@
+package edu.uci.ics.genomix.driver;
+
+import java.net.URL;
+import java.util.EnumSet;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.GenericOptionsParser;
+
+import edu.uci.ics.genomix.job.GenomixJob;
+import edu.uci.ics.genomix.job.JobGen;
+import edu.uci.ics.genomix.job.JobGenBrujinGraph;
+import edu.uci.ics.hyracks.api.client.HyracksConnection;
+import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.job.JobFlag;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
+
+public class Driver {
+	public static enum Plan {
+		BUILD_DEBRUJIN_GRAPH, GRAPH_CLEANNING, CONTIGS_GENERATION,
+	}
+
+	private static final String IS_PROFILING = "genomix.driver.profiling";
+	private static final String CPARTITION_PER_MACHINE = "genomix.driver.duplicate.num";
+	private static final String applicationName = GenomixJob.JOB_NAME;
+	private static final Log LOG = LogFactory.getLog(Driver.class);
+	private JobGen jobGen;
+	private boolean profiling;
+
+	private int numPartitionPerMachine;
+
+	private IHyracksClientConnection hcc;
+	private Scheduler scheduler;
+
+	public Driver(String ipAddress, int port, int numPartitionPerMachine)
+			throws HyracksException {
+		try {
+			hcc = new HyracksConnection(ipAddress, port);
+			scheduler = new Scheduler(hcc.getNodeControllerInfos());
+		} catch (Exception e) {
+			throw new HyracksException(e);
+		}
+		this.numPartitionPerMachine = numPartitionPerMachine;
+	}
+
+	public void runJob(GenomixJob job) throws HyracksException {
+		runJob(job, Plan.BUILD_DEBRUJIN_GRAPH, false);
+	}
+
+	public void runJob(GenomixJob job, Plan planChoice, boolean profiling)
+			throws HyracksException {
+		/** add hadoop configurations */
+		URL hadoopCore = job.getClass().getClassLoader()
+				.getResource("core-site.xml");
+		job.addResource(hadoopCore);
+		URL hadoopMapRed = job.getClass().getClassLoader()
+				.getResource("mapred-site.xml");
+		job.addResource(hadoopMapRed);
+		URL hadoopHdfs = job.getClass().getClassLoader()
+				.getResource("hdfs-site.xml");
+		job.addResource(hadoopHdfs);
+
+		LOG.info("job started");
+		long start = System.currentTimeMillis();
+		long end = start;
+		long time = 0;
+
+		this.profiling = profiling;
+		try {
+			Map<String, NodeControllerInfo> ncMap = hcc
+					.getNodeControllerInfos();
+			LOG.info("ncmap:" + ncMap.size() + " " + ncMap.keySet().toString());
+			switch (planChoice) {
+			case BUILD_DEBRUJIN_GRAPH:
+			default:
+				jobGen = new JobGenBrujinGraph(job, scheduler, ncMap,
+						numPartitionPerMachine);
+				break;
+			}
+
+			start = System.currentTimeMillis();
+			runCreate(jobGen);
+			end = System.currentTimeMillis();
+			time = end - start;
+			LOG.info("result writing finished " + time + "ms");
+			LOG.info("job finished");
+		} catch (Exception e) {
+			throw new HyracksException(e);
+		}
+	}
+
+	private void runCreate(JobGen jobGen) throws Exception {
+		try {
+			JobSpecification createJob = jobGen.generateJob();
+			execute(createJob);
+		} catch (Exception e) {
+			e.printStackTrace();
+			throw e;
+		}
+	}
+
+	private void execute(JobSpecification job) throws Exception {
+		job.setUseConnectorPolicyForScheduling(false);
+		JobId jobId = hcc.startJob(
+				applicationName,
+				job,
+				profiling ? EnumSet.of(JobFlag.PROFILE_RUNTIME) : EnumSet
+						.noneOf(JobFlag.class));
+		hcc.waitForCompletion(jobId);
+	}
+
+	public static void main(String[] args) throws Exception {
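+		// Expected invocation (sketch): <serverIP> <port> <hdfs input> <hdfs output>,
+		// optionally preceded by generic Hadoop options such as -D genomix.kmer=<k>.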
+		GenomixJob jobConf = new GenomixJob();
+		String[] otherArgs = new GenericOptionsParser(jobConf, args)
+				.getRemainingArgs();
+		if (otherArgs.length < 4) {
+			System.err.println("Need <serverIP> <port> <input> <output>");
+			System.exit(-1);
+		}
+		String ipAddress = otherArgs[0];
+		int port = Integer.parseInt(otherArgs[1]);
+		int numOfDuplicate = jobConf.getInt(CPARTITION_PER_MACHINE, 2);
+		boolean bProfiling = jobConf.getBoolean(IS_PROFILING, true);
+		// FileInputFormat.setInputPaths(job, otherArgs[2]);
+		{
+			Path path = new Path(jobConf.getWorkingDirectory(), otherArgs[2]);
+			jobConf.set("mapred.input.dir", path.toString());
+
+			Path outputDir = new Path(jobConf.getWorkingDirectory(),
+					otherArgs[3]);
+			jobConf.set("mapred.output.dir", outputDir.toString());
+		}
+		// FileInputFormat.addInputPath(jobConf, new Path(otherArgs[2]));
+		// FileOutputFormat.setOutputPath(job, new Path(otherArgs[3]));
+		Driver driver = new Driver(ipAddress, port, numOfDuplicate);
+		driver.runJob(jobConf, Plan.BUILD_DEBRUJIN_GRAPH, bProfiling);
+	}
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/GenomixJob.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/GenomixJob.java
new file mode 100644
index 0000000..0751707
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/GenomixJob.java
@@ -0,0 +1,79 @@
+package edu.uci.ics.genomix.job;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.JobConf;
+
+public class GenomixJob extends JobConf {
+
+	public static final String JOB_NAME = "genomix";
+
+	/** K-mer length */
+	public static final String KMER_LENGTH = "genomix.kmer";
+	/** Frame size */
+	public static final String FRAME_SIZE = "genomix.framesize";
+	/** Frame limit, required by Hyracks */
+	public static final String FRAME_LIMIT = "genomix.framelimit";
+	/** Hash table size, required by Hyracks */
+	public static final String TABLE_SIZE = "genomix.tablesize";
+	/** Group-by type */
+	public static final String GROUPBY_TYPE = "genomix.graph.groupby.type";
+	/** Graph output format */
+	public static final String OUTPUT_FORMAT = "genomix.graph.output";
+	/** Whether to also generate the reversed k-mer sequence */
+	public static final String REVERSED_KMER = "genomix.kmer.reversed";
+
+	/** Configuration keys used by the hybrid hash group-by in the graph build phase */
+	public static final String GROUPBY_HYBRID_INPUTSIZE = "genomix.graph.groupby.hybrid.inputsize";
+	public static final String GROUPBY_HYBRID_INPUTKEYS = "genomix.graph.groupby.hybrid.inputkeys";
+	public static final String GROUPBY_HYBRID_RECORDSIZE_SINGLE = "genomix.graph.groupby.hybrid.recordsize.single";
+	public static final String GROUPBY_HYBRID_RECORDSIZE_CROSS = "genomix.graph.groupby.hybrid.recordsize.cross";
+	public static final String GROUPBY_HYBRID_HASHLEVEL = "genomix.graph.groupby.hybrid.hashlevel";
+
+	public static final int DEFAULT_KMER = 55;
+	public static final int DEFAULT_FRAME_SIZE = 32768;
+	public static final int DEFAULT_FRAME_LIMIT = 4096;
+	public static final int DEFAULT_TABLE_SIZE = 10485767;
+	public static final long DEFAULT_GROUPBY_HYBRID_INPUTSIZE = 154000000L;
+	public static final long DEFAULT_GROUPBY_HYBRID_INPUTKEYS = 38500000L;
+	public static final int DEFAULT_GROUPBY_HYBRID_RECORDSIZE_SINGLE = 9;
+	public static final int DEFAULT_GROUPBY_HYBRID_HASHLEVEL = 1;
+	public static final int DEFAULT_GROUPBY_HYBRID_RECORDSIZE_CROSS = 13;
+
+	public static final boolean DEFAULT_REVERSED = false;
+
+	public static final String DEFAULT_GROUPBY_TYPE = "hybrid";
+	public static final String DEFAULT_OUTPUT_FORMAT = "binary";
+
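+	// The keys above are read back in JobGenBrujinGraph.initJobConfiguration();
+	// any key that is not set falls back to the corresponding DEFAULT_* value.
+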
+	public GenomixJob() throws IOException {
+		super(new Configuration());
+	}
+
+	public GenomixJob(Configuration conf) throws IOException {
+		super(conf);
+	}
+
+	/**
+	 * Set the k-mer length.
+	 * 
+	 * @param kmerlength
+	 *            the desired k-mer length
+	 */
+	final public void setKmerLength(int kmerlength) {
+		setInt(KMER_LENGTH, kmerlength);
+	}
+
+	final public void setFrameSize(int frameSize) {
+		setInt(FRAME_SIZE, frameSize);
+	}
+
+	final public void setFrameLimit(int frameLimit) {
+		setInt(FRAME_LIMIT, frameLimit);
+	}
+
+	final public void setTableSize(int tableSize) {
+		setInt(TABLE_SIZE, tableSize);
+	}
+
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGen.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGen.java
new file mode 100644
index 0000000..557da6b
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGen.java
@@ -0,0 +1,27 @@
+package edu.uci.ics.genomix.job;
+
+import java.util.UUID;
+
+import org.apache.hadoop.conf.Configuration;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+
+public abstract class JobGen {
+
+	protected final Configuration conf;
+	protected final GenomixJob genomixJob;
+	protected String jobId = new UUID(System.currentTimeMillis(),
+			System.nanoTime()).toString();
+
+	public JobGen(GenomixJob job) {
+		this.conf = job;
+		this.genomixJob = job;
+		this.initJobConfiguration();
+	}
+
+	protected abstract void initJobConfiguration();
+
+	public abstract JobSpecification generateJob() throws HyracksException;
+
+}
\ No newline at end of file
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGenBrujinGraph.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGenBrujinGraph.java
new file mode 100644
index 0000000..683c0a1
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGenBrujinGraph.java
@@ -0,0 +1,334 @@
+package edu.uci.ics.genomix.job;
+
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.genomix.data.std.accessors.ByteSerializerDeserializer;
+import edu.uci.ics.genomix.data.std.accessors.KmerBinaryHashFunctionFamily;
+import edu.uci.ics.genomix.data.std.accessors.KmerHashPartitioncomputerFactory;
+import edu.uci.ics.genomix.data.std.accessors.KmerNormarlizedComputerFactory;
+import edu.uci.ics.genomix.data.std.primitive.KmerPointable;
+import edu.uci.ics.genomix.dataflow.ConnectorPolicyAssignmentPolicy;
+import edu.uci.ics.genomix.dataflow.KMerSequenceWriterFactory;
+import edu.uci.ics.genomix.dataflow.KMerTextWriterFactory;
+import edu.uci.ics.genomix.dataflow.ReadsKeyValueParserFactory;
+import edu.uci.ics.genomix.dataflow.aggregators.DistributedMergeLmerAggregateFactory;
+import edu.uci.ics.genomix.dataflow.aggregators.MergeKmerAggregateFactory;
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.constraints.PartitionConstraintHelper;
+import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
+import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
+import edu.uci.ics.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.group.HashSpillableTableFactory;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
+import edu.uci.ics.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.group.hybridhash.HybridHashGroupOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
+import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
+import edu.uci.ics.hyracks.hdfs.dataflow.HDFSWriteOperatorDescriptor;
+import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
+
+public class JobGenBrujinGraph extends JobGen {
+	public enum GroupbyType {
+		EXTERNAL, PRECLUSTER, HYBRIDHASH,
+	}
+
+	public enum OutputFormat {
+		TEXT, BINARY,
+	}
+
+	JobConf job;
+	private static final Log LOG = LogFactory.getLog(JobGenBrujinGraph.class);
+	private Scheduler scheduler;
+	private String[] ncNodeNames;
+
+	private int kmers;
+	private int frameLimits;
+	private int frameSize;
+	private int tableSize;
+	private GroupbyType groupbyType;
+	private OutputFormat outputFormat;
+	private boolean bGenerateReversedKmer;
+
+	private AbstractOperatorDescriptor singleGrouper;
+	private IConnectorDescriptor connPartition;
+	private AbstractOperatorDescriptor crossGrouper;
+	private RecordDescriptor readOutputRec;
+	private RecordDescriptor combineOutputRec;
+
+	/** works for hybrid hashing */
+	private long inputSizeInRawRecords;
+	private long inputSizeInUniqueKeys;
+	private int recordSizeInBytes;
+	private int hashfuncStartLevel;
+
+	private void logDebug(String status) {
+		String names = "";
+		for (String str : ncNodeNames) {
+			names += str + " ";
+		}
+		LOG.info(status + " nc nodes:" + ncNodeNames.length + " " + names);
+	}
+
+	public JobGenBrujinGraph(GenomixJob job, Scheduler scheduler,
+			final Map<String, NodeControllerInfo> ncMap,
+			int numPartitionPerMachine) {
+		super(job);
+		this.scheduler = scheduler;
+		String[] nodes = new String[ncMap.size()];
+		ncMap.keySet().toArray(nodes);
+		ncNodeNames = new String[nodes.length * numPartitionPerMachine];
+		for (int i = 0; i < numPartitionPerMachine; i++) {
+			System.arraycopy(nodes, 0, ncNodeNames, i * nodes.length,
+					nodes.length);
+		}
+		logDebug("initialize");
+	}
+
+	private ExternalGroupOperatorDescriptor newExternalGroupby(
+			JobSpecification jobSpec, int[] keyFields,
+			IAggregatorDescriptorFactory aggeragater) {
+		return new ExternalGroupOperatorDescriptor(
+				jobSpec,
+				keyFields,
+				frameLimits,
+				new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
+						.of(KmerPointable.FACTORY) },
+				new KmerNormarlizedComputerFactory(),
+				aggeragater,
+				new DistributedMergeLmerAggregateFactory(),
+				combineOutputRec,
+				new HashSpillableTableFactory(
+						new FieldHashPartitionComputerFactory(
+								keyFields,
+								new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
+										.of(KmerPointable.FACTORY) }),
+						tableSize), true);
+	}
+
+	private HybridHashGroupOperatorDescriptor newHybridGroupby(
+			JobSpecification jobSpec, int[] keyFields,
+			long inputSizeInRawRecords, long inputSizeInUniqueKeys,
+			int recordSizeInBytes, int hashfuncStartLevel,
+			IAggregatorDescriptorFactory aggeragater)
+			throws HyracksDataException {
+		return new HybridHashGroupOperatorDescriptor(
+				jobSpec,
+				keyFields,
+				frameLimits,
+				inputSizeInRawRecords,
+				inputSizeInUniqueKeys,
+				recordSizeInBytes,
+				tableSize,
+				new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
+						.of(KmerPointable.FACTORY) },
+				new IBinaryHashFunctionFamily[] { new KmerBinaryHashFunctionFamily() },
+				hashfuncStartLevel, new KmerNormarlizedComputerFactory(),
+				aggeragater, new DistributedMergeLmerAggregateFactory(),
+				combineOutputRec, true);
+	}
+
+	private void generateDescriptorbyType(JobSpecification jobSpec)
+			throws HyracksDataException {
+		int[] keyFields = new int[] { 0 }; // the index of the group-by key field (the k-mer)
+
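+		// All three strategies share the same shape: a local grouper per
+		// partition, a k-mer-hash repartitioning connector, and a global
+		// (cross) grouper that merges the partial results.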
+		switch (groupbyType) {
+		case EXTERNAL:
+			singleGrouper = newExternalGroupby(jobSpec, keyFields,
+					new MergeKmerAggregateFactory());
+			connPartition = new MToNPartitioningConnectorDescriptor(jobSpec,
+					new KmerHashPartitioncomputerFactory());
+			crossGrouper = newExternalGroupby(jobSpec, keyFields,
+					new DistributedMergeLmerAggregateFactory());
+			break;
+		case PRECLUSTER:
+			singleGrouper = newExternalGroupby(jobSpec, keyFields,
+					new MergeKmerAggregateFactory());
+			connPartition = new MToNPartitioningMergingConnectorDescriptor(
+					jobSpec,
+					new KmerHashPartitioncomputerFactory(),
+					keyFields,
+					new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
+							.of(KmerPointable.FACTORY) });
+			crossGrouper = new PreclusteredGroupOperatorDescriptor(
+					jobSpec,
+					keyFields,
+					new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
+							.of(KmerPointable.FACTORY) },
+					new DistributedMergeLmerAggregateFactory(),
+					combineOutputRec);
+			break;
+		case HYBRIDHASH:
+		default:
+			singleGrouper = newHybridGroupby(jobSpec, keyFields,
+					inputSizeInRawRecords, inputSizeInUniqueKeys,
+					recordSizeInBytes, hashfuncStartLevel,
+					new MergeKmerAggregateFactory());
+			connPartition = new MToNPartitioningConnectorDescriptor(jobSpec,
+					new KmerHashPartitioncomputerFactory());
+
+			crossGrouper = newHybridGroupby(jobSpec, keyFields,
+					inputSizeInRawRecords, inputSizeInUniqueKeys,
+					recordSizeInBytes, hashfuncStartLevel,
+					new DistributedMergeLmerAggregateFactory());
+			break;
+		}
+	}
+
+	public HDFSReadOperatorDescriptor createHDFSReader(JobSpecification jobSpec)
+			throws HyracksDataException {
+		try {
+
+			InputSplit[] splits = job.getInputFormat().getSplits(job,
+					ncNodeNames.length);
+
+			LOG.info("HDFS read into " + splits.length + " splits");
+			String[] readSchedule = scheduler.getLocationConstraints(splits);
+			String log = "";
+			for (String schedule : readSchedule) {
+				log += schedule + " ";
+			}
+			LOG.info("HDFS read schedule " + log);
+			return new HDFSReadOperatorDescriptor(jobSpec, readOutputRec, job,
+					splits, readSchedule, new ReadsKeyValueParserFactory(kmers,
+							bGenerateReversedKmer));
+		} catch (Exception e) {
+			throw new HyracksDataException(e);
+		}
+	}
+
+	@Override
+	public JobSpecification generateJob() throws HyracksException {
+
+		JobSpecification jobSpec = new JobSpecification();
+		readOutputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+				null, ByteSerializerDeserializer.INSTANCE });
+		combineOutputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+				null, ByteSerializerDeserializer.INSTANCE,
+				ByteSerializerDeserializer.INSTANCE });
+		jobSpec.setFrameSize(frameSize);
+
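+		// Pipeline: HDFS read -> local group-by -> repartition on the k-mer
+		// -> global group-by -> HDFS write (text or binary sequence file).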
+		// File input
+		HDFSReadOperatorDescriptor readOperator = createHDFSReader(jobSpec);
+
+		logDebug("Read Operator");
+		PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec,
+				readOperator, ncNodeNames);
+
+		generateDescriptorbyType(jobSpec);
+		logDebug("SingleGroupby Operator");
+		PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec,
+				singleGrouper, ncNodeNames);
+
+		IConnectorDescriptor readfileConn = new OneToOneConnectorDescriptor(
+				jobSpec);
+		jobSpec.connect(readfileConn, readOperator, 0, singleGrouper, 0);
+
+		logDebug("CrossGrouper Operator");
+		PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec,
+				crossGrouper, ncNodeNames);
+		jobSpec.connect(connPartition, singleGrouper, 0, crossGrouper, 0);
+
+		// Output
+		ITupleWriterFactory writer = null;
+		switch (outputFormat) {
+		case TEXT:
+			writer = new KMerTextWriterFactory(kmers);
+			break;
+		case BINARY:
+		default:
+			writer = new KMerSequenceWriterFactory(job);
+			break;
+		}
+		HDFSWriteOperatorDescriptor writeOperator = new HDFSWriteOperatorDescriptor(
+				jobSpec, job, writer);
+
+		logDebug("WriteOperator");
+		PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec,
+				writeOperator, ncNodeNames);
+
+		IConnectorDescriptor printConn = new OneToOneConnectorDescriptor(
+				jobSpec);
+		jobSpec.connect(printConn, crossGrouper, 0, writeOperator, 0);
+		jobSpec.addRoot(writeOperator);
+
+		if (groupbyType == GroupbyType.PRECLUSTER) {
+			jobSpec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
+		}
+		return jobSpec;
+	}
+
+	@Override
+	protected void initJobConfiguration() {
+
+		kmers = conf.getInt(GenomixJob.KMER_LENGTH, GenomixJob.DEFAULT_KMER);
+		frameLimits = conf.getInt(GenomixJob.FRAME_LIMIT,
+				GenomixJob.DEFAULT_FRAME_LIMIT);
+		tableSize = conf.getInt(GenomixJob.TABLE_SIZE,
+				GenomixJob.DEFAULT_TABLE_SIZE);
+		frameSize = conf.getInt(GenomixJob.FRAME_SIZE,
+				GenomixJob.DEFAULT_FRAME_SIZE);
+		inputSizeInRawRecords = conf.getLong(
+				GenomixJob.GROUPBY_HYBRID_INPUTSIZE,
+				GenomixJob.DEFAULT_GROUPBY_HYBRID_INPUTSIZE);
+		inputSizeInUniqueKeys = conf.getLong(
+				GenomixJob.GROUPBY_HYBRID_INPUTKEYS,
+				GenomixJob.DEFAULT_GROUPBY_HYBRID_INPUTKEYS);
+		recordSizeInBytes = conf.getInt(
+				GenomixJob.GROUPBY_HYBRID_RECORDSIZE_SINGLE,
+				GenomixJob.DEFAULT_GROUPBY_HYBRID_RECORDSIZE_SINGLE);
+		hashfuncStartLevel = conf.getInt(GenomixJob.GROUPBY_HYBRID_HASHLEVEL,
+				GenomixJob.DEFAULT_GROUPBY_HYBRID_HASHLEVEL);
+		/** the cross (global) group-by stage uses its own record size; overwrite the single-stage value read above */
+		recordSizeInBytes = conf.getInt(
+				GenomixJob.GROUPBY_HYBRID_RECORDSIZE_CROSS,
+				GenomixJob.DEFAULT_GROUPBY_HYBRID_RECORDSIZE_CROSS);
+
+		bGenerateReversedKmer = conf.getBoolean(GenomixJob.REVERSED_KMER,
+				GenomixJob.DEFAULT_REVERSED);
+
+		String type = conf.get(GenomixJob.GROUPBY_TYPE,
+				GenomixJob.DEFAULT_GROUPBY_TYPE);
+		if (type.equalsIgnoreCase("external")) {
+			groupbyType = GroupbyType.EXTERNAL;
+		} else if (type.equalsIgnoreCase("precluster")) {
+			groupbyType = GroupbyType.PRECLUSTER;
+		} else {
+			groupbyType = GroupbyType.HYBRIDHASH;
+		}
+
+		String output = conf.get(GenomixJob.OUTPUT_FORMAT,
+				GenomixJob.DEFAULT_OUTPUT_FORMAT);
+		if (output.equalsIgnoreCase("text")) {
+			outputFormat = OutputFormat.TEXT;
+		} else {
+			outputFormat = OutputFormat.BINARY;
+		}
+		job = new JobConf(conf);
+		LOG.info("Genomix Graph Build Configuration");
+		LOG.info("Kmer:" + kmers);
+		LOG.info("Groupby type:" + type);
+		LOG.info("Output format:" + output);
+		LOG.info("Frame limit:" + frameLimits);
+		LOG.info("Frame size:" + frameSize);
+	}
+
+}
diff --git a/genomix/genomix-hyracks/src/main/resources/conf/cluster.properties b/genomix/genomix-hyracks/src/main/resources/conf/cluster.properties
new file mode 100644
index 0000000..66251be
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/conf/cluster.properties
@@ -0,0 +1,41 @@
+#The CC port for Hyracks clients
+CC_CLIENTPORT=3099
+
+#The CC port for Hyracks cluster management
+CC_CLUSTERPORT=1099
+
+#The directory of hyracks binaries
+HYRACKS_HOME="../../../../hyracks"
+
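+#Base directory under which the temporary and spill directories below are created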
+WORKPATH=""
+#The tmp directory for cc to install jars
+CCTMP_DIR=${WORKPATH}/tmp/t1
+
+#The tmp directory for nc to install jars
+NCTMP_DIR=${WORKPATH}/tmp/t2
+
+#The directory to put cc logs
+CCLOGS_DIR=$CCTMP_DIR/logs
+
+#The directory to put nc logs
+NCLOGS_DIR=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS="${WORKPATH}/tmp/t3"
+
+#The JAVA_HOME
+JAVA_HOME=$JAVA_HOME
+
+#The classpath, including HADOOP_HOME
+CLASSPATH="${HADOOP_HOME}:${CLASSPATH}:."
+
+#The frame size of the internal dataflow engine
+FRAME_SIZE=65536
+
+#CC JAVA_OPTS
+CCJAVA_OPTS="-Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS="-Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx10g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/genomix/genomix-hyracks/src/main/resources/conf/debugnc.properties b/genomix/genomix-hyracks/src/main/resources/conf/debugnc.properties
new file mode 100644
index 0000000..27afa26
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/conf/debugnc.properties
@@ -0,0 +1,12 @@
+#The tmp directory for nc to install jars
+NCTMP_DIR2=/tmp/t-1
+
+#The directory to put nc logs
+NCLOGS_DIR2=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS2="/tmp/t-2,/tmp/t-3"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS2="-Xdebug -Xrunjdwp:transport=dt_socket,address=7003,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/genomix/genomix-hyracks/src/main/resources/conf/master b/genomix/genomix-hyracks/src/main/resources/conf/master
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/conf/master
@@ -0,0 +1 @@
+localhost
diff --git a/genomix/genomix-hyracks/src/main/resources/conf/slaves b/genomix/genomix-hyracks/src/main/resources/conf/slaves
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/conf/slaves
@@ -0,0 +1 @@
+localhost
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/genomix b/genomix/genomix-hyracks/src/main/resources/scripts/genomix
new file mode 100644
index 0000000..239a46c
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/genomix
@@ -0,0 +1,113 @@
+#!/bin/sh
+# ----------------------------------------------------------------------------
+#  Copyright 2001-2006 The Apache Software Foundation.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+# ----------------------------------------------------------------------------
+#
+#   Copyright (c) 2001-2006 The Apache Software Foundation.  All rights
+#   reserved.
+
+
+# resolve links - $0 may be a softlink
+PRG="$0"
+
+while [ -h "$PRG" ]; do
+  ls=`ls -ld "$PRG"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '/.*' > /dev/null; then
+    PRG="$link"
+  else
+    PRG=`dirname "$PRG"`/"$link"
+  fi
+done
+
+PRGDIR=`dirname "$PRG"`
+BASEDIR=`cd "$PRGDIR/.." >/dev/null; pwd`
+
+
+
+# OS specific support.  $var _must_ be set to either true or false.
+cygwin=false;
+darwin=false;
+case "`uname`" in
+  CYGWIN*) cygwin=true ;;
+  Darwin*) darwin=true
+           if [ -z "$JAVA_VERSION" ] ; then
+             JAVA_VERSION="CurrentJDK"
+           else
+             echo "Using Java version: $JAVA_VERSION"
+           fi
+           if [ -z "$JAVA_HOME" ] ; then
+             JAVA_HOME=/System/Library/Frameworks/JavaVM.framework/Versions/${JAVA_VERSION}/Home
+           fi
+           ;;
+esac
+
+if [ -z "$JAVA_HOME" ] ; then
+  if [ -r /etc/gentoo-release ] ; then
+    JAVA_HOME=`java-config --jre-home`
+  fi
+fi
+
+# For Cygwin, ensure paths are in UNIX format before anything is touched
+if $cygwin ; then
+  [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
+  [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
+fi
+
+# If a specific java binary isn't specified search for the standard 'java' binary
+if [ -z "$JAVACMD" ] ; then
+  if [ -n "$JAVA_HOME"  ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+      # IBM's JDK on AIX uses strange locations for the executables
+      JAVACMD="$JAVA_HOME/jre/sh/java"
+    else
+      JAVACMD="$JAVA_HOME/bin/java"
+    fi
+  else
+    JAVACMD=`which java`
+  fi
+fi
+
+if [ ! -x "$JAVACMD" ] ; then
+  echo "Error: JAVA_HOME is not defined correctly." 1>&2
+  echo "  We cannot execute $JAVACMD" 1>&2
+  exit 1
+fi
+
+if [ -z "$REPO" ]
+then
+  REPO="$BASEDIR"/lib
+fi
+
+CLASSPATH=$CLASSPATH_PREFIX:"$BASEDIR"/etc:"$REPO"/hyracks-dataflow-std-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-api-0.2.3-SNAPSHOT.jar:"$REPO"/json-20090211.jar:"$REPO"/httpclient-4.1-alpha2.jar:"$REPO"/httpcore-4.1-beta1.jar:"$REPO"/commons-logging-1.1.1.jar:"$REPO"/args4j-2.0.12.jar:"$REPO"/commons-lang3-3.1.jar:"$REPO"/hyracks-dataflow-common-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-data-std-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-control-cc-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-control-common-0.2.3-SNAPSHOT.jar:"$REPO"/jetty-server-8.0.0.RC0.jar:"$REPO"/servlet-api-3.0.20100224.jar:"$REPO"/jetty-continuation-8.0.0.RC0.jar:"$REPO"/jetty-http-8.0.0.RC0.jar:"$REPO"/jetty-io-8.0.0.RC0.jar:"$REPO"/jetty-webapp-8.0.0.RC0.jar:"$REPO"/jetty-xml-8.0.0.RC0.jar:"$REPO"/jetty-util-8.0.0.RC0.jar:"$REPO"/jetty-servlet-8.0.0.RC0.jar:"$REPO"/jetty-security-8.0.0.RC0.jar:"$REPO"/wicket-core-1.5.2.jar:"$REPO"/wicket-util-1.5.2.jar:"$REPO"/wicket-request-1.5.2.jar:"$REPO"/slf4j-api-1.6.1.jar:"$REPO"/slf4j-jcl-1.6.3.jar:"$REPO"/hyracks-control-nc-0.2.3-SNAPSHOT.jar:"$REPO"/dcache-client-0.0.1.jar:"$REPO"/jetty-client-8.0.0.M0.jar:"$REPO"/hyracks-net-0.2.3-SNAPSHOT.jar:"$REPO"/commons-io-1.3.1.jar:"$REPO"/hyracks-ipc-0.2.3-SNAPSHOT.jar:"$REPO"/hadoop-core-0.20.2.jar:"$REPO"/commons-cli-1.2.jar:"$REPO"/xmlenc-0.52.jar:"$REPO"/commons-httpclient-3.0.1.jar:"$REPO"/commons-codec-1.3.jar:"$REPO"/commons-net-1.4.1.jar:"$REPO"/jetty-6.1.14.jar:"$REPO"/jetty-util-6.1.14.jar:"$REPO"/jasper-runtime-5.5.12.jar:"$REPO"/jasper-compiler-5.5.12.jar:"$REPO"/jsp-api-2.1-6.1.14.jar:"$REPO"/jsp-2.1-6.1.14.jar:"$REPO"/ant-1.6.5.jar:"$REPO"/commons-el-1.0.jar:"$REPO"/jets3t-0.7.1.jar:"$REPO"/servlet-api-2.5-6.1.14.jar:"$REPO"/kfs-0.3.jar:"$REPO"/hsqldb-1.8.0.10.jar:"$REPO"/oro-2.0.8.jar:"$REPO"/core-3.1.1.jar:"$REPO"/hadoop-test-0.20.2.jar:"$REPO"/ftplet-api-1.0.0.jar:"$REPO"/mina-core-2.0.0-M5.jar:"$REPO"/ftpserver-core-1.0.0.jar:"$REPO"/ftpserver-deprecated-1.0.0-M2.jar:"$REPO"/hyracks-hdfs-core-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-hdfs-0.20.2-0.2.3-SNAPSHOT.jar:"$REPO"/genomix-data-0.2.3-SNAPSHOT.jar:"$REPO"/genomix-hyracks-0.2.3-SNAPSHOT.jar
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin; then
+  [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
+  [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
+  [ -n "$HOME" ] && HOME=`cygpath --path --windows "$HOME"`
+  [ -n "$BASEDIR" ] && BASEDIR=`cygpath --path --windows "$BASEDIR"`
+  [ -n "$REPO" ] && REPO=`cygpath --path --windows "$REPO"`
+fi
+
+exec "$JAVACMD" $JAVA_OPTS  \
+  -classpath "$CLASSPATH" \
+  -Dapp.name="genomix" \
+  -Dapp.pid="$$" \
+  -Dapp.repo="$REPO" \
+  -Dapp.home="$BASEDIR" \
+  -Dbasedir="$BASEDIR" \
+  edu.uci.ics.genomix.driver.Driver \
+  "$@"
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/genomix.bat b/genomix/genomix-hyracks/src/main/resources/scripts/genomix.bat
new file mode 100644
index 0000000..abcafaf
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/genomix.bat
@@ -0,0 +1,108 @@
+@REM ----------------------------------------------------------------------------

+@REM  Copyright 2001-2006 The Apache Software Foundation.

+@REM

+@REM  Licensed under the Apache License, Version 2.0 (the "License");

+@REM  you may not use this file except in compliance with the License.

+@REM  You may obtain a copy of the License at

+@REM

+@REM       http://www.apache.org/licenses/LICENSE-2.0

+@REM

+@REM  Unless required by applicable law or agreed to in writing, software

+@REM  distributed under the License is distributed on an "AS IS" BASIS,

+@REM  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+@REM  See the License for the specific language governing permissions and

+@REM  limitations under the License.

+@REM ----------------------------------------------------------------------------

+@REM

+@REM   Copyright (c) 2001-2006 The Apache Software Foundation.  All rights

+@REM   reserved.

+

+@echo off

+

+set ERROR_CODE=0

+

+:init

+@REM Decide how to startup depending on the version of windows

+

+@REM -- Win98ME

+if NOT "%OS%"=="Windows_NT" goto Win9xArg

+

+@REM set local scope for the variables with windows NT shell

+if "%OS%"=="Windows_NT" @setlocal

+

+@REM -- 4NT shell

+if "%eval[2+2]" == "4" goto 4NTArgs

+

+@REM -- Regular WinNT shell

+set CMD_LINE_ARGS=%*

+goto WinNTGetScriptDir

+

+@REM The 4NT Shell from jp software

+:4NTArgs

+set CMD_LINE_ARGS=%$

+goto WinNTGetScriptDir

+

+:Win9xArg

+@REM Slurp the command line arguments.  This loop allows for an unlimited number

+@REM of arguments (up to the command line limit, anyway).

+set CMD_LINE_ARGS=

+:Win9xApp

+if %1a==a goto Win9xGetScriptDir

+set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1

+shift

+goto Win9xApp

+

+:Win9xGetScriptDir

+set SAVEDIR=%CD%

+%0\

+cd %0\..\.. 

+set BASEDIR=%CD%

+cd %SAVEDIR%

+set SAVE_DIR=

+goto repoSetup

+

+:WinNTGetScriptDir

+set BASEDIR=%~dp0\..

+

+:repoSetup

+

+

+if "%JAVACMD%"=="" set JAVACMD=java

+

+if "%REPO%"=="" set REPO=%BASEDIR%\lib

+

+set CLASSPATH="%BASEDIR%"\etc;"%REPO%"\hyracks-dataflow-std-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-api-0.2.3-SNAPSHOT.jar;"%REPO%"\json-20090211.jar;"%REPO%"\httpclient-4.1-alpha2.jar;"%REPO%"\httpcore-4.1-beta1.jar;"%REPO%"\commons-logging-1.1.1.jar;"%REPO%"\args4j-2.0.12.jar;"%REPO%"\commons-lang3-3.1.jar;"%REPO%"\hyracks-dataflow-common-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-data-std-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-control-cc-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-control-common-0.2.3-SNAPSHOT.jar;"%REPO%"\jetty-server-8.0.0.RC0.jar;"%REPO%"\servlet-api-3.0.20100224.jar;"%REPO%"\jetty-continuation-8.0.0.RC0.jar;"%REPO%"\jetty-http-8.0.0.RC0.jar;"%REPO%"\jetty-io-8.0.0.RC0.jar;"%REPO%"\jetty-webapp-8.0.0.RC0.jar;"%REPO%"\jetty-xml-8.0.0.RC0.jar;"%REPO%"\jetty-util-8.0.0.RC0.jar;"%REPO%"\jetty-servlet-8.0.0.RC0.jar;"%REPO%"\jetty-security-8.0.0.RC0.jar;"%REPO%"\wicket-core-1.5.2.jar;"%REPO%"\wicket-util-1.5.2.jar;"%REPO%"\wicket-request-1.5.2.jar;"%REPO%"\slf4j-api-1.6.1.jar;"%REPO%"\slf4j-jcl-1.6.3.jar;"%REPO%"\hyracks-control-nc-0.2.3-SNAPSHOT.jar;"%REPO%"\dcache-client-0.0.1.jar;"%REPO%"\jetty-client-8.0.0.M0.jar;"%REPO%"\hyracks-net-0.2.3-SNAPSHOT.jar;"%REPO%"\commons-io-1.3.1.jar;"%REPO%"\hyracks-ipc-0.2.3-SNAPSHOT.jar;"%REPO%"\hadoop-core-0.20.2.jar;"%REPO%"\commons-cli-1.2.jar;"%REPO%"\xmlenc-0.52.jar;"%REPO%"\commons-httpclient-3.0.1.jar;"%REPO%"\commons-codec-1.3.jar;"%REPO%"\commons-net-1.4.1.jar;"%REPO%"\jetty-6.1.14.jar;"%REPO%"\jetty-util-6.1.14.jar;"%REPO%"\jasper-runtime-5.5.12.jar;"%REPO%"\jasper-compiler-5.5.12.jar;"%REPO%"\jsp-api-2.1-6.1.14.jar;"%REPO%"\jsp-2.1-6.1.14.jar;"%REPO%"\ant-1.6.5.jar;"%REPO%"\commons-el-1.0.jar;"%REPO%"\jets3t-0.7.1.jar;"%REPO%"\servlet-api-2.5-6.1.14.jar;"%REPO%"\kfs-0.3.jar;"%REPO%"\hsqldb-1.8.0.10.jar;"%REPO%"\oro-2.0.8.jar;"%REPO%"\core-3.1.1.jar;"%REPO%"\hadoop-test-0.20.2.jar;"%REPO%"\ftplet-api-1.0.0.jar;"%REPO%"\mina-core-2.0.0-M5.jar;"%REPO%"\ftpserver-core-1.0.0.jar;"%REPO%"\ftpserver-deprecated-1.0.0-M2.jar;"%REPO%"\hyracks-hdfs-core-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-hdfs-0.20.2-0.2.3-SNAPSHOT.jar;"%REPO%"\genomix-data-0.2.3-SNAPSHOT.jar;"%REPO%"\genomix-hyracks-0.2.3-SNAPSHOT.jar
+goto endInit
+
+@REM Reaching here means variables are defined and arguments have been captured
+:endInit
+
+%JAVACMD% %JAVA_OPTS%  -classpath %CLASSPATH_PREFIX%;%CLASSPATH% -Dapp.name="genomix" -Dapp.repo="%REPO%" -Dapp.home="%BASEDIR%" -Dbasedir="%BASEDIR%" edu.uci.ics.genomix.driver.Driver %CMD_LINE_ARGS%
+if ERRORLEVEL 1 goto error
+goto end
+
+:error
+if "%OS%"=="Windows_NT" @endlocal
+set ERROR_CODE=%ERRORLEVEL%
+
+:end
+@REM set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" goto endNT
+
+@REM For old DOS remove the set variables from ENV - we assume they were not set
+@REM before we started - at least we don't leave any baggage around
+set CMD_LINE_ARGS=
+goto postExec
+
+:endNT
+@REM If error code is set to 1 then the endlocal was done already in :error.
+if %ERROR_CODE% EQU 0 @endlocal
+
+
+:postExec
+
+if "%FORCE_EXIT_ON_ERROR%" == "on" (
+  if %ERROR_CODE% NEQ 0 exit %ERROR_CODE%
+)
+
+exit /B %ERROR_CODE%
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/getip.sh b/genomix/genomix-hyracks/src/main/resources/scripts/getip.sh
new file mode 100644
index 0000000..e0cdf73
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/getip.sh
@@ -0,0 +1,21 @@
+#get the OS
+OS_NAME=`uname -a|awk '{print $1}'`
+LINUX_OS='Linux'
+
+if [ "$OS_NAME" = "$LINUX_OS" ]
+then
+        #Get IP Address
+        IPADDR=`/sbin/ifconfig eth0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+        if [ "$IPADDR" = "" ]
+        then
+                IPADDR=`/sbin/ifconfig lo | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+        fi
+else
+        IPADDR=`/sbin/ifconfig en1 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+        if [ "$IPADDR" = "" ]
+        then
+                IPADDR=`/sbin/ifconfig lo0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+        fi
+
+fi
+echo $IPADDR
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/startAllNCs.sh b/genomix/genomix-hyracks/src/main/resources/scripts/startAllNCs.sh
new file mode 100644
index 0000000..5e38c40
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/startAllNCs.sh
@@ -0,0 +1,6 @@
+GENOMIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+   ssh $i "cd ${GENOMIX_PATH}; bin/startnc.sh"
+done
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/startCluster.sh b/genomix/genomix-hyracks/src/main/resources/scripts/startCluster.sh
new file mode 100755
index 0000000..4727764
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/startCluster.sh
@@ -0,0 +1,19 @@
+bin/startcc.sh
+sleep 5
+bin/startAllNCs.sh
+
+. conf/cluster.properties
+# do we need to specify the version somewhere?
+hyrackcmd=`ls ${HYRACKS_HOME}/hyracks-cli/target/hyracks-cli-*-binary-assembly/bin/hyrackscli`
+# find zip file
+appzip=`ls $PWD/../genomix-*-binary-assembly.zip`
+
+[ -f $hyrackcmd ] || { echo "Hyracks commandline is missing"; exit -1;}
+[ -f $appzip ] || { echo "Genomix binary-assembly.zip is missing"; exit -1;}
+
+CCHOST_NAME=`cat conf/master`
+
+IPADDR=`bin/getip.sh`
+echo "connect to \"${IPADDR}:${CC_CLIENTPORT}\"; create application genomix \"$appzip\";" | $hyrackcmd 
+echo ""
+
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/startDebugNc.sh b/genomix/genomix-hyracks/src/main/resources/scripts/startDebugNc.sh
new file mode 100644
index 0000000..c335475
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/startDebugNc.sh
@@ -0,0 +1,50 @@
+hostname
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+. conf/debugnc.properties
+
+#Clean up temp dir
+
+#rm -rf $NCTMP_DIR2
+mkdir $NCTMP_DIR2
+
+#Clean up log dir
+#rm -rf $NCLOGS_DIR2
+mkdir $NCLOGS_DIR2
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS2 | tr "," "\n")
+for io_dir in $io_dirs
+do
+	#rm -rf $io_dir
+	mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+#Get the IP address of this node
+IPADDR=`bin/getip.sh`
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+NODEID=${NODEID}2
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS2
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR2
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR  -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS2}" &> $NCLOGS_DIR2/$NODEID.log &
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/startcc.sh b/genomix/genomix-hyracks/src/main/resources/scripts/startcc.sh
new file mode 100644
index 0000000..63b1aca
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/startcc.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+hostname
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CCHOST=`bin/getip.sh`
+
+#Remove the temp dir
+#rm -rf $CCTMP_DIR
+mkdir -p $CCTMP_DIR
+
+#Remove the logs dir
+#rm -rf $CCLOGS_DIR
+mkdir -p $CCLOGS_DIR
+
+#Export JAVA_HOME and JAVA_OPTS
+export JAVA_HOME=$JAVA_HOME
+export JAVA_OPTS=$CCJAVA_OPTS
+
+#Launch hyracks cc script
+chmod -R 755 $HYRACKS_HOME
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyrackscc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 3 &> $CCLOGS_DIR/cc.log &
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/startnc.sh b/genomix/genomix-hyracks/src/main/resources/scripts/startnc.sh
new file mode 100644
index 0000000..1a45521
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/startnc.sh
@@ -0,0 +1,49 @@
+hostname
+
+MY_NAME=`hostname`
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Clean up temp dir
+
+#rm -rf $NCTMP_DIR
+mkdir -p $NCTMP_DIR
+
+#Clean up log dir
+#rm -rf $NCLOGS_DIR
+mkdir -p $NCLOGS_DIR
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+	#rm -rf $io_dir
+	mkdir -p $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+IPADDR=`bin/getip.sh`
+#echo $IPADDR
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR  -data-ip-address $IPADDR -result-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS}" &> $NCLOGS_DIR/$NODEID.log &
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/stopAllNCs.sh b/genomix/genomix-hyracks/src/main/resources/scripts/stopAllNCs.sh
new file mode 100644
index 0000000..66ed866
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/stopAllNCs.sh
@@ -0,0 +1,6 @@
+GENOMIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+   ssh $i "cd ${GENOMIX_PATH}; bin/stopnc.sh"
+done
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/stopCluster.sh b/genomix/genomix-hyracks/src/main/resources/scripts/stopCluster.sh
new file mode 100644
index 0000000..4889934
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/stopCluster.sh
@@ -0,0 +1,3 @@
+bin/stopAllNCs.sh
+sleep 2
+bin/stopcc.sh
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/stopcc.sh b/genomix/genomix-hyracks/src/main/resources/scripts/stopcc.sh
new file mode 100644
index 0000000..1865054
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/stopcc.sh
@@ -0,0 +1,10 @@
+hostname
+. conf/cluster.properties
+
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep hyracks|awk '{print $2}'`
+echo $PID
+[ "$PID" != "" ] && kill -9 $PID
+
+#Clean up CC temp dir
+rm -rf $CCTMP_DIR/*
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/stopnc.sh b/genomix/genomix-hyracks/src/main/resources/scripts/stopnc.sh
new file mode 100644
index 0000000..3928bb7
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/stopnc.sh
@@ -0,0 +1,23 @@
+hostname
+. conf/cluster.properties
+
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+
+if [ "$PID" == "" ]; then
+  USERID=`id | sed 's/^uid=//;s/(.*$//'`
+  PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+fi
+
+echo $PID
+[ "$PID" != "" ] && kill -9 $PID
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+	rm -rf $io_dir/*
+done
+
+#Clean up NC temp dir
+rm -rf $NCTMP_DIR/*
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/example/jobrun/JobRunTest.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/example/jobrun/JobRunTest.java
new file mode 100644
index 0000000..5a64f78
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/example/jobrun/JobRunTest.java
@@ -0,0 +1,250 @@
+package edu.uci.ics.genomix.example.jobrun;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import junit.framework.Assert;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.driver.Driver;
+import edu.uci.ics.genomix.driver.Driver.Plan;
+import edu.uci.ics.genomix.job.GenomixJob;
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerCountValue;
+import edu.uci.ics.hyracks.hdfs.utils.HyracksUtils;
+import edu.uci.ics.hyracks.hdfs.utils.TestUtils;
+
+public class JobRunTest {
+	private static final String ACTUAL_RESULT_DIR = "actual";
+	private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+
+	private static final String DATA_PATH = "src/test/resources/data/mergeTest/ThreeKmer";
+	private static final String HDFS_INPUT_PATH = "/webmap";
+	private static final String HDFS_OUTPUT_PATH = "/webmap_result";
+
+	private static final String DUMPED_RESULT = ACTUAL_RESULT_DIR
+			+ HDFS_OUTPUT_PATH + "/merged.txt";
+	private static final String CONVERT_RESULT = DUMPED_RESULT + ".txt";
+	private static final String EXPECTED_PATH = "src/test/resources/expected/result2";
+	private static final String EXPECTED_REVERSE_PATH = "src/test/resources/expected/result_reverse";
+
+	private static final String HYRACKS_APP_NAME = "genomix";
+	private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR
+			+ File.separator + "conf.xml";
+	private MiniDFSCluster dfsCluster;
+
+	private JobConf conf = new JobConf();
+	private int numberOfNC = 2;
+	private int numPartitionPerMachine = 1;
+
+	private Driver driver;
+
+	@Before
+	public void setUp() throws Exception {
+		cleanupStores();
+		HyracksUtils.init();
+		HyracksUtils.createApp(HYRACKS_APP_NAME);
+		FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+		FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+		startHDFS();
+
+		FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
+		FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
+
+		conf.setInt(GenomixJob.KMER_LENGTH, 5);
+		driver = new Driver(HyracksUtils.CC_HOST,
+				HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT,
+				numPartitionPerMachine);
+	}
+
+	private void cleanupStores() throws IOException {
+		FileUtils.forceMkdir(new File("teststore"));
+		FileUtils.forceMkdir(new File("build"));
+		FileUtils.cleanDirectory(new File("teststore"));
+		FileUtils.cleanDirectory(new File("build"));
+	}
+
+	private void startHDFS() throws IOException {
+		conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+		conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+		conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+
+		FileSystem lfs = FileSystem.getLocal(new Configuration());
+		lfs.delete(new Path("build"), true);
+		System.setProperty("hadoop.log.dir", "logs");
+		dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+		FileSystem dfs = FileSystem.get(conf);
+		Path src = new Path(DATA_PATH);
+		Path dest = new Path(HDFS_INPUT_PATH);
+		Path result = new Path(HDFS_OUTPUT_PATH);
+		dfs.mkdirs(dest);
+		//dfs.mkdirs(result);
+		dfs.copyFromLocalFile(src, dest);
+
+		DataOutputStream confOutput = new DataOutputStream(
+				new FileOutputStream(new File(HADOOP_CONF_PATH)));
+		conf.writeXml(confOutput);
+		confOutput.flush();
+		confOutput.close();
+	}
+
+	private void cleanUpReEntry() throws IOException {
+		FileSystem lfs = FileSystem.getLocal(new Configuration());
+		if (lfs.exists(new Path(DUMPED_RESULT))) {
+			lfs.delete(new Path(DUMPED_RESULT), true);
+		}
+		FileSystem dfs = FileSystem.get(conf);
+		if (dfs.exists(new Path(HDFS_OUTPUT_PATH))) {
+			dfs.delete(new Path(HDFS_OUTPUT_PATH), true);
+		}
+	}
+
+	@Test
+	public void TestAll() throws Exception{
+		cleanUpReEntry();
+		TestExternalGroupby();
+		cleanUpReEntry();
+		TestPreClusterGroupby();
+		cleanUpReEntry();
+		TestHybridGroupby();
+		cleanUpReEntry();
+		conf.setBoolean(GenomixJob.REVERSED_KMER, true);
+		TestExternalReversedGroupby();
+		cleanUpReEntry();
+		TestPreClusterReversedGroupby();
+		cleanUpReEntry();
+		TestHybridReversedGroupby();
+	}
+	
+	public void TestExternalGroupby() throws Exception {
+		conf.set(GenomixJob.GROUPBY_TYPE, "external");
+		System.err.println("Testing ExternalGroupBy");
+		driver.runJob(new GenomixJob(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
+		Assert.assertEquals(true, checkResults(EXPECTED_PATH));
+	}
+
+	public void TestPreClusterGroupby() throws Exception {
+		conf.set(GenomixJob.GROUPBY_TYPE, "precluster");
+		System.err.println("Testing PreClusterGroupBy");
+		driver.runJob(new GenomixJob(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
+		Assert.assertEquals(true, checkResults(EXPECTED_PATH));
+	}
+
+	public void TestHybridGroupby() throws Exception {
+		conf.set(GenomixJob.GROUPBY_TYPE, "hybrid");
+		System.err.println("Testing HybridGroupBy");
+		driver.runJob(new GenomixJob(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
+		Assert.assertEquals(true, checkResults(EXPECTED_PATH));
+	}
+	
+	public void TestExternalReversedGroupby() throws Exception{
+		conf.set(GenomixJob.GROUPBY_TYPE, "external");
+		conf.setBoolean(GenomixJob.REVERSED_KMER, true);
+		System.err.println("Testing ExternalGroupBy + Reversed");
+		driver.runJob(new GenomixJob(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
+		Assert.assertEquals(true, checkResults(EXPECTED_REVERSE_PATH));
+	}
+	public void TestPreClusterReversedGroupby() throws Exception{
+		conf.set(GenomixJob.GROUPBY_TYPE, "precluster");
+		conf.setBoolean(GenomixJob.REVERSED_KMER, true);
+		System.err.println("Testing PreclusterGroupBy + Reversed");
+		driver.runJob(new GenomixJob(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
+		Assert.assertEquals(true, checkResults(EXPECTED_REVERSE_PATH));
+	}
+	public void TestHybridReversedGroupby() throws Exception{
+		conf.set(GenomixJob.GROUPBY_TYPE, "hybrid");
+		conf.setBoolean(GenomixJob.REVERSED_KMER, true);
+		System.err.println("Testing HybridGroupBy + Reversed");
+		driver.runJob(new GenomixJob(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
+		Assert.assertEquals(true, checkResults(EXPECTED_REVERSE_PATH));
+	}
+
+	private boolean checkResults(String expectedPath) throws Exception {
+		File dumped = null;
+		String format = conf.get(GenomixJob.OUTPUT_FORMAT);
+		if ("text".equalsIgnoreCase(format)) {
+			FileUtil.copyMerge(FileSystem.get(conf),
+					new Path(HDFS_OUTPUT_PATH), FileSystem
+							.getLocal(new Configuration()), new Path(
+							DUMPED_RESULT), false, conf, null);
+			dumped = new File(DUMPED_RESULT);
+		} else {
+			
+			FileSystem.getLocal(new Configuration()).mkdirs(new Path(ACTUAL_RESULT_DIR
+			+ HDFS_OUTPUT_PATH));
+			File filePathTo = new File(CONVERT_RESULT);
+			BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+			for (int i = 0; i < numPartitionPerMachine * numberOfNC; i++) {
+				String partname = "/part-" + i;
+				FileUtil.copy(FileSystem.get(conf), new Path(HDFS_OUTPUT_PATH						
+						+ partname), FileSystem.getLocal(new Configuration()),						
+						new Path(ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH + partname), false, conf);
+					
+				Path path = new Path(HDFS_OUTPUT_PATH
+						+ partname);
+				FileSystem dfs = FileSystem.get(conf);
+				if (dfs.getFileStatus(path).getLen() == 0){
+					continue;
+				}
+				SequenceFile.Reader reader = new SequenceFile.Reader(dfs, path,
+						conf);
+				BytesWritable key = (BytesWritable) ReflectionUtils
+						.newInstance(reader.getKeyClass(), conf);
+				KmerCountValue value = (KmerCountValue) ReflectionUtils
+						.newInstance(reader.getValueClass(), conf);
+
+				int k = conf.getInt(GenomixJob.KMER_LENGTH, 25);
+				while (reader.next(key, value)) {
+					if (key == null || value == null){
+						break;
+					}
+					bw.write(Kmer.recoverKmerFrom(k, key.getBytes(), 0,
+							key.getLength())
+							+ "\t" + value.toString());
+					System.out.println(Kmer.recoverKmerFrom(k, key.getBytes(), 0,
+							key.getLength())
+							+ "\t" + value.toString());
+					bw.newLine();
+				}
+				reader.close();
+
+			}
+			bw.close();
+			dumped = new File(CONVERT_RESULT);
+		}
+
+		TestUtils.compareWithSortedResult(new File(expectedPath), dumped);
+		return true;
+	}
+
+	@After
+	public void tearDown() throws Exception {
+		HyracksUtils.destroyApp(HYRACKS_APP_NAME);
+		HyracksUtils.deinit();
+		cleanupHDFS();
+	}
+
+	private void cleanupHDFS() throws Exception {
+		dfsCluster.shutdown();
+	}
+
+}
diff --git a/genomix/genomix-hyracks/src/test/resources/data/0/text.txt b/genomix/genomix-hyracks/src/test/resources/data/0/text.txt
new file mode 100755
index 0000000..f63a141
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/0/text.txt
@@ -0,0 +1,4 @@
+@625E1AAXX100810:1:100:10000:10271/1
+AATAGAAG
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
diff --git a/genomix/genomix-hyracks/src/test/resources/data/webmap/text.txt b/genomix/genomix-hyracks/src/test/resources/data/webmap/text.txt
new file mode 100755
index 0000000..f5a05a8
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/webmap/text.txt
@@ -0,0 +1,8 @@
+@625E1AAXX100810:1:100:10000:10271/1
+AATAGAAG
+AATAGAAG
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
+AATAGAAG
+AATAGAAG
+AATAGAAG
diff --git a/genomix/genomix-hyracks/src/test/resources/expected/result2 b/genomix/genomix-hyracks/src/test/resources/expected/result2
new file mode 100755
index 0000000..9296453
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/expected/result2
@@ -0,0 +1,4 @@
+AATAG	|A	5
+AGAAG	T|	5
+ATAGA	A|A	5
+TAGAA	A|G	5
diff --git a/genomix/genomix-hyracks/src/test/resources/expected/result_reverse b/genomix/genomix-hyracks/src/test/resources/expected/result_reverse
new file mode 100644
index 0000000..cf2712d
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/expected/result_reverse
@@ -0,0 +1,8 @@
+AAGAT	G|A	5
+AATAG	|A	5
+AGAAG	T|	5
+AGATA	A|A	5
+ATAGA	A|A	5
+GAAGA	|T	5
+GATAA	A|	5
+TAGAA	A|G	5
diff --git a/genomix/genomix-hyracks/src/test/resources/hadoop/conf/core-site.xml b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/core-site.xml
new file mode 100644
index 0000000..3e5bacb
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/core-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+	<property>
+		<name>fs.default.name</name>
+		<value>hdfs://127.0.0.1:31888</value>
+	</property>
+	<property>
+		<name>hadoop.tmp.dir</name>
+		<value>/tmp/hadoop</value>
+	</property>
+
+
+</configuration>
diff --git a/genomix/genomix-hyracks/src/test/resources/hadoop/conf/hdfs-site.xml b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/hdfs-site.xml
new file mode 100644
index 0000000..b1b1902
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/hdfs-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+	<property>
+		<name>dfs.replication</name>
+		<value>1</value>
+	</property>
+
+	<property>
+		<name>dfs.block.size</name>
+		<value>65536</value>
+	</property>
+
+</configuration>
diff --git a/genomix/genomix-hyracks/src/test/resources/hadoop/conf/log4j.properties b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/log4j.properties
new file mode 100755
index 0000000..d5e6004
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/log4j.properties
@@ -0,0 +1,94 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=FATAL,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=FATAL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this 
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
diff --git a/genomix/genomix-hyracks/src/test/resources/hadoop/conf/mapred-site.xml b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/mapred-site.xml
new file mode 100644
index 0000000..525e7d5
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/mapred-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+	<property>
+		<name>mapred.job.tracker</name>
+		<value>localhost:29007</value>
+	</property>
+	<property>
+		<name>mapred.tasktracker.map.tasks.maximum</name>
+		<value>20</value>
+	</property>
+	<property>
+		<name>mapred.tasktracker.reduce.tasks.maximum</name>
+		<value>20</value>
+	</property>
+	<property>
+		<name>mapred.max.split.size</name>
+		<value>2048</value>
+	</property>
+
+</configuration>
diff --git a/genomix/genomix-pregelix/data/result/.output.crc b/genomix/genomix-pregelix/data/result/.output.crc
new file mode 100644
index 0000000..c4995af
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/.output.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/result/BridgePath b/genomix/genomix-pregelix/data/result/BridgePath
new file mode 100755
index 0000000..90f0a8a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/BridgePath
Binary files differ
diff --git a/genomix/genomix-pregelix/data/result/CyclePath b/genomix/genomix-pregelix/data/result/CyclePath
new file mode 100755
index 0000000..0d50d01
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/CyclePath
Binary files differ
diff --git a/genomix/genomix-pregelix/data/result/LongPath b/genomix/genomix-pregelix/data/result/LongPath
new file mode 100755
index 0000000..b1040ab
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LongPath
Binary files differ
diff --git a/genomix/genomix-pregelix/data/result/Path b/genomix/genomix-pregelix/data/result/Path
new file mode 100755
index 0000000..76b1a0e
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/Path
Binary files differ
diff --git a/genomix/genomix-pregelix/data/result/SimplePath b/genomix/genomix-pregelix/data/result/SimplePath
new file mode 100755
index 0000000..dfabc43
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/SimplePath
Binary files differ
diff --git a/genomix/genomix-pregelix/data/result/SinglePath b/genomix/genomix-pregelix/data/result/SinglePath
new file mode 100755
index 0000000..6329aa6
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/SinglePath
Binary files differ
diff --git a/genomix/genomix-pregelix/data/result/ThreeKmer b/genomix/genomix-pregelix/data/result/ThreeKmer
new file mode 100755
index 0000000..f0435c7
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/ThreeKmer
Binary files differ
diff --git a/genomix/genomix-pregelix/data/result/TreePath b/genomix/genomix-pregelix/data/result/TreePath
new file mode 100755
index 0000000..dc8d16c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/TreePath
Binary files differ
diff --git a/genomix/genomix-pregelix/data/result/TwoKmer b/genomix/genomix-pregelix/data/result/TwoKmer
new file mode 100755
index 0000000..73024db
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/TwoKmer
Binary files differ
diff --git a/genomix/genomix-pregelix/data/webmap/part-1-out-100000 b/genomix/genomix-pregelix/data/webmap/part-1-out-100000
new file mode 100755
index 0000000..99d2fac
--- /dev/null
+++ b/genomix/genomix-pregelix/data/webmap/part-1-out-100000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/webmap/part-1-out-200000 b/genomix/genomix-pregelix/data/webmap/part-1-out-200000
new file mode 100755
index 0000000..86413de
--- /dev/null
+++ b/genomix/genomix-pregelix/data/webmap/part-1-out-200000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/webmap/part-1-out-25000 b/genomix/genomix-pregelix/data/webmap/part-1-out-25000
new file mode 100755
index 0000000..079f915
--- /dev/null
+++ b/genomix/genomix-pregelix/data/webmap/part-1-out-25000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/webmap/part-1-out-50000 b/genomix/genomix-pregelix/data/webmap/part-1-out-50000
new file mode 100755
index 0000000..d292c19
--- /dev/null
+++ b/genomix/genomix-pregelix/data/webmap/part-1-out-50000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/webmap/part-1-out-500000 b/genomix/genomix-pregelix/data/webmap/part-1-out-500000
new file mode 100755
index 0000000..01fdc9e
--- /dev/null
+++ b/genomix/genomix-pregelix/data/webmap/part-1-out-500000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/webmap/sequenceFileLongMergeTest b/genomix/genomix-pregelix/data/webmap/sequenceFileLongMergeTest
new file mode 100755
index 0000000..ff27c48
--- /dev/null
+++ b/genomix/genomix-pregelix/data/webmap/sequenceFileLongMergeTest
Binary files differ
diff --git a/genomix/genomix-pregelix/data/webmap/sequenceFileMergeTest b/genomix/genomix-pregelix/data/webmap/sequenceFileMergeTest
new file mode 100755
index 0000000..b50d3f1
--- /dev/null
+++ b/genomix/genomix-pregelix/data/webmap/sequenceFileMergeTest
Binary files differ
diff --git a/genomix/genomix-pregelix/data/webmap/sequenceShortFileMergeTest b/genomix/genomix-pregelix/data/webmap/sequenceShortFileMergeTest
new file mode 100755
index 0000000..96f7fc9
--- /dev/null
+++ b/genomix/genomix-pregelix/data/webmap/sequenceShortFileMergeTest
Binary files differ
diff --git a/genomix/genomix-pregelix/graph/BridgePath b/genomix/genomix-pregelix/graph/BridgePath
new file mode 100644
index 0000000..c10ea60
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/BridgePath
@@ -0,0 +1,14 @@
+ACCCC	C|G	1
+CCCCG	A|T	1
+CTCCG	A|T	1
+TTCCA	T|C	2
+ACTCC	C|G	1
+CCGTG	CT|	2
+TCCAC	T|CT	2
+CCACC	T|C	1
+CCACT	T|C	1
+CACCC	C|C	1
+TTTCC	|A	2
+CCCGT	C|G	1
+TCCGT	C|G	1
+CACTC	C|C	1
diff --git a/genomix/genomix-pregelix/graph/BridgePath_out.ps b/genomix/genomix-pregelix/graph/BridgePath_out.ps
new file mode 100644
index 0000000..9f66e44
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/BridgePath_out.ps
@@ -0,0 +1,603 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+        dup dup findfont dup length dict begin
+        { 1 index /FID ne { def }{ pop pop } ifelse
+        } forall
+        /Encoding EncodingVector def
+        currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+       dup 1 exch div /InvScaleFactor exch def
+       scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color 
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage {	% i j npages
+	/npages exch def
+	/j exch def
+	/i exch def
+	/str 10 string def
+	npages 1 gt {
+		gsave
+			coordfont setfont
+			0 0 moveto
+			(\() show i str cvs show (,) show j str cvs show (\)) show
+		grestore
+	} if
+} bind def
+
+/set_font {
+	findfont exch
+	scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext {			% width text
+	/text exch def
+	/width exch def
+	gsave
+		width 0 gt {
+			[] 0 setdash
+			text stringwidth pop width exch sub text length div 0 text ashow
+		} if
+	grestore
+} def
+
+/boxprim {				% xcorner ycorner xsize ysize
+		4 2 roll
+		moveto
+		2 copy
+		exch 0 rlineto
+		0 exch rlineto
+		pop neg 0 rlineto
+		closepath
+} bind def
+
+/ellipse_path {
+	/ry exch def
+	/rx exch def
+	/y exch def
+	/x exch def
+	matrix currentmatrix
+	newpath
+	x y translate
+	rx ry scale
+	0 0 1 0 360 arc
+	setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+	[	% layer color sequence - darkest to lightest
+		[0 0 0]
+		[.2 .8 .8]
+		[.4 .8 .8]
+		[.6 .8 .8]
+		[.8 .8 .8]
+	]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+	layercolorseq curlayer 1 sub layerlen mod get
+	aload pop sethsbcolor
+	/nodecolor {nopcolor} def
+	/edgecolor {nopcolor} def
+	/graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+	/myupper exch def
+	/mylower exch def
+	curlayer mylower lt
+	curlayer myupper gt
+	or
+	{invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+    userdict (<<) cvn ([) cvn load put
+    userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 248 674
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 212 638 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% ACCCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 241 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 237.4 moveto 49 (ACCCC) alignedtext
+grestore
+% CCCCG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 167 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+21.5 163.4 moveto 51 (CCCCG) alignedtext
+grestore
+% ACCCC->CCCCG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 222.33 moveto
+47 214.26 47 204.65 47 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 195.67 moveto
+47 185.67 lineto
+43.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 195.67 moveto
+47 185.67 lineto
+43.5 195.67 lineto
+closepath stroke
+grestore
+% CCCGT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 93 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 89.4 moveto 49 (CCCGT) alignedtext
+grestore
+% CCCCG->CCCGT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47.25 148.33 moveto
+47.36 140.26 47.49 130.65 47.61 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.11 121.71 moveto
+47.75 111.67 lineto
+44.11 121.62 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.11 121.71 moveto
+47.75 111.67 lineto
+44.11 121.62 lineto
+closepath stroke
+grestore
+% CCGTG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+102 19 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+76.5 15.4 moveto 51 (CCGTG) alignedtext
+grestore
+% CCCGT->CCGTG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 61.07 75.09 moveto
+67.59 66.15 75.6 55.18 82.8 45.31 curveto
+stroke
+0 0 0 edgecolor
+newpath 85.79 47.15 moveto
+88.86 37 lineto
+80.14 43.02 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 85.79 47.15 moveto
+88.86 37 lineto
+80.14 43.02 lineto
+closepath stroke
+grestore
+% CTCCG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+158 167 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+133.5 163.4 moveto 49 (CTCCG) alignedtext
+grestore
+% TCCGT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+157 93 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+133 89.4 moveto 48 (TCCGT) alignedtext
+grestore
+% CTCCG->TCCGT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 157.75 148.33 moveto
+157.64 140.26 157.51 130.65 157.39 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 160.89 121.62 moveto
+157.25 111.67 lineto
+153.89 121.71 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 160.89 121.62 moveto
+157.25 111.67 lineto
+153.89 121.71 lineto
+closepath stroke
+grestore
+% TCCGT->CCGTG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 143.69 75.09 moveto
+137.05 66.15 128.89 55.18 121.56 45.31 curveto
+stroke
+0 0 0 edgecolor
+newpath 124.16 42.94 moveto
+115.38 37 lineto
+118.54 47.12 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 124.16 42.94 moveto
+115.38 37 lineto
+118.54 47.12 lineto
+closepath stroke
+grestore
+% TTCCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+102 537 44.05 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+78.5 533.4 moveto 47 (TTCCA) alignedtext
+grestore
+% TCCAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+102 463 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+78 459.4 moveto 48 (TCCAC) alignedtext
+grestore
+% TTCCA->TCCAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 102 518.33 moveto
+102 510.26 102 500.65 102 491.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 105.5 491.67 moveto
+102 481.67 lineto
+98.5 491.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 105.5 491.67 moveto
+102 481.67 lineto
+98.5 491.67 lineto
+closepath stroke
+grestore
+% CCACC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 389 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 385.4 moveto 49 (CCACC) alignedtext
+grestore
+% TCCAC->CCACC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 88.69 445.09 moveto
+81.93 436 73.6 424.79 66.17 414.79 curveto
+stroke
+0 0 0 edgecolor
+newpath 68.97 412.69 moveto
+60.19 406.75 lineto
+63.35 416.86 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 68.97 412.69 moveto
+60.19 406.75 lineto
+63.35 416.86 lineto
+closepath stroke
+grestore
+% CCACT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+157 389 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+132.5 385.4 moveto 49 (CCACT) alignedtext
+grestore
+% TCCAC->CCACT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 115.31 445.09 moveto
+122.07 436 130.4 424.79 137.83 414.79 curveto
+stroke
+0 0 0 edgecolor
+newpath 140.65 416.86 moveto
+143.81 406.75 lineto
+135.03 412.69 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 140.65 416.86 moveto
+143.81 406.75 lineto
+135.03 412.69 lineto
+closepath stroke
+grestore
+% ACTCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+157 241 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+133 237.4 moveto 48 (ACTCC) alignedtext
+grestore
+% ACTCC->CTCCG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 157.25 222.33 moveto
+157.36 214.26 157.49 204.65 157.61 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 161.11 195.71 moveto
+157.75 185.67 lineto
+154.11 195.62 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 161.11 195.71 moveto
+157.75 185.67 lineto
+154.11 195.62 lineto
+closepath stroke
+grestore
+% CACCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 315 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 311.4 moveto 49 (CACCC) alignedtext
+grestore
+% CCACC->CACCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 370.33 moveto
+47 362.26 47 352.65 47 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 343.67 moveto
+47 333.67 lineto
+43.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 343.67 moveto
+47 333.67 lineto
+43.5 343.67 lineto
+closepath stroke
+grestore
+% CACTC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+157 315 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+133 311.4 moveto 48 (CACTC) alignedtext
+grestore
+% CCACT->CACTC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 157 370.33 moveto
+157 362.26 157 352.65 157 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 160.5 343.67 moveto
+157 333.67 lineto
+153.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 160.5 343.67 moveto
+157 333.67 lineto
+153.5 343.67 lineto
+closepath stroke
+grestore
+% CACCC->ACCCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 296.33 moveto
+47 288.26 47 278.65 47 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 269.67 moveto
+47 259.67 lineto
+43.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 269.67 moveto
+47 259.67 lineto
+43.5 269.67 lineto
+closepath stroke
+grestore
+% CACTC->ACTCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 157 296.33 moveto
+157 288.26 157 278.65 157 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 160.5 269.67 moveto
+157 259.67 lineto
+153.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 160.5 269.67 moveto
+157 259.67 lineto
+153.5 269.67 lineto
+closepath stroke
+grestore
+% TTTCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+102 611 43.84 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+79 607.4 moveto 46 (TTTCC) alignedtext
+grestore
+% TTTCC->TTCCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 102 592.33 moveto
+102 584.26 102 574.65 102 565.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 105.5 565.67 moveto
+102 555.67 lineto
+98.5 565.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 105.5 565.67 moveto
+102 555.67 lineto
+98.5 565.67 lineto
+closepath stroke
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 248 674
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/graph/CyclePath b/genomix/genomix-pregelix/graph/CyclePath
new file mode 100644
index 0000000..db30e3a
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/CyclePath
@@ -0,0 +1,10 @@
+GCAAC	|T	1
+CATCA	T|A	1
+CTTCA	A|T	1
+AACTT	C|C	1
+ACTTC	A|A	1
+TCAAC	A|T	1
+ATCAA	C|C	1
+TTCAT	C|C	1
+CAACT	GT|T	2
+TCATC	T|A	1
diff --git a/genomix/genomix-pregelix/graph/CyclePath_out.ps b/genomix/genomix-pregelix/graph/CyclePath_out.ps
new file mode 100644
index 0000000..1b13ecb
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/CyclePath_out.ps
@@ -0,0 +1,489 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+        dup dup findfont dup length dict begin
+        { 1 index /FID ne { def }{ pop pop } ifelse
+        } forall
+        /Encoding EncodingVector def
+        currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+       dup 1 exch div /InvScaleFactor exch def
+       scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color 
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage {	% i j npages
+	/npages exch def
+	/j exch def
+	/i exch def
+	/str 10 string def
+	npages 1 gt {
+		gsave
+			coordfont setfont
+			0 0 moveto
+			(\() show i str cvs show (,) show j str cvs show (\)) show
+		grestore
+	} if
+} bind def
+
+/set_font {
+	findfont exch
+	scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext {			% width text
+	/text exch def
+	/width exch def
+	gsave
+		width 0 gt {
+			[] 0 setdash
+			text stringwidth pop width exch sub text length div 0 text ashow
+		} if
+	grestore
+} def
+
+/boxprim {				% xcorner ycorner xsize ysize
+		4 2 roll
+		moveto
+		2 copy
+		exch 0 rlineto
+		0 exch rlineto
+		pop neg 0 rlineto
+		closepath
+} bind def
+
+/ellipse_path {
+	/ry exch def
+	/rx exch def
+	/y exch def
+	/x exch def
+	matrix currentmatrix
+	newpath
+	x y translate
+	rx ry scale
+	0 0 1 0 360 arc
+	setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+	[	% layer color sequence - darkest to lightest
+		[0 0 0]
+		[.2 .8 .8]
+		[.4 .8 .8]
+		[.6 .8 .8]
+		[.8 .8 .8]
+	]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+	layercolorseq curlayer 1 sub layerlen mod get
+	aload pop sethsbcolor
+	/nodecolor {nopcolor} def
+	/edgecolor {nopcolor} def
+	/graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+	/myupper exch def
+	/mylower exch def
+	curlayer mylower lt
+	curlayer myupper gt
+	or
+	{invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+    userdict (<<) cvn ([) cvn load put
+    userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 175 748
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 139 712 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% GCAAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+83 685 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+57 681.4 moveto 52 (GCAAC) alignedtext
+grestore
+% CAACT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+83 611 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+58.5 607.4 moveto 49 (CAACT) alignedtext
+grestore
+% GCAAC->CAACT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 83 666.33 moveto
+83 658.26 83 648.65 83 639.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 86.5 639.67 moveto
+83 629.67 lineto
+79.5 639.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 86.5 639.67 moveto
+83 629.67 lineto
+79.5 639.67 lineto
+closepath stroke
+grestore
+% AACTT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+46 537 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+21.5 533.4 moveto 49 (AACTT) alignedtext
+grestore
+% CAACT->AACTT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 73.85 592.71 moveto
+69.58 584.17 64.41 573.83 59.69 564.38 curveto
+stroke
+0 0 0 edgecolor
+newpath 62.77 562.72 moveto
+55.17 555.34 lineto
+56.51 565.85 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 62.77 562.72 moveto
+55.17 555.34 lineto
+56.51 565.85 lineto
+closepath stroke
+grestore
+% CATCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 167 44.05 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 163.4 moveto 47 (CATCA) alignedtext
+grestore
+% ATCAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 93 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23 89.4 moveto 48 (ATCAA) alignedtext
+grestore
+% CATCA->ATCAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 148.33 moveto
+47 140.26 47 130.65 47 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 121.67 moveto
+47 111.67 lineto
+43.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 121.67 moveto
+47 111.67 lineto
+43.5 121.67 lineto
+closepath stroke
+grestore
+% TCAAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+83 19 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+58.5 15.4 moveto 49 (TCAAC) alignedtext
+grestore
+% ATCAA->TCAAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 55.9 74.71 moveto
+60.05 66.17 65.08 55.83 69.68 46.38 curveto
+stroke
+0 0 0 edgecolor
+newpath 72.85 47.86 moveto
+74.08 37.34 lineto
+66.56 44.8 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 72.85 47.86 moveto
+74.08 37.34 lineto
+66.56 44.8 lineto
+closepath stroke
+grestore
+% CTTCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 389 44.05 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 385.4 moveto 47 (CTTCA) alignedtext
+grestore
+% TTCAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 315 43.84 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+24 311.4 moveto 46 (TTCAT) alignedtext
+grestore
+% CTTCA->TTCAT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 370.33 moveto
+47 362.26 47 352.65 47 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 343.67 moveto
+47 333.67 lineto
+43.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 343.67 moveto
+47 333.67 lineto
+43.5 343.67 lineto
+closepath stroke
+grestore
+% TCATC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 241 43.84 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+24 237.4 moveto 46 (TCATC) alignedtext
+grestore
+% TTCAT->TCATC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 296.33 moveto
+47 288.26 47 278.65 47 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 269.67 moveto
+47 259.67 lineto
+43.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 269.67 moveto
+47 259.67 lineto
+43.5 269.67 lineto
+closepath stroke
+grestore
+% ACTTC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 463 44.05 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 459.4 moveto 47 (ACTTC) alignedtext
+grestore
+% AACTT->ACTTC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 46.25 518.33 moveto
+46.36 510.26 46.49 500.65 46.61 491.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.11 491.71 moveto
+46.75 481.67 lineto
+43.11 491.62 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.11 491.71 moveto
+46.75 481.67 lineto
+43.11 491.62 lineto
+closepath stroke
+grestore
+% ACTTC->CTTCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 444.33 moveto
+47 436.26 47 426.65 47 417.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 417.67 moveto
+47 407.67 lineto
+43.5 417.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 417.67 moveto
+47 407.67 lineto
+43.5 417.67 lineto
+closepath stroke
+grestore
+% TCAAC->CAACT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 90.67 37.32 moveto
+101.5 64.93 120 119.06 120 167 curveto
+120 463 120 463 120 463 curveto
+120 505.88 105.2 553.72 94.32 583.1 curveto
+stroke
+0 0 0 edgecolor
+newpath 90.96 582.09 moveto
+90.67 592.68 lineto
+97.5 584.58 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 90.96 582.09 moveto
+90.67 592.68 lineto
+97.5 584.58 lineto
+closepath stroke
+grestore
+% TCATC->CATCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 222.33 moveto
+47 214.26 47 204.65 47 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 195.67 moveto
+47 185.67 lineto
+43.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 195.67 moveto
+47 185.67 lineto
+43.5 195.67 lineto
+closepath stroke
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 175 748
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/graph/LongPath b/genomix/genomix-pregelix/graph/LongPath
new file mode 100644
index 0000000..82c0298
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/LongPath
@@ -0,0 +1,13 @@
+CTCAG	C|T	1
+AGTAC	C|G	1
+GGCCT	|C	1
+ACGCC	T|C	1
+CCTCA	G|G	1
+CCCGG	G|	1
+GCCTC	G|A	1
+CAGTA	T|C	1
+GTACG	A|C	1
+GCCCG	C|G	1
+CGCCC	A|G	1
+TCAGT	C|A	1
+TACGC	G|C	1
diff --git a/genomix/genomix-pregelix/graph/Path b/genomix/genomix-pregelix/graph/Path
new file mode 100644
index 0000000..67d55ca
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/Path
@@ -0,0 +1,8 @@
+CTCAG	C|T	1
+AGTAC	C|G	1
+GGCCT	|C	1
+CCTCA	G|G	1
+GCCTC	G|A	1
+CAGTA	T|C	1
+GTACG	A|	1
+TCAGT	C|A	1
diff --git a/genomix/genomix-pregelix/graph/SimplePath b/genomix/genomix-pregelix/graph/SimplePath
new file mode 100644
index 0000000..2e0667e
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/SimplePath
@@ -0,0 +1,18 @@
+AGCAC	C|	1
+AAGAC	|A	1
+CAGCA	A|C	1
+TCGCA	A|T	1
+CGGCA	G|A	1
+TATCG	A|C	1
+CAAGA	G|A	1
+ACAGC	G|A	1
+ATCGC	T|A	1
+GCGGC	|A	1
+GCATC	C|	1
+ATATC	|G	1
+GCAAG	G|A	1
+GACAG	A|C	1
+CGCAT	T|C	1
+GGCAA	C|G	1
+AAGAA	C|	1
+AGACA	A|G	1
diff --git a/genomix/genomix-pregelix/graph/SimplePath_out.ps b/genomix/genomix-pregelix/graph/SimplePath_out.ps
new file mode 100644
index 0000000..3b3bf39
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/SimplePath_out.ps
@@ -0,0 +1,659 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+        dup dup findfont dup length dict begin
+        { 1 index /FID ne { def }{ pop pop } ifelse
+        } forall
+        /Encoding EncodingVector def
+        currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+       dup 1 exch div /InvScaleFactor exch def
+       scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color 
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage {	% i j npages
+	/npages exch def
+	/j exch def
+	/i exch def
+	/str 10 string def
+	npages 1 gt {
+		gsave
+			coordfont setfont
+			0 0 moveto
+			(\() show i str cvs show (,) show j str cvs show (\)) show
+		grestore
+	} if
+} bind def
+
+/set_font {
+	findfont exch
+	scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext {			% width text
+	/text exch def
+	/width exch def
+	gsave
+		width 0 gt {
+			[] 0 setdash
+			text stringwidth pop width exch sub text length div 0 text ashow
+		} if
+	grestore
+} def
+
+/boxprim {				% xcorner ycorner xsize ysize
+		4 2 roll
+		moveto
+		2 copy
+		exch 0 rlineto
+		0 exch rlineto
+		pop neg 0 rlineto
+		closepath
+} bind def
+
+/ellipse_path {
+	/ry exch def
+	/rx exch def
+	/y exch def
+	/x exch def
+	matrix currentmatrix
+	newpath
+	x y translate
+	rx ry scale
+	0 0 1 0 360 arc
+	setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+	[	% layer color sequence - darkest to lightest
+		[0 0 0]
+		[.2 .8 .8]
+		[.4 .8 .8]
+		[.6 .8 .8]
+		[.8 .8 .8]
+	]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+	layercolorseq curlayer 1 sub layerlen mod get
+	aload pop sethsbcolor
+	/nodecolor {nopcolor} def
+	/edgecolor {nopcolor} def
+	/graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+	/myupper exch def
+	/mylower exch def
+	curlayer mylower lt
+	curlayer myupper gt
+	or
+	{invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+    userdict (<<) cvn ([) cvn load put
+    userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 366 452
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 330 416 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AAGAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 389 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 385.4 moveto 52 (AAGAC) alignedtext
+grestore
+% AGACA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 315 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 311.4 moveto 51 (AGACA) alignedtext
+grestore
+% AAGAC->AGACA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 370.33 moveto
+48 362.26 48 352.65 48 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath stroke
+grestore
+% GACAG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 241 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 237.4 moveto 52 (GACAG) alignedtext
+grestore
+% AGACA->GACAG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 296.33 moveto
+48 288.26 48 278.65 48 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath stroke
+grestore
+% CAGCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 93 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 89.4 moveto 51 (CAGCA) alignedtext
+grestore
+% AGCAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 19 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 15.4 moveto 51 (AGCAC) alignedtext
+grestore
+% CAGCA->AGCAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 74.33 moveto
+48 66.26 48 56.65 48 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath stroke
+grestore
+% TCGCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 167 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+135.5 163.4 moveto 49 (TCGCA) alignedtext
+grestore
+% CGCAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 93 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+135.5 89.4 moveto 49 (CGCAT) alignedtext
+grestore
+% TCGCA->CGCAT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 160 148.33 moveto
+160 140.26 160 130.65 160 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.5 121.67 moveto
+160 111.67 lineto
+156.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.5 121.67 moveto
+160 111.67 lineto
+156.5 121.67 lineto
+closepath stroke
+grestore
+% GCATC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 19 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+135.5 15.4 moveto 49 (GCATC) alignedtext
+grestore
+% CGCAT->GCATC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 160 74.33 moveto
+160 66.26 160 56.65 160 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.5 47.67 moveto
+160 37.67 lineto
+156.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.5 47.67 moveto
+160 37.67 lineto
+156.5 47.67 lineto
+closepath stroke
+grestore
+% CGGCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+273 315 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+247 311.4 moveto 52 (CGGCA) alignedtext
+grestore
+% GGCAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+273 241 48.79 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+246.5 237.4 moveto 53 (GGCAA) alignedtext
+grestore
+% CGGCA->GGCAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 273 296.33 moveto
+273 288.26 273 278.65 273 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 276.5 269.67 moveto
+273 259.67 lineto
+269.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 276.5 269.67 moveto
+273 259.67 lineto
+269.5 269.67 lineto
+closepath stroke
+grestore
+% GCAAG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+273 167 48.79 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+246.5 163.4 moveto 53 (GCAAG) alignedtext
+grestore
+% GGCAA->GCAAG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 273 222.33 moveto
+273 214.26 273 204.65 273 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 276.5 195.67 moveto
+273 185.67 lineto
+269.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 276.5 195.67 moveto
+273 185.67 lineto
+269.5 195.67 lineto
+closepath stroke
+grestore
+% TATCG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 315 44.05 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+136.5 311.4 moveto 47 (TATCG) alignedtext
+grestore
+% ATCGC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 241 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+135.5 237.4 moveto 49 (ATCGC) alignedtext
+grestore
+% TATCG->ATCGC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 160 296.33 moveto
+160 288.26 160 278.65 160 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.5 269.67 moveto
+160 259.67 lineto
+156.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.5 269.67 moveto
+160 259.67 lineto
+156.5 269.67 lineto
+closepath stroke
+grestore
+% ATCGC->TCGCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 160 222.33 moveto
+160 214.26 160 204.65 160 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.5 195.67 moveto
+160 185.67 lineto
+156.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.5 195.67 moveto
+160 185.67 lineto
+156.5 195.67 lineto
+closepath stroke
+grestore
+% CAAGA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+273 93 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+247 89.4 moveto 52 (CAAGA) alignedtext
+grestore
+% AAGAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+273 19 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+247 15.4 moveto 52 (AAGAA) alignedtext
+grestore
+% CAAGA->AAGAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 273 74.33 moveto
+273 66.26 273 56.65 273 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 276.5 47.67 moveto
+273 37.67 lineto
+269.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 276.5 47.67 moveto
+273 37.67 lineto
+269.5 47.67 lineto
+closepath stroke
+grestore
+% ACAGC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 167 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 163.4 moveto 51 (ACAGC) alignedtext
+grestore
+% ACAGC->CAGCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 148.33 moveto
+48 140.26 48 130.65 48 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath stroke
+grestore
+% GCGGC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+273 389 49 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+246 385.4 moveto 54 (GCGGC) alignedtext
+grestore
+% GCGGC->CGGCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 273 370.33 moveto
+273 362.26 273 352.65 273 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 276.5 343.67 moveto
+273 333.67 lineto
+269.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 276.5 343.67 moveto
+273 333.67 lineto
+269.5 343.67 lineto
+closepath stroke
+grestore
+% ATATC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 389 43.13 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+137.5 385.4 moveto 45 (ATATC) alignedtext
+grestore
+% ATATC->TATCG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 160 370.33 moveto
+160 362.26 160 352.65 160 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.5 343.67 moveto
+160 333.67 lineto
+156.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.5 343.67 moveto
+160 333.67 lineto
+156.5 343.67 lineto
+closepath stroke
+grestore
+% GCAAG->CAAGA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 273 148.33 moveto
+273 140.26 273 130.65 273 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 276.5 121.67 moveto
+273 111.67 lineto
+269.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 276.5 121.67 moveto
+273 111.67 lineto
+269.5 121.67 lineto
+closepath stroke
+grestore
+% GACAG->ACAGC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 222.33 moveto
+48 214.26 48 204.65 48 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath stroke
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 366 452
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/graph/SinglePath b/genomix/genomix-pregelix/graph/SinglePath
new file mode 100644
index 0000000..02f42ba
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/SinglePath
@@ -0,0 +1,6 @@
+ACAAC	G|A	1
+CAACA	A|G	1
+ACAGT	A|	1
+AACAG	C|T	1
+GACAA	A|C	1
+AGACA	|A	1
diff --git a/genomix/genomix-pregelix/graph/SinglePath_out.ps b/genomix/genomix-pregelix/graph/SinglePath_out.ps
new file mode 100644
index 0000000..8371636
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/SinglePath_out.ps
@@ -0,0 +1,351 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+        dup dup findfont dup length dict begin
+        { 1 index /FID ne { def }{ pop pop } ifelse
+        } forall
+        /Encoding EncodingVector def
+        currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+       dup 1 exch div /InvScaleFactor exch def
+       scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color 
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage {	% i j npages
+	/npages exch def
+	/j exch def
+	/i exch def
+	/str 10 string def
+	npages 1 gt {
+		gsave
+			coordfont setfont
+			0 0 moveto
+			(\() show i str cvs show (,) show j str cvs show (\)) show
+		grestore
+	} if
+} bind def
+
+/set_font {
+	findfont exch
+	scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext {			% width text
+	/text exch def
+	/width exch def
+	gsave
+		width 0 gt {
+			[] 0 setdash
+			text stringwidth pop width exch sub text length div 0 text ashow
+		} if
+	grestore
+} def
+
+/boxprim {				% xcorner ycorner xsize ysize
+		4 2 roll
+		moveto
+		2 copy
+		exch 0 rlineto
+		0 exch rlineto
+		pop neg 0 rlineto
+		closepath
+} bind def
+
+/ellipse_path {
+	/ry exch def
+	/rx exch def
+	/y exch def
+	/x exch def
+	matrix currentmatrix
+	newpath
+	x y translate
+	rx ry scale
+	0 0 1 0 360 arc
+	setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+	[	% layer color sequence - darkest to lightest
+		[0 0 0]
+		[.2 .8 .8]
+		[.4 .8 .8]
+		[.6 .8 .8]
+		[.8 .8 .8]
+	]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+	layercolorseq curlayer 1 sub layerlen mod get
+	aload pop sethsbcolor
+	/nodecolor {nopcolor} def
+	/edgecolor {nopcolor} def
+	/graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+	/myupper exch def
+	/mylower exch def
+	curlayer mylower lt
+	curlayer myupper gt
+	or
+	{invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+    userdict (<<) cvn ([) cvn load put
+    userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 140 452
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 104 416 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% ACAAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 241 46.17 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23 237.4 moveto 50 (ACAAC) alignedtext
+grestore
+% CAACA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 167 46.17 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23 163.4 moveto 50 (CAACA) alignedtext
+grestore
+% ACAAC->CAACA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 222.33 moveto
+48 214.26 48 204.65 48 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath stroke
+grestore
+% AACAG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 93 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 89.4 moveto 52 (AACAG) alignedtext
+grestore
+% CAACA->AACAG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 148.33 moveto
+48 140.26 48 130.65 48 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath stroke
+grestore
+% ACAGT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 19 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 15.4 moveto 49 (ACAGT) alignedtext
+grestore
+% AACAG->ACAGT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 74.33 moveto
+48 66.26 48 56.65 48 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath stroke
+grestore
+% GACAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 315 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 311.4 moveto 52 (GACAA) alignedtext
+grestore
+% GACAA->ACAAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 296.33 moveto
+48 288.26 48 278.65 48 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath stroke
+grestore
+% AGACA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 389 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 385.4 moveto 51 (AGACA) alignedtext
+grestore
+% AGACA->GACAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 370.33 moveto
+48 362.26 48 352.65 48 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath stroke
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 140 452
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/graph/ThreeKmer b/genomix/genomix-pregelix/graph/ThreeKmer
new file mode 100644
index 0000000..7ce9890
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/ThreeKmer
@@ -0,0 +1,3 @@
+CTCGG	A|T	1
+ACTCG	|G	1
+TCGGT	C|	1
diff --git a/genomix/genomix-pregelix/graph/TreePath b/genomix/genomix-pregelix/graph/TreePath
new file mode 100644
index 0000000..0a3d5c6
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/TreePath
@@ -0,0 +1,29 @@
+CTAAA	A|C	1
+GTAAC	A|T	1
+CTCAG	C|T	2
+GCTAT	G|C	1
+AGTAC	C|G	1
+GGCCT	|CG	3
+ATCCC	T|	1
+ACGCC	T|C	1
+CCTCA	G|G	2
+CCCGG	G|	1
+CCTGG	G|C	1
+GCCTC	G|A	2
+CAGTA	T|AC	2
+TAAAC	C|	1
+ACTAA	A|A	1
+AGTAA	C|C	1
+TAACT	G|A	1
+GTACG	A|C	1
+GCCCG	C|G	1
+CGCCC	A|G	1
+TGGCT	C|A	1
+TATCC	C|C	1
+TCAGT	C|A	2
+TACGC	G|C	1
+CTGGC	C|T	1
+CTATC	G|C	1
+AACTA	T|A	1
+GCCTG	G|G	1
+GGCTA	T|T	1
diff --git a/genomix/genomix-pregelix/graph/TreePath_out.ps b/genomix/genomix-pregelix/graph/TreePath_out.ps
new file mode 100644
index 0000000..a1e7da1
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/TreePath_out.ps
@@ -0,0 +1,1018 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+        dup dup findfont dup length dict begin
+        { 1 index /FID ne { def }{ pop pop } ifelse
+        } forall
+        /Encoding EncodingVector def
+        currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+       dup 1 exch div /InvScaleFactor exch def
+       scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color 
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage {	% i j npages
+	/npages exch def
+	/j exch def
+	/i exch def
+	/str 10 string def
+	npages 1 gt {
+		gsave
+			coordfont setfont
+			0 0 moveto
+			(\() show i str cvs show (,) show j str cvs show (\)) show
+		grestore
+	} if
+} bind def
+
+/set_font {
+	findfont exch
+	scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext {			% width text
+	/text exch def
+	/width exch def
+	gsave
+		width 0 gt {
+			[] 0 setdash
+			text stringwidth pop width exch sub text length div 0 text ashow
+		} if
+	grestore
+} def
+
+/boxprim {				% xcorner ycorner xsize ysize
+		4 2 roll
+		moveto
+		2 copy
+		exch 0 rlineto
+		0 exch rlineto
+		pop neg 0 rlineto
+		closepath
+} bind def
+
+/ellipse_path {
+	/ry exch def
+	/rx exch def
+	/y exch def
+	/x exch def
+	matrix currentmatrix
+	newpath
+	x y translate
+	rx ry scale
+	0 0 1 0 360 arc
+	setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+	[	% layer color sequence - darkest to lightest
+		[0 0 0]
+		[.2 .8 .8]
+		[.4 .8 .8]
+		[.6 .8 .8]
+		[.8 .8 .8]
+	]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+	layercolorseq curlayer 1 sub layerlen mod get
+	aload pop sethsbcolor
+	/nodecolor {nopcolor} def
+	/edgecolor {nopcolor} def
+	/graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+	/myupper exch def
+	/mylower exch def
+	curlayer mylower lt
+	curlayer myupper gt
+	or
+	{invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+    userdict (<<) cvn ([) cvn load put
+    userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 359 970
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 323 934 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% CTAAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 93 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+135.5 89.4 moveto 49 (CTAAA) alignedtext
+grestore
+% TAAAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 19 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+135.5 15.4 moveto 49 (TAAAC) alignedtext
+grestore
+% CTAAA->TAAAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 160 74.33 moveto
+160 66.26 160 56.65 160 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.5 47.67 moveto
+160 37.67 lineto
+156.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.5 47.67 moveto
+160 37.67 lineto
+156.5 47.67 lineto
+closepath stroke
+grestore
+% GTAAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+158 389 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+133.5 385.4 moveto 49 (GTAAC) alignedtext
+grestore
+% TAACT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+158 315 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+134 311.4 moveto 48 (TAACT) alignedtext
+grestore
+% GTAAC->TAACT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 158 370.33 moveto
+158 362.26 158 352.65 158 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 161.5 343.67 moveto
+158 333.67 lineto
+154.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 161.5 343.67 moveto
+158 333.67 lineto
+154.5 343.67 lineto
+closepath stroke
+grestore
+% AACTA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+159 241 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+134.5 237.4 moveto 49 (AACTA) alignedtext
+grestore
+% TAACT->AACTA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 158.25 296.33 moveto
+158.36 288.26 158.49 278.65 158.61 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 162.11 269.71 moveto
+158.75 259.67 lineto
+155.11 269.62 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 162.11 269.71 moveto
+158.75 259.67 lineto
+155.11 269.62 lineto
+closepath stroke
+grestore
+% CTCAG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+155 685 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+130.5 681.4 moveto 49 (CTCAG) alignedtext
+grestore
+% TCAGT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+155 611 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+131 607.4 moveto 48 (TCAGT) alignedtext
+grestore
+% CTCAG->TCAGT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 155 666.33 moveto
+155 658.26 155 648.65 155 639.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 158.5 639.67 moveto
+155 629.67 lineto
+151.5 639.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 158.5 639.67 moveto
+155 629.67 lineto
+151.5 639.67 lineto
+closepath stroke
+grestore
+% CAGTA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+155 537 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+130.5 533.4 moveto 49 (CAGTA) alignedtext
+grestore
+% TCAGT->CAGTA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 155 592.33 moveto
+155 584.26 155 574.65 155 565.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 158.5 565.67 moveto
+155 555.67 lineto
+151.5 565.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 158.5 565.67 moveto
+155 555.67 lineto
+151.5 565.67 lineto
+closepath stroke
+grestore
+% GCTAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 463 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+243 459.4 moveto 48 (GCTAT) alignedtext
+grestore
+% CTATC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 389 43.84 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+244 385.4 moveto 46 (CTATC) alignedtext
+grestore
+% GCTAT->CTATC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 444.33 moveto
+267 436.26 267 426.65 267 417.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 417.67 moveto
+267 407.67 lineto
+263.5 417.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 417.67 moveto
+267 407.67 lineto
+263.5 417.67 lineto
+closepath stroke
+grestore
+% TATCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 315 43.84 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+244 311.4 moveto 46 (TATCC) alignedtext
+grestore
+% CTATC->TATCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 370.33 moveto
+267 362.26 267 352.65 267 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 343.67 moveto
+267 333.67 lineto
+263.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 343.67 moveto
+267 333.67 lineto
+263.5 343.67 lineto
+closepath stroke
+grestore
+% AGTAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 463 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 459.4 moveto 49 (AGTAC) alignedtext
+grestore
+% GTACG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 389 46.17 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23 385.4 moveto 50 (GTACG) alignedtext
+grestore
+% AGTAC->GTACG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 444.33 moveto
+48 436.26 48 426.65 48 417.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 417.67 moveto
+48 407.67 lineto
+44.5 417.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 417.67 moveto
+48 407.67 lineto
+44.5 417.67 lineto
+closepath stroke
+grestore
+% TACGC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 315 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 311.4 moveto 49 (TACGC) alignedtext
+grestore
+% GTACG->TACGC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 370.33 moveto
+48 362.26 48 352.65 48 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath stroke
+grestore
+% GGCCT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+211 907 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+185 903.4 moveto 52 (GGCCT) alignedtext
+grestore
+% GCCTC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+155 833 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+130.5 829.4 moveto 49 (GCCTC) alignedtext
+grestore
+% GGCCT->GCCTC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 197.44 889.09 moveto
+190.57 880 182.09 868.79 174.52 858.79 curveto
+stroke
+0 0 0 edgecolor
+newpath 177.26 856.61 moveto
+168.43 850.75 lineto
+171.67 860.83 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 177.26 856.61 moveto
+168.43 850.75 lineto
+171.67 860.83 lineto
+closepath stroke
+grestore
+% GCCTG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 833 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+241 829.4 moveto 52 (GCCTG) alignedtext
+grestore
+% GGCCT->GCCTG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 224.56 889.09 moveto
+231.38 880.06 239.79 868.96 247.32 859.01 curveto
+stroke
+0 0 0 edgecolor
+newpath 250.13 861.09 moveto
+253.37 851 lineto
+244.55 856.87 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 250.13 861.09 moveto
+253.37 851 lineto
+244.55 856.87 lineto
+closepath stroke
+grestore
+% CCTCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+155 759 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+131 755.4 moveto 48 (CCTCA) alignedtext
+grestore
+% GCCTC->CCTCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 155 814.33 moveto
+155 806.26 155 796.65 155 787.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 158.5 787.67 moveto
+155 777.67 lineto
+151.5 787.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 158.5 787.67 moveto
+155 777.67 lineto
+151.5 787.67 lineto
+closepath stroke
+grestore
+% CCTGG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 759 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+241 755.4 moveto 52 (CCTGG) alignedtext
+grestore
+% GCCTG->CCTGG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 814.33 moveto
+267 806.26 267 796.65 267 787.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 787.67 moveto
+267 777.67 lineto
+263.5 787.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 787.67 moveto
+267 777.67 lineto
+263.5 787.67 lineto
+closepath stroke
+grestore
+% ACGCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 241 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 237.4 moveto 51 (ACGCC) alignedtext
+grestore
+% CGCCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 167 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 163.4 moveto 51 (CGCCC) alignedtext
+grestore
+% ACGCC->CGCCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 222.33 moveto
+48 214.26 48 204.65 48 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath stroke
+grestore
+% GCCCG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 93 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 89.4 moveto 52 (GCCCG) alignedtext
+grestore
+% CGCCC->GCCCG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 148.33 moveto
+48 140.26 48 130.65 48 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath stroke
+grestore
+% CCTCA->CTCAG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 155 740.33 moveto
+155 732.26 155 722.65 155 713.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 158.5 713.67 moveto
+155 703.67 lineto
+151.5 713.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 158.5 713.67 moveto
+155 703.67 lineto
+151.5 713.67 lineto
+closepath stroke
+grestore
+% CTGGC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 685 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+241 681.4 moveto 52 (CTGGC) alignedtext
+grestore
+% CCTGG->CTGGC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 740.33 moveto
+267 732.26 267 722.65 267 713.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 713.67 moveto
+267 703.67 lineto
+263.5 713.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 713.67 moveto
+267 703.67 lineto
+263.5 713.67 lineto
+closepath stroke
+grestore
+% TGGCT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 611 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+241.5 607.4 moveto 51 (TGGCT) alignedtext
+grestore
+% CTGGC->TGGCT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 666.33 moveto
+267 658.26 267 648.65 267 639.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 639.67 moveto
+267 629.67 lineto
+263.5 639.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 639.67 moveto
+267 629.67 lineto
+263.5 639.67 lineto
+closepath stroke
+grestore
+% CAGTA->AGTAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 131.78 520.94 moveto
+116.5 510.37 96.3 496.4 79.56 484.83 curveto
+stroke
+0 0 0 edgecolor
+newpath 81.39 481.84 moveto
+71.17 479.03 lineto
+77.41 487.59 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 81.39 481.84 moveto
+71.17 479.03 lineto
+77.41 487.59 lineto
+closepath stroke
+grestore
+% AGTAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+158 463 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+133.5 459.4 moveto 49 (AGTAA) alignedtext
+grestore
+% CAGTA->AGTAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 155.76 518.33 moveto
+156.08 510.26 156.47 500.65 156.84 491.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 160.34 491.8 moveto
+157.24 481.67 lineto
+153.34 491.52 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 160.34 491.8 moveto
+157.24 481.67 lineto
+153.34 491.52 lineto
+closepath stroke
+grestore
+% AGTAA->GTAAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 158 444.33 moveto
+158 436.26 158 426.65 158 417.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 161.5 417.67 moveto
+158 407.67 lineto
+154.5 417.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 161.5 417.67 moveto
+158 407.67 lineto
+154.5 417.67 lineto
+closepath stroke
+grestore
+% ACTAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+159 167 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+134.5 163.4 moveto 49 (ACTAA) alignedtext
+grestore
+% ACTAA->CTAAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 159.25 148.33 moveto
+159.36 140.26 159.49 130.65 159.61 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.11 121.71 moveto
+159.75 111.67 lineto
+156.11 121.62 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.11 121.71 moveto
+159.75 111.67 lineto
+156.11 121.62 lineto
+closepath stroke
+grestore
+% AACTA->ACTAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 159 222.33 moveto
+159 214.26 159 204.65 159 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 162.5 195.67 moveto
+159 185.67 lineto
+155.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 162.5 195.67 moveto
+159 185.67 lineto
+155.5 195.67 lineto
+closepath stroke
+grestore
+% TACGC->ACGCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 296.33 moveto
+48 288.26 48 278.65 48 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath stroke
+grestore
+% CCCGG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 19 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 15.4 moveto 52 (CCCGG) alignedtext
+grestore
+% GCCCG->CCCGG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 74.33 moveto
+48 66.26 48 56.65 48 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath stroke
+grestore
+% GGCTA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 537 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+241.5 533.4 moveto 51 (GGCTA) alignedtext
+grestore
+% TGGCT->GGCTA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 592.33 moveto
+267 584.26 267 574.65 267 565.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 565.67 moveto
+267 555.67 lineto
+263.5 565.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 565.67 moveto
+267 555.67 lineto
+263.5 565.67 lineto
+closepath stroke
+grestore
+% GGCTA->GCTAT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 518.33 moveto
+267 510.26 267 500.65 267 491.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 491.67 moveto
+267 481.67 lineto
+263.5 491.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 491.67 moveto
+267 481.67 lineto
+263.5 491.67 lineto
+closepath stroke
+grestore
+% ATCCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 241 44.05 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+243.5 237.4 moveto 47 (ATCCC) alignedtext
+grestore
+% TATCC->ATCCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 296.33 moveto
+267 288.26 267 278.65 267 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 269.67 moveto
+267 259.67 lineto
+263.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 269.67 moveto
+267 259.67 lineto
+263.5 269.67 lineto
+closepath stroke
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 359 970
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/graph/mergeTest/BridgePath b/genomix/genomix-pregelix/graph/mergeTest/BridgePath
new file mode 100644
index 0000000..0717611
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/BridgePath
@@ -0,0 +1,2 @@
+TTTCCACTCCGTG

+TTTCCACCCCGTG
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/graph/mergeTest/CyclePath b/genomix/genomix-pregelix/graph/mergeTest/CyclePath
new file mode 100644
index 0000000..04080f4
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/CyclePath
@@ -0,0 +1 @@
+GCAACTTCATCAACT
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/graph/mergeTest/LongPath b/genomix/genomix-pregelix/graph/mergeTest/LongPath
new file mode 100644
index 0000000..acd3c1a
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/LongPath
@@ -0,0 +1 @@
+GGCCTCAGTACGCCCGG
diff --git a/genomix/genomix-pregelix/graph/mergeTest/Path b/genomix/genomix-pregelix/graph/mergeTest/Path
new file mode 100644
index 0000000..f63bbcf
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/Path
@@ -0,0 +1 @@
+GGCCTCAGTACG
diff --git a/genomix/genomix-pregelix/graph/mergeTest/SimplePath b/genomix/genomix-pregelix/graph/mergeTest/SimplePath
new file mode 100644
index 0000000..80c03af
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/SimplePath
@@ -0,0 +1,3 @@
+ATATCGCATC

+AAGACAGCAC

+GCGGCAAGAA

diff --git a/genomix/genomix-pregelix/graph/mergeTest/SinglePath b/genomix/genomix-pregelix/graph/mergeTest/SinglePath
new file mode 100644
index 0000000..56ef5f8
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/SinglePath
@@ -0,0 +1 @@
+AGACAACAGT

diff --git a/genomix/genomix-pregelix/graph/mergeTest/ThreeKmer b/genomix/genomix-pregelix/graph/mergeTest/ThreeKmer
new file mode 100644
index 0000000..ec004fa
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/ThreeKmer
@@ -0,0 +1 @@
+ACTCGGT
diff --git a/genomix/genomix-pregelix/graph/mergeTest/TreePath b/genomix/genomix-pregelix/graph/mergeTest/TreePath
new file mode 100644
index 0000000..f3c13ce
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/TreePath
@@ -0,0 +1,3 @@
+GGCCTGGCTATCCC

+GGCCTCAGTAACTAAAC

+GGCCTCAGTACGCCCGG
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/graph/mergeTest/TwoKmer b/genomix/genomix-pregelix/graph/mergeTest/TwoKmer
new file mode 100644
index 0000000..86790c6
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/TwoKmer
@@ -0,0 +1,2 @@
+ACACT	|G	1
+CACTG	A|	1
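For reference, the graph/* fixtures above hold tab-separated k-mer records of the form <kmer> TAB <predecessor chars>|<successor chars> TAB <count>, while the graph/mergeTest/ files mostly hold the raw reads they appear to be derived from; for example the single read ACTCGGT in mergeTest/ThreeKmer corresponds exactly to the three k=5 records in graph/ThreeKmer. The following is a minimal sketch of that decomposition, for illustration only (it is not part of the patch, and a real graph builder would additionally merge duplicate k-mers and sum their counts):

import java.util.LinkedHashMap;
import java.util.Map;

public class KmerRecordSketch {
    /** Decompose one read into (kmer, "pred|succ") records, each with count 1. */
    public static Map<String, String> kmerize(String read, int k) {
        Map<String, String> records = new LinkedHashMap<String, String>();
        for (int i = 0; i + k <= read.length(); i++) {
            String kmer = read.substring(i, i + k);
            String pred = (i == 0) ? "" : String.valueOf(read.charAt(i - 1));
            String succ = (i + k == read.length()) ? "" : String.valueOf(read.charAt(i + k));
            records.put(kmer, pred + "|" + succ);
        }
        return records;
    }

    public static void main(String[] args) {
        // For "ACTCGGT" (graph/mergeTest/ThreeKmer) with k = 5 this prints, order aside,
        // the same three records as graph/ThreeKmer: ACTCG |G, CTCGG A|T, TCGGT C|
        for (Map.Entry<String, String> e : kmerize("ACTCGGT", 5).entrySet())
            System.out.println(e.getKey() + "\t" + e.getValue() + "\t" + 1);
    }
}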
diff --git a/genomix/genomix-pregelix/pom.xml b/genomix/genomix-pregelix/pom.xml
new file mode 100644
index 0000000..c429105
--- /dev/null
+++ b/genomix/genomix-pregelix/pom.xml
@@ -0,0 +1,188 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>edu.uci.ics.pregelix</groupId>
+  <artifactId>genomix-pregelix</artifactId>
+  <packaging>jar</packaging>
+  <version>0.2.4-SNAPSHOT</version>
+  <name>genomix-pregelix</name>
+
+  <properties>
+    <jvm.extraargs/>
+  </properties>
+
+  <profiles>
+    <profile>
+      <id>macosx</id>
+      <activation>
+        <os>
+          <name>mac os x</name>
+        </os>
+        <jdk>1.7</jdk>
+      </activation>
+      <properties>
+        <jvm.extraargs>-Djava.nio.channels.spi.SelectorProvider=sun.nio.ch.KQueueSelectorProvider</jvm.extraargs>
+      </properties>
+    </profile>
+  </profiles>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>2.0.2</version>
+        <configuration>
+          <source>1.7</source>
+          <target>1.7</target>
+          <fork>true</fork>
+        </configuration>
+      </plugin>
+      <plugin>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <configuration>
+          <descriptorRefs>
+            <descriptorRef>jar-with-dependencies</descriptorRef>
+          </descriptorRefs>
+        </configuration>
+        <executions>
+          <execution>
+            <id>make-my-jar-with-dependencies</id>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>appassembler-maven-plugin</artifactId>
+        <version>1.3</version>
+        <executions>
+          <execution>
+            <configuration>
+              <programs>
+                <program>
+                  <mainClass>edu.uci.ics.genomix.pregelix.example.Client</mainClass>
+                  <name>pregelix</name>
+                </program>
+              </programs>
+              <repositoryLayout>flat</repositoryLayout>
+              <repositoryName>lib</repositoryName>
+            </configuration>
+            <phase>package</phase>
+            <goals>
+              <goal>assemble</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <version>2.7.2</version>
+        <configuration>
+          <forkMode>pertest</forkMode>
+          <argLine>-enableassertions -Xmx2047m -Dfile.encoding=UTF-8
+            -Djava.util.logging.config.file=src/test/resources/logging.properties</argLine>
+          <includes>
+            <include>**/*TestSuite.java</include>
+            <include>**/*Test.java</include>
+          </includes>
+        </configuration>
+      </plugin>
+      <plugin>
+        <artifactId>maven-clean-plugin</artifactId>
+        <configuration>
+          <filesets>
+            <fileset>
+              <directory>.</directory>
+              <includes>
+                <include>teststore*</include>
+                <include>edu*</include>
+                <include>actual*</include>
+                <include>build*</include>
+                <include>expect*</include>
+                <include>ClusterController*</include>
+                <include>edu.uci.*</include>
+              </includes>
+            </fileset>
+          </filesets>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.8.1</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>edu.uci.ics.hyracks</groupId>
+      <artifactId>pregelix-core</artifactId>
+      <version>0.2.4-SNAPSHOT</version>
+      <type>jar</type>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>edu.uci.ics.hyracks</groupId>
+      <artifactId>genomix-data</artifactId>
+      <version>0.2.4-SNAPSHOT</version>
+      <type>jar</type>
+      <scope>compile</scope>
+    </dependency>
+  </dependencies>
+
+  <scm>
+    <connection>scm:svn:https://hyracks.googlecode.com/svn/trunk/fullstack/pregelix</connection>
+    <developerConnection>scm:svn:https://hyracks.googlecode.com/svn/trunk/fullstack/pregelix</developerConnection>
+    <url>http://code.google.com/p/hyracks/source/browse/#svn/trunk/fullstack/pregelix</url>
+  </scm>
+
+  <distributionManagement>
+    <repository>
+      <id>hyracks-releases</id>
+      <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-releases/</url>
+    </repository>
+    <snapshotRepository>
+      <id>hyracks-snapshots</id>
+      <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-snapshots/</url>
+    </snapshotRepository>
+  </distributionManagement>
+
+  <reporting>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-changelog-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </reporting>
+
+  <repositories>
+    <repository>
+      <id>hyracks-public</id>
+      <url>http://obelix.ics.uci.edu/nexus/content/groups/hyracks-public/</url>
+    </repository>
+    <repository>
+      <id>jboss-public</id>
+      <url>https://repository.jboss.org/nexus/content/groups/public/</url>
+    </repository>
+  </repositories>
+
+  <pluginRepositories>
+    <pluginRepository>
+      <id>hyracks-public</id>
+      <url>http://obelix.ics.uci.edu/nexus/content/groups/hyracks-public/</url>
+      <releases>
+        <updatePolicy>always</updatePolicy>
+      </releases>
+    </pluginRepository>
+  </pluginRepositories>
+</project>
+
+
diff --git a/genomix/genomix-pregelix/src/main/assembly/binary-assembly.xml b/genomix/genomix-pregelix/src/main/assembly/binary-assembly.xml
new file mode 100755
index 0000000..0500499
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/assembly/binary-assembly.xml
@@ -0,0 +1,19 @@
+<assembly>
+  <id>binary-assembly</id>
+  <formats>
+    <format>zip</format>
+    <format>dir</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory>
+  <fileSets>
+    <fileSet>
+      <directory>target/appassembler/bin</directory>
+      <outputDirectory>bin</outputDirectory>
+      <fileMode>0755</fileMode>
+    </fileSet>
+    <fileSet>
+      <directory>target/appassembler/lib</directory>
+      <outputDirectory>lib</outputDirectory>
+    </fileSet>
+  </fileSets>
+</assembly>
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/GraphVertexOperation.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/GraphVertexOperation.java
new file mode 100644
index 0000000..6a5299b
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/GraphVertexOperation.java
@@ -0,0 +1,64 @@
+package edu.uci.ics.genomix.pregelix;
+
+import org.apache.hadoop.io.BytesWritable;
+
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerUtil;
+
+public class GraphVertexOperation {
+	
+	/**
+	 * generate the valid data (byte[]) from a BytesWritable
+	 */
+	public static byte[] generateValidDataFromBytesWritable(BytesWritable bw){
+		byte[] wholeBytes = bw.getBytes();
+		int validNum = bw.getLength();
+		byte[] validBytes = new byte[validNum];
+		for(int i = 0; i < validNum; i++)
+			validBytes[i] = wholeBytes[i];
+		return validBytes;
+	}
+	/**
+	 * Path vertex (single vertex): in-degree = out-degree = 1
+	 * @param value the adjacency byte of the vertex
+	 */
+	public static boolean isPathVertex(byte value){
+		if(KmerUtil.inDegree(value) == 1 && KmerUtil.outDegree(value) == 1)
+			return true;
+		return false;
+	}
+	/**
+	 * Head vertex: out-degree > 0 and not a path vertex
+	 * @param value the adjacency byte of the vertex
+	 */
+	public static boolean isHeadVertex(byte value){
+		if(KmerUtil.outDegree(value) > 0 && !isPathVertex(value))
+			return true;
+		return false;
+	}
+	/**
+	 * Rear vertex: in-degree > 0 and not a path vertex
+	 * @param value the adjacency byte of the vertex
+	 */
+	public static boolean isRearVertex(byte value){
+		if(KmerUtil.inDegree(value) > 0 && !isPathVertex(value))
+			return true;
+		return false;
+	}
+	/**
+	 * update the right (successor) neighbor based on the next vertexId
+	 */
+	public static byte updateRightNeighberByVertexId(byte oldVertexValue, byte[] neighberVertexId, int k){
+		
+		String neighberVertex = Kmer.recoverKmerFrom(k, neighberVertexId, 0, neighberVertexId.length);
+		
+		byte newBit = Kmer.GENE_CODE.getAdjBit((byte)neighberVertex.charAt(neighberVertex.length() - 1));
+		return (byte) ((byte)(oldVertexValue & 0xF0) | (byte) (newBit & 0x0F));
+	}
+	/**
+	 * update the right (successor) neighbor
+	 */
+	public static byte updateRightNeighber(byte oldVertexValue, byte newVertexValue){
+		return (byte) ((byte)(oldVertexValue & 0xF0) | (byte) (newVertexValue & 0x0F));
+	}
+}
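KmerUtil and Kmer ship in the genomix-data module and are not part of this patch, so the sketch below is illustration only, not their implementation. It assumes the adjacency-byte layout documented in LoadGraphVertex's class comment further down: successor (succeed-node) bits in the low nibble, predecessor (precursor-node) bits in the high nibble, which is also the convention the 0xF0/0x0F masks in updateRightNeighber rely on.

public class AdjacencyByteSketch {
    // succeed (outgoing) nodes live in the low nibble: A=1, G=2, C=4, T=8
    static int outDegree(byte value) {
        return Integer.bitCount(value & 0x0F);
    }

    // precursor (incoming) nodes live in the high nibble: A=16, G=32, C=64, T=128
    static int inDegree(byte value) {
        return Integer.bitCount((value & 0xF0) >>> 4);
    }

    public static void main(String[] args) {
        byte adj = (byte) (0x40 | 0x01); // precursor C, succeed A
        boolean isPath = inDegree(adj) == 1 && outDegree(adj) == 1;
        System.out.println(inDegree(adj) + " in, " + outDegree(adj) + " out, path vertex = " + isPath);
    }
}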
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LoadGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LoadGraphVertex.java
new file mode 100644
index 0000000..e22b20d
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LoadGraphVertex.java
@@ -0,0 +1,71 @@
+package edu.uci.ics.genomix.pregelix;
+
+import java.util.Iterator;
+
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.BinaryLoadGraphInputFormat;
+import edu.uci.ics.genomix.pregelix.format.BinaryLoadGraphOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+
+/*
+ * vertexId: BytesWritable
+ * vertexValue: ByteWritable
+ * edgeValue: NullWritable
+ * message: MessageWritable
+ * 
+ * DNA:
+ * A: 00
+ * C: 01
+ * G: 10
+ * T: 11
+ * 
+ * succeed node
+ *  A 00000001 1
+ *  G 00000010 2
+ *  C 00000100 4
+ *  T 00001000 8
+ * precursor node
+ *  A 00010000 16
+ *  G 00100000 32
+ *  C 01000000 64
+ *  T 10000000 128
+ *  
+ * For example, ONE LINE in input file: 00,01,10	0001,0010,
+ * That means that vertexId is ACG, its succeed node is A and its precursor node is C.
+ * The succeed node and precursor node will be stored in vertexValue and we don't use edgeValue.
+ * The details about message are in edu.uci.ics.genomix.pregelix.io.MessageWritable. 
+ */
+public class LoadGraphVertex extends Vertex<BytesWritable, ByteWritable, NullWritable, MessageWritable>{
+
+	/**
+	 * For test, just output original file
+	 */
+	@Override
+	public void compute(Iterator<MessageWritable> msgIterator) {
+		voteToHalt();
+	}
+
+	/**
+	 * @param args
+	 */
+	public static void main(String[] args) throws Exception {
+		//final int k = Integer.parseInt(args[0]);
+        PregelixJob job = new PregelixJob(LoadGraphVertex.class.getSimpleName());
+        job.setVertexClass(LoadGraphVertex.class);
+        /**
+         * BinaryInput and BinaryOutput
+         */
+        job.setVertexInputFormatClass(BinaryLoadGraphInputFormat.class); 
+        job.setVertexOutputFormatClass(BinaryLoadGraphOutputFormat.class); 
+        job.setOutputKeyClass(BytesWritable.class);
+        job.setOutputValueClass(ValueStateWritable.class);
+        Client.run(args, job);
+	}
+}
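Taking the 2-bit base codes and the adjacency-bit table from the class comment above at face value, a record in that textual form can be decoded as in the stand-alone sketch below. The sample field values here are hypothetical (chosen so the succeed node is A and the precursor node is C), and the sketch is only an illustration of the encoding; the job itself reads vertices through BinaryLoadGraphInputFormat rather than by text parsing.

import java.util.ArrayList;
import java.util.List;

public class AdjacencyLineSketch {
    private static final char[] CODE = { 'A', 'C', 'G', 'T' };      // 2-bit value -> base
    private static final char[] ADJ_ORDER = { 'A', 'G', 'C', 'T' }; // adjacency bit position -> base

    /** Decode one 4-bit neighbour field, e.g. "0001" -> [A], "0100" -> [C]. */
    static List<Character> neighbours(String bits) {
        List<Character> bases = new ArrayList<Character>();
        int v = Integer.parseInt(bits, 2);
        for (int i = 0; i < 4; i++)
            if ((v & (1 << i)) != 0)
                bases.add(ADJ_ORDER[i]);
        return bases;
    }

    public static void main(String[] args) {
        String line = "00,01,10\t0001,0100";   // hypothetical sample record
        String[] parts = line.split("\t");

        StringBuilder kmer = new StringBuilder();
        for (String c : parts[0].split(","))
            kmer.append(CODE[Integer.parseInt(c, 2)]);

        String[] adj = parts[1].split(",");
        System.out.println("vertexId  = " + kmer);               // ACG
        System.out.println("succeed   = " + neighbours(adj[0])); // [A]
        System.out.println("precursor = " + neighbours(adj[1])); // [C]
    }
}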
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java
new file mode 100644
index 0000000..757d8f8
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java
@@ -0,0 +1,298 @@
+package edu.uci.ics.genomix.pregelix;
+
+import java.util.Iterator;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.LogAlgorithmForMergeGraphInputFormat;
+import edu.uci.ics.genomix.pregelix.format.LogAlgorithmForMergeGraphOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.LogAlgorithmMessageWritable;
+import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.genomix.pregelix.type.Message;
+import edu.uci.ics.genomix.pregelix.type.State;
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerUtil;
+
+/*
+ * vertexId: BytesWritable
+ * vertexValue: ValueStateWritable
+ * edgeValue: NullWritable
+ * message: LogAlgorithmMessageWritable
+ * 
+ * DNA:
+ * A: 00
+ * C: 01
+ * G: 10
+ * T: 11
+ * 
+ * successor node
+ *  A 00000001 1
+ *  G 00000010 2
+ *  C 00000100 4
+ *  T 00001000 8
+ * precursor node
+ *  A 00010000 16
+ *  G 00100000 32
+ *  C 01000000 64
+ *  T 10000000 128
+ *  
+ * For example, ONE LINE in input file: 00,01,10	0001,0010,
+ * That means the vertexId is ACG, its successor node is A and its precursor node is C.
+ * The successor and precursor nodes are stored in vertexValue; edgeValue is not used.
+ * The details about messages are in edu.uci.ics.genomix.pregelix.io.LogAlgorithmMessageWritable.
+ */
+public class LogAlgorithmForMergeGraphVertex extends Vertex<BytesWritable, ValueStateWritable, NullWritable, LogAlgorithmMessageWritable>{
+	public static final String KMER_SIZE = "LogAlgorithmForMergeGraphVertex.kmerSize";
+	public static int kmerSize = -1;
+	
+	private byte[] tmpVertexId;
+	private byte[] tmpDestVertexId;
+	private BytesWritable destVertexId = new BytesWritable();
+	private byte[] mergeChainVertexId;
+	private int lengthOfMergeChainVertex;
+	private byte tmpVertexValue;
+	private ValueStateWritable tmpVal = new ValueStateWritable();
+	private LogAlgorithmMessageWritable tmpMsg = new LogAlgorithmMessageWritable();
+	/**
+	 * Log Algorithm for path merge graph
+	 */
+	
+	/**
+     *	Load KmerSize
+     */
+	public LogAlgorithmForMergeGraphVertex(){
+		
+	}
+	
+	@Override
+	public void compute(Iterator<LogAlgorithmMessageWritable> msgIterator) {
+		if(kmerSize == -1)
+			kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+		tmpVertexId = GraphVertexOperation.generateValidDataFromBytesWritable(getVertexId());
+		tmpVal = getVertexValue();
+		if (getSuperstep() == 1) {
+			tmpMsg.setChainVertexId(new byte[0]);
+			if(GraphVertexOperation.isHeadVertex(tmpVal.getValue())){
+				tmpMsg.setMessage(Message.START);
+				for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
+					if((tmpVal.getValue() & (1 << x)) != 0){
+						tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(kmerSize, tmpVertexId, 0, tmpVertexId.length, x);
+						destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
+						sendMsg(destVertexId,tmpMsg);
+					}
+				}
+				voteToHalt();
+			}
+			if(GraphVertexOperation.isRearVertex(tmpVal.getValue())){
+				tmpMsg.setMessage(Message.END);
+				
+				for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
+					if(((tmpVal.getValue()>> 4) & (1 << x)) != 0){
+						tmpDestVertexId = KmerUtil.shiftKmerWithPreCode(kmerSize, tmpVertexId, 0, tmpVertexId.length, x);
+						destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
+						sendMsg(destVertexId,tmpMsg);
+					}
+				}
+				voteToHalt();
+			}
+			if(GraphVertexOperation.isPathVertex(tmpVal.getValue())){
+				tmpVal.setState(State.MID_VERTEX);
+				setVertexValue(tmpVal);
+			}
+			if(!GraphVertexOperation.isHeadVertex(tmpVal.getValue())
+					&& !GraphVertexOperation.isRearVertex(tmpVal.getValue())
+					&& !GraphVertexOperation.isPathVertex(tmpVal.getValue()))
+				voteToHalt();
+		}
+		else if(getSuperstep() == 2){
+			while(msgIterator.hasNext()){
+				if(!GraphVertexOperation.isPathVertex(tmpVal.getValue())){
+					msgIterator.next();
+					voteToHalt();
+				}
+				else{
+					tmpMsg = msgIterator.next();
+					if(tmpMsg.getMessage() == Message.START && tmpVal.getState() == State.MID_VERTEX){
+						tmpVal.setState(State.START_VERTEX);
+						setVertexValue(tmpVal);
+					}
+					else if(tmpMsg.getMessage() == Message.END && tmpVal.getState() == State.MID_VERTEX){
+						tmpVal.setState(State.END_VERTEX);
+						setVertexValue(tmpVal);
+						voteToHalt();
+					}
+					else
+						voteToHalt();
+				}
+			}
+		}
+		//head node sends message to path node
+		else if(getSuperstep()%3 == 0){
+			if(getSuperstep() == 3){
+				tmpMsg = new LogAlgorithmMessageWritable();
+				if(Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpVal.getValue() & 0x0F)) == -1)
+					voteToHalt();
+				else{
+					tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(kmerSize, tmpVertexId, 
+							0, tmpVertexId.length, 
+							Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpVal.getValue() & 0x0F)));
+					destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
+					if(tmpVal.getState() == State.START_VERTEX){
+						tmpMsg.setMessage(Message.START);
+						tmpMsg.setSourceVertexId(getVertexId().getBytes());
+						sendMsg(destVertexId, tmpMsg);
+						voteToHalt();
+					}
+					else if(tmpVal.getState() != State.END_VERTEX && tmpVal.getState() != State.FINAL_DELETE){
+						tmpMsg.setMessage(Message.NON);
+						tmpMsg.setSourceVertexId(getVertexId().getBytes());
+						sendMsg(destVertexId,tmpMsg);
+						voteToHalt();
+					}
+				}
+			}
+			else{
+				if(msgIterator.hasNext()){
+					tmpMsg = msgIterator.next();
+					byte[] lastKmer = KmerUtil.getLastKmerFromChain(kmerSize,
+							tmpVal.getLengthOfMergeChain(),
+							tmpVal.getMergeChain(),
+							0, tmpVal.getMergeChain().length);
+					if(Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpVal.getValue() & 0x0F)) == -1)
+						voteToHalt();
+					else{
+						tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(kmerSize, lastKmer, 
+								0, lastKmer.length,
+								Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpVal.getValue() & 0x0F))); //tmpMsg.getNeighberInfo()
+						destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
+						if(tmpVal.getState() == State.START_VERTEX){
+							tmpMsg.setMessage(Message.START);
+							tmpMsg.setSourceVertexId(getVertexId().getBytes());
+							sendMsg(destVertexId, tmpMsg);
+							voteToHalt();
+						}
+						else if(tmpVal.getState() != State.END_VERTEX && tmpVal.getState() != State.FINAL_DELETE){
+							tmpMsg.setMessage(Message.NON);
+							tmpMsg.setSourceVertexId(getVertexId().getBytes());
+							sendMsg(destVertexId,tmpMsg);
+						}
+					}
+				}
+			}
+		}
+		
+		//path node sends message back to head node
+		else if(getSuperstep()%3 == 1){
+			if(msgIterator.hasNext()){
+				tmpMsg = msgIterator.next();
+				int message = tmpMsg.getMessage();
+				if(tmpVal.getLengthOfMergeChain() == 0){
+					tmpVal.setLengthOfMergeChain(kmerSize);
+					tmpVal.setMergeChain(tmpVertexId);
+					setVertexValue(tmpVal);
+				}
+				tmpMsg.setLengthOfChain(tmpVal.getLengthOfMergeChain());
+				tmpMsg.setChainVertexId(tmpVal.getMergeChain());
+				
+				tmpMsg.setNeighberInfo(tmpVal.getValue()); //set neighber
+				tmpMsg.setSourceVertexState(tmpVal.getState());
+				
+				//kill Message because it has been merged by the head
+				if(tmpVal.getState() == State.END_VERTEX || tmpVal.getState() == State.FINAL_DELETE){
+					tmpMsg.setMessage(Message.END);
+					tmpVal.setState(State.FINAL_DELETE);
+					setVertexValue(tmpVal);
+					//deleteVertex(getVertexId());
+				}
+				else
+					tmpMsg.setMessage(Message.NON);
+				
+				if(message == Message.START){
+					tmpVal.setState(State.TODELETE);
+					setVertexValue(tmpVal);
+				}
+				destVertexId.set(tmpMsg.getSourceVertexId(), 0, tmpMsg.getSourceVertexId().length);
+				sendMsg(destVertexId,tmpMsg);
+				voteToHalt();
+			}
+			else{
+				if(getVertexValue().getState() != State.START_VERTEX
+						&& getVertexValue().getState() != State.END_VERTEX && getVertexValue().getState() != State.FINAL_DELETE)
+					deleteVertex(getVertexId()); //killSelf because it doesn't receive any message
+			}
+		}
+		else if(getSuperstep()%3 == 2){
+			if(tmpVal.getState() == State.TODELETE)
+				deleteVertex(getVertexId()); //killSelf
+			else{
+				if(msgIterator.hasNext()){
+					tmpMsg = msgIterator.next();
+
+					if(tmpMsg.getMessage() == Message.END){
+						if(tmpVal.getState() != State.START_VERTEX)
+							tmpVal.setState(State.END_VERTEX);
+						else
+							tmpVal.setState(State.FINAL_VERTEX);
+					}
+						
+					if(getSuperstep() == 5){
+						lengthOfMergeChainVertex = kmerSize;
+						mergeChainVertexId = tmpVertexId;
+					}
+					else{
+						lengthOfMergeChainVertex = tmpVal.getLengthOfMergeChain(); 
+						mergeChainVertexId = tmpVal.getMergeChain(); 
+					}
+					byte[] tmplastKmer = KmerUtil.getLastKmerFromChain(tmpMsg.getLengthOfChain() - kmerSize + 1,
+							tmpMsg.getLengthOfChain(), tmpMsg.getChainVertexId(),0, tmpMsg.getChainVertexId().length);
+					mergeChainVertexId = KmerUtil.mergeTwoKmer(lengthOfMergeChainVertex, 
+							mergeChainVertexId, 
+							0, mergeChainVertexId.length,
+							tmpMsg.getLengthOfChain() - kmerSize + 1, 
+							tmplastKmer, 0, tmplastKmer.length);
+					lengthOfMergeChainVertex = lengthOfMergeChainVertex + tmpMsg.getLengthOfChain()
+							- kmerSize + 1;
+					tmpVal.setLengthOfMergeChain(lengthOfMergeChainVertex);
+					tmpVal.setMergeChain(mergeChainVertexId);
+
+					tmpVertexValue = GraphVertexOperation.updateRightNeighber(getVertexValue().getValue(),tmpMsg.getNeighberInfo());
+					tmpVal.setValue(tmpVertexValue);
+					if(tmpMsg.getMessage() != Message.END){
+						setVertexValue(tmpVal);
+						tmpMsg = new LogAlgorithmMessageWritable(); //reset
+						tmpMsg.setNeighberInfo(tmpVertexValue);
+						sendMsg(getVertexId(),tmpMsg);
+					}
+				}
+				if(tmpVal.getState() == State.END_VERTEX || tmpVal.getState() == State.FINAL_DELETE)
+					voteToHalt();
+				if(tmpVal.getState() == State.FINAL_VERTEX){
+					//String source = Kmer.recoverKmerFrom(tmpVal.getLengthOfMergeChain(), tmpVal.getMergeChain(), 0, tmpVal.getMergeChain().length);
+					voteToHalt();
+				}
+			}
+			
+		}
+	}
+
+	/**
+	 * @param args
+	 */
+	public static void main(String[] args) throws Exception {
+        PregelixJob job = new PregelixJob(LogAlgorithmForMergeGraphVertex.class.getSimpleName());
+        job.setVertexClass(LogAlgorithmForMergeGraphVertex.class);
+        /**
+         * BinaryInput and BinaryOutput
+         */
+        job.setVertexInputFormatClass(LogAlgorithmForMergeGraphInputFormat.class); 
+        job.setVertexOutputFormatClass(LogAlgorithmForMergeGraphOutputFormat.class); 
+        job.setOutputKeyClass(BytesWritable.class);
+        job.setOutputValueClass(ValueStateWritable.class);
+        job.setDynamicVertexValueSize(true);
+        Client.run(args, job);
+	}
+}
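
As a rough reading of the control flow in compute() above, the log algorithm dispatches on the superstep number; the sketch below only summarizes that dispatch with made-up phase names and does not reproduce the merge logic itself:

    // Illustrative summary (not part of the patch) of the superstep schedule in
    // LogAlgorithmForMergeGraphVertex.compute():
    //   step 1        : head/rear vertices notify their neighbors (START / END)
    //   step 2        : path vertices mark themselves START_VERTEX / END_VERTEX
    //   step % 3 == 0 : a vertex asks its single successor for its chain
    //   step % 3 == 1 : the successor replies with its merge chain, or deletes itself
    //   step % 3 == 2 : the requester appends the reply; TODELETE vertices are removed
    public class LogMergePhases {
        static String phase(long superstep) {
            if (superstep == 1) return "announce-head-and-rear";
            if (superstep == 2) return "mark-start-end";
            switch ((int) (superstep % 3)) {
                case 0:  return "request-successor-chain";
                case 1:  return "reply-with-chain-or-die";
                default: return "append-chain-and-cleanup";
            }
        }

        public static void main(String[] args) {
            for (long s = 1; s <= 8; s++) System.out.println(s + " -> " + phase(s));
        }
    }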
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java
new file mode 100644
index 0000000..12450b4
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java
@@ -0,0 +1,192 @@
+package edu.uci.ics.genomix.pregelix;
+
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerUtil;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.BinaryLoadGraphInputFormat;
+import edu.uci.ics.genomix.pregelix.format.BinaryLoadGraphOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.genomix.pregelix.type.State;
+
+/*
+ * vertexId: BytesWritable
+ * vertexValue: ValueStateWritable
+ * edgeValue: NullWritable
+ * message: MessageWritable
+ * 
+ * DNA:
+ * A: 00
+ * C: 01
+ * G: 10
+ * T: 11
+ * 
+ * successor node
+ *  A 00000001 1
+ *  G 00000010 2
+ *  C 00000100 4
+ *  T 00001000 8
+ * precursor node
+ *  A 00010000 16
+ *  G 00100000 32
+ *  C 01000000 64
+ *  T 10000000 128
+ *  
+ * For example, ONE LINE in input file: 00,01,10	0001,0010,
+ * That means the vertexId is ACG, its successor node is A and its precursor node is C.
+ * The successor and precursor nodes are stored in vertexValue; edgeValue is not used.
+ * The details about messages are in edu.uci.ics.genomix.pregelix.io.MessageWritable.
+ */
+public class MergeGraphVertex extends Vertex<BytesWritable, ValueStateWritable, NullWritable, MessageWritable>{
+	
+	public static final String KMER_SIZE = "MergeGraphVertex.kmerSize";
+	public static int kmerSize = -1;
+	
+    private byte[] tmpVertexId;
+    private byte[] tmpDestVertexId;
+	private BytesWritable destVertexId = new BytesWritable();
+	private BytesWritable tmpChainVertexId = new BytesWritable();
+	private ValueStateWritable tmpVertexValue = new ValueStateWritable();
+	private MessageWritable tmpMsg = new MessageWritable();
+	/**
+	 * Naive Algorithm for path merge graph
+	 * @throws Exception 
+	 * @throws  
+	 */
+	
+	/**
+     *	Load KmerSize
+     */
+	@Override
+	public void compute(Iterator<MessageWritable> msgIterator) {
+		if(kmerSize == -1)
+			kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+		tmpVertexId = GraphVertexOperation.generateValidDataFromBytesWritable(getVertexId());
+		if (getSuperstep() == 1) {
+			if(GraphVertexOperation.isHeadVertex(getVertexValue().getValue())){ 
+				tmpMsg.setSourceVertexId(tmpVertexId);
+				tmpMsg.setHead(tmpVertexId);
+				tmpMsg.setLengthOfChain(0);
+				tmpMsg.setChainVertexId(tmpChainVertexId.getBytes());
+				for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
+					if((getVertexValue().getValue() & (1 << x)) != 0){
+						tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(kmerSize, tmpVertexId, 0, tmpVertexId.length, x);
+						destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
+						sendMsg(destVertexId,tmpMsg);
+					}
+				}
+			}
+		}
+		
+		//path node sends message back to head node
+		else if(getSuperstep()%2 == 0){
+			
+			 if(msgIterator.hasNext()){
+				tmpMsg = msgIterator.next();
+					
+				if(!tmpMsg.isRear()){
+					if(getSuperstep() == 2)
+						tmpMsg.setHead(tmpVertexId);
+					if(GraphVertexOperation.isPathVertex(getVertexValue().getValue())){
+						tmpDestVertexId = tmpMsg.getSourceVertexId();
+						tmpMsg.setNeighberInfo(getVertexValue().getValue()); //set neighber
+						if(tmpMsg.getLengthOfChain() == 0){
+							tmpMsg.setLengthOfChain(kmerSize);
+							tmpMsg.setChainVertexId(tmpVertexId);
+						}
+						else{
+							String source = Kmer.recoverKmerFrom(kmerSize, tmpVertexId, 0, tmpVertexId.length);
+							tmpMsg.setChainVertexId(KmerUtil.mergeKmerWithNextCode(
+									tmpMsg.getLengthOfChain(),
+									tmpMsg.getChainVertexId(), 
+									0, tmpMsg.getChainVertexId().length,
+									Kmer.GENE_CODE.getCodeFromSymbol((byte)source.charAt(source.length() - 1))));
+							tmpMsg.incrementLength();
+							deleteVertex(getVertexId());
+						}
+						destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
+						sendMsg(destVertexId,tmpMsg);
+					}
+					else if(GraphVertexOperation.isRearVertex(getVertexValue().getValue())){
+						if(getSuperstep() == 2)
+							voteToHalt();
+						else{
+							tmpDestVertexId = tmpMsg.getSourceVertexId();
+							tmpMsg.setSourceVertexId(tmpVertexId);
+							tmpMsg.setRear(true);
+							destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
+							sendMsg(destVertexId,tmpMsg);
+						}
+					}
+				}
+				else{
+					tmpVertexValue.setState(State.START_VERTEX);
+					tmpVertexValue.setValue(GraphVertexOperation.updateRightNeighberByVertexId(getVertexValue().getValue(),
+							tmpMsg.getSourceVertexId(), kmerSize));
+					tmpVertexValue.setLengthOfMergeChain(tmpMsg.getLengthOfChain());
+					tmpVertexValue.setMergeChain(tmpMsg.getChainVertexId());
+					setVertexValue(tmpVertexValue);
+					//String source = Kmer.recoverKmerFrom(tmpMsg.getLengthOfChain(), tmpMsg.getChainVertexId(), 0, tmpMsg.getChainVertexId().length);
+					//System.out.print("");
+					/*try {
+						
+						GraphVertexOperation.flushChainToFile(tmpMsg.getChainVertexId(), 
+								tmpMsg.getLengthOfChain(),tmpVertexId);
+					} catch (IOException e) { e.printStackTrace(); }*/
+				}
+			}
+		}
+		//head node sends message to path node
+		else if(getSuperstep()%2 == 1){
+			while (msgIterator.hasNext()){
+				tmpMsg = msgIterator.next();
+				if(!tmpMsg.isRear()){
+					byte[] lastKmer = KmerUtil.getLastKmerFromChain(kmerSize,
+							tmpMsg.getLengthOfChain(),
+							tmpMsg.getChainVertexId(),
+							0, tmpMsg.getChainVertexId().length);
+					tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(kmerSize, lastKmer, 
+							0, lastKmer.length,
+							Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpMsg.getNeighberInfo() & 0x0F)));
+
+					tmpMsg.setSourceVertexId(tmpVertexId);
+					destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
+					sendMsg(destVertexId,tmpMsg);
+				}
+				else{	
+					tmpDestVertexId = tmpMsg.getHead();
+					destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
+					sendMsg(destVertexId,tmpMsg);
+				}
+			}
+		}
+		voteToHalt();
+	}
+
+	/**
+	 * @param args
+	 */
+	public static void main(String[] args) throws Exception {
+        PregelixJob job = new PregelixJob(MergeGraphVertex.class.getSimpleName());
+        job.setVertexClass(MergeGraphVertex.class);
+        /**
+         * BinaryInput and BinaryOutput
+         */
+        job.setVertexInputFormatClass(BinaryLoadGraphInputFormat.class); 
+        job.setVertexOutputFormatClass(BinaryLoadGraphOutputFormat.class); 
+        job.setDynamicVertexValueSize(true);
+        job.setOutputKeyClass(BytesWritable.class);
+        job.setOutputValueClass(ValueStateWritable.class);
+        Client.run(args, job);
+	}
+}
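
The naive merge above repeatedly addresses the next vertex by "shifting" the current kmer with a successor code (KmerUtil.shiftKmerWithNextCode). A string-based analogue of that shift, which is only an illustration and not the real packed-byte KmerUtil implementation, looks like this:

    // Illustrative, string-based analogue of the kmer shift used to address a successor:
    // drop the first base, append the successor base.
    public class KmerShiftDemo {
        static String shiftWithNextSymbol(String kmer, char next) {
            return kmer.substring(1) + next;
        }

        public static void main(String[] args) {
            // Vertex ACG with successor base A addresses the destination vertex CGA.
            System.out.println(shiftWithNextSymbol("ACG", 'A'));  // prints CGA
        }
    }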
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryVertexInputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryVertexInputFormat.java
new file mode 100644
index 0000000..823a984
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryVertexInputFormat.java
@@ -0,0 +1,107 @@
+package edu.uci.ics.genomix.pregelix.api.io.binary;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+
+import edu.uci.ics.pregelix.api.io.VertexInputFormat;
+import edu.uci.ics.pregelix.api.io.VertexReader;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+public class BinaryVertexInputFormat <I extends WritableComparable<?>, V extends Writable, E extends Writable, M extends Writable>
+	extends VertexInputFormat<I, V, E, M>{
+	
+    /** Uses the SequenceFileInputFormat to do everything */
+	protected SequenceFileInputFormat binaryInputFormat = new SequenceFileInputFormat();
+    
+    /**
+     * Abstract class to be implemented by the user based on their specific
+     * vertex input. Easiest to ignore the key value separator and only use key
+     * instead.
+     * 
+     * @param <I>
+     *            Vertex index value
+     * @param <V>
+     *            Vertex value
+     * @param <E>
+     *            Edge value
+     */
+    public static abstract class BinaryVertexReader<I extends WritableComparable<?>, V extends Writable, E extends Writable, M extends Writable>
+            implements VertexReader<I, V, E, M> {
+        /** Internal line record reader */
+        private final RecordReader<BytesWritable,KmerCountValue> lineRecordReader;
+        /** Context passed to initialize */
+        private TaskAttemptContext context;
+
+        /**
+         * Initialize with the LineRecordReader.
+         * 
+         * @param recordReader
+         *            Line record reader from SequenceFileInputFormat
+         */
+        public BinaryVertexReader(RecordReader<BytesWritable, KmerCountValue> recordReader) {
+            this.lineRecordReader = recordReader;
+        }
+
+        @Override
+        public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException,
+                InterruptedException {
+            lineRecordReader.initialize(inputSplit, context);
+            this.context = context;
+        }
+
+        @Override
+        public void close() throws IOException {
+            lineRecordReader.close();
+        }
+
+        @Override
+        public float getProgress() throws IOException, InterruptedException {
+            return lineRecordReader.getProgress();
+        }
+
+        /**
+         * Get the line record reader.
+         * 
+         * @return Record reader to be used for reading.
+         */
+        protected RecordReader<BytesWritable,KmerCountValue> getRecordReader() {
+            return lineRecordReader;
+        }
+
+        /**
+         * Get the context.
+         * 
+         * @return Context passed to initialize.
+         */
+        protected TaskAttemptContext getContext() {
+            return context;
+        }
+    }
+
+    @Override
+    public List<InputSplit> getSplits(JobContext context, int numWorkers) throws IOException, InterruptedException {
+        // Ignore the hint of numWorkers here since we are using SequenceFileInputFormat
+        // to do this for us
+        return binaryInputFormat.getSplits(context);
+    }
+
+	@Override
+	public VertexReader<I, V, E, M> createVertexReader(InputSplit split,
+			TaskAttemptContext context) throws IOException {
+		// TODO Auto-generated method stub
+		return null;
+	}
+
+
+
+
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryVertexOutputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryVertexOutputFormat.java
new file mode 100644
index 0000000..f497f21
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryVertexOutputFormat.java
@@ -0,0 +1,102 @@
+package edu.uci.ics.genomix.pregelix.api.io.binary;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+
+import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.pregelix.api.io.VertexOutputFormat;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+
+/**
+ * Abstract class that users should subclass to use their own text based vertex
+ * output format.
+ * 
+ * @param <I>
+ *            Vertex index value
+ * @param <V>
+ *            Vertex value
+ * @param <E>
+ *            Edge value
+ */
+@SuppressWarnings("rawtypes")
+public abstract class BinaryVertexOutputFormat<I extends WritableComparable, V extends Writable, E extends Writable>
+        extends VertexOutputFormat<I, V, E> {
+    /** Uses the SequenceFileOutputFormat to do everything */
+	protected SequenceFileOutputFormat binaryOutputFormat = new SequenceFileOutputFormat();
+
+    /**
+     * Abstract class to be implemented by the user based on their specific
+     * vertex output. Easiest to ignore the key value separator and only use key
+     * instead.
+     * 
+     * @param <I>
+     *            Vertex index value
+     * @param <V>
+     *            Vertex value
+     * @param <E>
+     *            Edge value
+     */
+    public static abstract class BinaryVertexWriter<I extends WritableComparable, V extends Writable, E extends Writable>
+            implements VertexWriter<I, V, E> {
+        /** Context passed to initialize */
+        private TaskAttemptContext context;
+        /** Internal line record writer */
+        private final RecordWriter<BytesWritable, ValueStateWritable> lineRecordWriter;
+
+        /**
+         * Initialize with the LineRecordWriter.
+         * 
+         * @param lineRecordWriter
+         *            Line record writer from SequenceFileOutputFormat
+         */
+        public BinaryVertexWriter(RecordWriter<BytesWritable, ValueStateWritable> lineRecordWriter) {
+            this.lineRecordWriter = lineRecordWriter;
+        }
+
+        @Override
+        public void initialize(TaskAttemptContext context) throws IOException {
+            this.context = context;
+        }
+
+        @Override
+        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+            lineRecordWriter.close(context);
+        }
+
+        /**
+         * Get the line record writer.
+         * 
+         * @return Record writer to be used for writing.
+         */
+        public RecordWriter<BytesWritable, ValueStateWritable> getRecordWriter() {
+            return lineRecordWriter;
+        }
+
+        /**
+         * Get the context.
+         * 
+         * @return Context passed to initialize.
+         */
+        public TaskAttemptContext getContext() {
+            return context;
+        }
+    }
+
+    @Override
+    public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
+    	binaryOutputFormat.checkOutputSpecs(context);
+    }
+
+    @Override
+    public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
+        return binaryOutputFormat.getOutputCommitter(context);
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java
new file mode 100644
index 0000000..ab0fb57
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java
@@ -0,0 +1,65 @@
+
+package edu.uci.ics.genomix.pregelix.client;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.kohsuke.args4j.CmdLineException;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.pregelix.LogAlgorithmForMergeGraphVertex;
+import edu.uci.ics.genomix.pregelix.MergeGraphVertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.core.base.IDriver.Plan;
+import edu.uci.ics.pregelix.core.driver.Driver;
+
+public class Client {
+
+    private static class Options {
+        @Option(name = "-inputpaths", usage = "comma seprated input paths", required = true)
+        public String inputPaths;
+
+        @Option(name = "-outputpath", usage = "output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-ip", usage = "ip address of cluster controller", required = true)
+        public String ipAddress;
+
+        @Option(name = "-port", usage = "port of cluster controller", required = false)
+        public int port;
+
+        @Option(name = "-plan", usage = "query plan choice", required = false)
+        public Plan planChoice = Plan.OUTER_JOIN;
+        
+        @Option(name = "-kmer-size", usage = "the size of kmer", required = false)
+        public int sizeKmer;
+
+        @Option(name = "-runtime-profiling", usage = "whether to do runtime profifling", required = false)
+        public String profiling = "false";
+    }
+
+    public static void run(String[] args, PregelixJob job) throws Exception {
+        Options options = prepareJob(args, job);
+        Driver driver = new Driver(Client.class);
+        driver.runJob(job, options.planChoice, options.ipAddress, options.port, Boolean.parseBoolean(options.profiling));
+    }
+
+    private static Options prepareJob(String[] args, PregelixJob job) throws CmdLineException, IOException {
+        Options options = new Options();
+        CmdLineParser parser = new CmdLineParser(options);
+        parser.parseArgument(args);
+
+        String[] inputs = options.inputPaths.split(";");
+        FileInputFormat.setInputPaths(job, inputs[0]);
+        for (int i = 1; i < inputs.length; i++)
+            FileInputFormat.addInputPaths(job, inputs[i]);
+        FileOutputFormat.setOutputPath(job, new Path(options.outputPath));
+        job.getConfiguration().setInt(MergeGraphVertex.KMER_SIZE, options.sizeKmer);
+        job.getConfiguration().setInt(LogAlgorithmForMergeGraphVertex.KMER_SIZE, options.sizeKmer);
+        return options;
+    }
+
+}
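
For reference, one plausible way to drive this client programmatically through a vertex job's main(); all argument values here are placeholders, not values taken from the patch:

    // Hypothetical launch of the naive merge job via Client (values are examples only).
    public class RunMergeJobExample {
        public static void main(String[] args) throws Exception {
            String[] jobArgs = {
                "-inputpaths", "/user/me/graph-bin",   // semicolon separated if more than one
                "-outputpath", "/user/me/merged",
                "-ip", "127.0.0.1",
                "-port", "3099",
                "-kmer-size", "5"
            };
            edu.uci.ics.genomix.pregelix.MergeGraphVertex.main(jobArgs);
        }
    }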
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/BinaryLoadGraphInputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/BinaryLoadGraphInputFormat.java
new file mode 100644
index 0000000..4cd22ac
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/BinaryLoadGraphInputFormat.java
@@ -0,0 +1,81 @@
+package edu.uci.ics.genomix.pregelix.format;
+
+import java.io.IOException;
+import java.util.logging.FileHandler;
+import java.util.logging.Logger;
+
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import edu.uci.ics.genomix.type.KmerCountValue;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexReader;
+import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.genomix.pregelix.GraphVertexOperation;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.genomix.pregelix.io.ValueWritable;
+import edu.uci.ics.genomix.pregelix.log.DataLoadLogFormatter;
+import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryVertexInputFormat;
+import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryVertexInputFormat.BinaryVertexReader;
+
+public class BinaryLoadGraphInputFormat extends
+	BinaryVertexInputFormat<BytesWritable, ValueStateWritable, NullWritable, MessageWritable>{
+	/**
+	 * Format INPUT
+	 */
+    @Override
+    public VertexReader<BytesWritable, ValueStateWritable, NullWritable, MessageWritable> createVertexReader(
+            InputSplit split, TaskAttemptContext context) throws IOException {
+        return new BinaryLoadGraphReader(binaryInputFormat.createRecordReader(split, context));
+    }	
+}
+
+@SuppressWarnings("rawtypes")
+class BinaryLoadGraphReader extends
+        BinaryVertexReader<BytesWritable, ValueStateWritable, NullWritable, MessageWritable> {
+    private Vertex vertex;
+    private BytesWritable vertexId = new BytesWritable();
+    private ValueStateWritable vertexValue = new ValueStateWritable();
+
+    public BinaryLoadGraphReader(RecordReader<BytesWritable,KmerCountValue> recordReader) {
+        super(recordReader);
+    }
+
+    @Override
+    public boolean nextVertex() throws IOException, InterruptedException {
+        return getRecordReader().nextKeyValue();
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public Vertex<BytesWritable, ValueStateWritable, NullWritable, MessageWritable> getCurrentVertex() throws IOException,
+            InterruptedException {
+        if (vertex == null)
+            vertex = (Vertex) BspUtils.createVertex(getContext().getConfiguration());
+
+        vertex.getMsgList().clear();
+        vertex.getEdges().clear();
+        
+        vertex.reset();
+        if(getRecordReader() != null){
+            /**
+             * set the src vertex id
+             */
+    		vertexId.set(getRecordReader().getCurrentKey());
+    		vertex.setVertexId(vertexId);
+            /**
+             * set the vertex value
+             */
+            KmerCountValue kmerCountValue = getRecordReader().getCurrentValue();
+            vertexValue.setValue(kmerCountValue.getAdjBitMap()); 
+            vertex.setVertexValue(vertexValue);
+        }
+        
+        return vertex;
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/BinaryLoadGraphOutputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/BinaryLoadGraphOutputFormat.java
new file mode 100644
index 0000000..2b87379
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/BinaryLoadGraphOutputFormat.java
@@ -0,0 +1,40 @@
+package edu.uci.ics.genomix.pregelix.format;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryVertexOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+
+public class BinaryLoadGraphOutputFormat extends 
+	BinaryVertexOutputFormat<BytesWritable, ValueStateWritable, NullWritable> {
+
+        @Override
+        public VertexWriter<BytesWritable, ValueStateWritable, NullWritable> createVertexWriter(TaskAttemptContext context)
+                throws IOException, InterruptedException {
+            RecordWriter<BytesWritable, ValueStateWritable> recordWriter = binaryOutputFormat.getRecordWriter(context);
+            return new BinaryLoadGraphVertexWriter(recordWriter);
+        }
+        
+        /**
+         * Simple VertexWriter that supports {@link BinaryLoadGraphVertex}
+         */
+        public static class BinaryLoadGraphVertexWriter extends
+                BinaryVertexWriter<BytesWritable, ValueStateWritable, NullWritable> {
+            public BinaryLoadGraphVertexWriter(RecordWriter<BytesWritable, ValueStateWritable> lineRecordWriter) {
+                super(lineRecordWriter);
+            }
+
+            @Override
+            public void writeVertex(Vertex<BytesWritable, ValueStateWritable, NullWritable, ?> vertex) throws IOException,
+                    InterruptedException {
+                getRecordWriter().write(vertex.getVertexId(),vertex.getVertexValue());
+            }
+        }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphInputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphInputFormat.java
new file mode 100644
index 0000000..0e74c2d
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphInputFormat.java
@@ -0,0 +1,79 @@
+package edu.uci.ics.genomix.pregelix.format;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryVertexInputFormat;
+import edu.uci.ics.genomix.pregelix.io.LogAlgorithmMessageWritable;
+import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.genomix.pregelix.type.State;
+import edu.uci.ics.genomix.type.KmerCountValue;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexReader;
+import edu.uci.ics.pregelix.api.util.BspUtils;
+
+public class LogAlgorithmForMergeGraphInputFormat extends
+	BinaryVertexInputFormat<BytesWritable, ValueStateWritable, NullWritable, LogAlgorithmMessageWritable>{
+
+	/**
+	 * Format INPUT
+	 */
+    @Override
+    public VertexReader<BytesWritable, ValueStateWritable, NullWritable, LogAlgorithmMessageWritable> createVertexReader(
+            InputSplit split, TaskAttemptContext context) throws IOException {
+        return new BinaryLoadGraphReader(binaryInputFormat.createRecordReader(split, context));
+    }
+    
+    @SuppressWarnings("rawtypes")
+    class BinaryLoadGraphReader extends
+            BinaryVertexReader<BytesWritable, ValueStateWritable, NullWritable, LogAlgorithmMessageWritable> {
+        private Vertex vertex;
+        private BytesWritable vertexId = new BytesWritable();
+        private ValueStateWritable vertexValue = new ValueStateWritable();
+
+        public BinaryLoadGraphReader(RecordReader<BytesWritable,KmerCountValue> recordReader) {
+            super(recordReader);
+        }
+
+        @Override
+        public boolean nextVertex() throws IOException, InterruptedException {
+            return getRecordReader().nextKeyValue();
+        }
+
+        @SuppressWarnings("unchecked")
+        @Override
+        public Vertex<BytesWritable, ValueStateWritable, NullWritable, LogAlgorithmMessageWritable> getCurrentVertex() throws IOException,
+                InterruptedException {
+            if (vertex == null)
+                vertex = (Vertex) BspUtils.createVertex(getContext().getConfiguration());
+
+            vertex.getMsgList().clear();
+            vertex.getEdges().clear();
+            
+            
+            if(getRecordReader() != null){
+	            /**
+	             * set the src vertex id
+	             */
+	            
+        		vertexId.set(getRecordReader().getCurrentKey());
+        		vertex.setVertexId(vertexId);
+	            /**
+	             * set the vertex value
+	             */
+	            KmerCountValue kmerCountValue = getRecordReader().getCurrentValue();
+	            vertexValue.setValue(kmerCountValue.getAdjBitMap()); 
+	            vertexValue.setState(State.NON_VERTEX);
+	            vertex.setVertexValue(vertexValue);
+            }
+            
+            return vertex;
+        }
+    }
+	
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphOutputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphOutputFormat.java
new file mode 100644
index 0000000..aa81066
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphOutputFormat.java
@@ -0,0 +1,44 @@
+package edu.uci.ics.genomix.pregelix.format;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryVertexOutputFormat;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.genomix.pregelix.type.State;
+
+public class LogAlgorithmForMergeGraphOutputFormat extends 
+	BinaryVertexOutputFormat<BytesWritable, ValueStateWritable, NullWritable> {
+
+		
+        @Override
+        public VertexWriter<BytesWritable, ValueStateWritable, NullWritable> createVertexWriter(TaskAttemptContext context)
+                throws IOException, InterruptedException {
+            RecordWriter<BytesWritable, ValueStateWritable> recordWriter = binaryOutputFormat.getRecordWriter(context);
+            return new BinaryLoadGraphVertexWriter(recordWriter);
+        }
+        
+        /**
+         * Simple VertexWriter that supports {@link BinaryLoadGraphVertex}
+         */
+        public static class BinaryLoadGraphVertexWriter extends
+                BinaryVertexWriter<BytesWritable, ValueStateWritable, NullWritable> {
+        	
+            public BinaryLoadGraphVertexWriter(RecordWriter<BytesWritable, ValueStateWritable> lineRecordWriter) {
+                super(lineRecordWriter);
+            }
+
+            @Override
+            public void writeVertex(Vertex<BytesWritable, ValueStateWritable, NullWritable, ?> vertex) throws IOException,
+                    InterruptedException {
+            	if(vertex.getVertexValue().getState() != State.FINAL_DELETE)
+                    getRecordWriter().write(vertex.getVertexId(),vertex.getVertexValue());
+            }
+        }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/Graph.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/Graph.java
new file mode 100644
index 0000000..df09a64
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/Graph.java
@@ -0,0 +1,55 @@
+package edu.uci.ics.genomix.pregelix.graph;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+
+public class Graph {
+	
+	/**
+    * Construct a DOT graph in memory, convert it
+    * to image and store the image in the file system.
+	 * @throws Exception 
+    */
+   private void start(String fileName) throws Exception
+   {
+		File filePathTo = new File("graph/" + fileName);
+		BufferedReader br = new BufferedReader(new FileReader(filePathTo));
+		String line = "";
+		String[] split;
+		
+		String precursor = "";
+		String[] adjMap;
+		char[] succeeds;
+		String succeed = "";
+		String output;
+		
+		GraphViz gv = new GraphViz();
+		gv.addln(gv.start_graph());
+		while((line = br.readLine()) != null){
+			split = line.split("\t");
+			precursor = split[0];
+			adjMap = split[1].split("\\|"); 
+			if(adjMap.length > 1){
+				succeeds = adjMap[1].toCharArray();
+				for(int i = 0; i < succeeds.length; i++){
+					succeed = precursor.substring(1) + succeeds[i]; 
+					output = precursor + " -> " + succeed;
+					gv.addln(output);
+				}
+			}
+		}
+		gv.addln(gv.end_graph());
+		System.out.println(gv.getDotSource());
+
+		String type = "ps";
+		File out = new File("graph/" + fileName + "_out." + type); // Linux
+		gv.writeGraphToFile(gv.getGraph(gv.getDotSource(), type), out);
+   }
+	   
+	public static void main(String[] args) throws Exception
+	{
+		Graph g = new Graph();
+		g.start("Path");
+	}
+}
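
To make the expected input of Graph.start() concrete: assuming a line of the form "<kmer><TAB><left>|<right>" (the exact adjacency text format is an assumption here), the loop above emits one DOT edge per successor character, as in this small reproduction:

    // Illustrative reproduction (not part of the patch) of how one input line becomes DOT edges.
    public class DotEdgeDemo {
        public static void main(String[] args) {
            String line = "ACG\tCT|AT";                 // hypothetical input line
            String[] split = line.split("\t");
            String precursor = split[0];
            String[] adjMap = split[1].split("\\|");
            if (adjMap.length > 1) {
                for (char c : adjMap[1].toCharArray()) {
                    // same rule as Graph.start(): drop the first base, append the successor base
                    System.out.println(precursor + " -> " + precursor.substring(1) + c);
                }
            }
        }
    }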
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/GraphViz.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/GraphViz.java
new file mode 100644
index 0000000..c2178bc
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/GraphViz.java
@@ -0,0 +1,288 @@
+package edu.uci.ics.genomix.pregelix.graph;
+
+// GraphViz.java - a simple API to call dot from Java programs
+
+/*$Id$*/
+/*
+ ******************************************************************************
+ *                                                                            *
+ *              (c) Copyright 2003 Laszlo Szathmary                           *
+ *                                                                            *
+ * This program is free software; you can redistribute it and/or modify it    *
+ * under the terms of the GNU Lesser General Public License as published by   *
+ * the Free Software Foundation; either version 2.1 of the License, or        *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful, but        *
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public    *
+ * License for more details.                                                  *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program; if not, write to the Free Software Foundation,    *
+ * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.                              *
+ *                                                                            *
+ ******************************************************************************
+ */
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.InputStreamReader;
+
+/**
+ * <dl>
+ * <dt>Purpose: GraphViz Java API
+ * <dd>
+ *
+ * <dt>Description:
+ * <dd> With this Java class you can simply call dot
+ *      from your Java programs
+ * <dt>Example usage:
+ * <dd>
+ * <pre>
+ *    GraphViz gv = new GraphViz();
+ *    gv.addln(gv.start_graph());
+ *    gv.addln("A -> B;");
+ *    gv.addln("A -> C;");
+ *    gv.addln(gv.end_graph());
+ *    System.out.println(gv.getDotSource());
+ *
+ *    String type = "gif";
+ *    File out = new File("out." + type);   // out.gif in this example
+ *    gv.writeGraphToFile( gv.getGraph( gv.getDotSource(), type ), out );
+ * </pre>
+ * </dd>
+ *
+ * </dl>
+ *
+ * @version v0.4, 2011/02/05 (February) -- Patch of Keheliya Gallaba is added. Now you
+ * can specify the type of the output file: gif, dot, fig, pdf, ps, svg, png, etc.
+ * @version v0.3, 2010/11/29 (November) -- Windows support + ability 
+ * to read the graph from a text file
+ * @version v0.2, 2010/07/22 (July) -- bug fix
+ * @version v0.1, 2003/12/04 (December) -- first release
+ * @author  Laszlo Szathmary (<a href="jabba.laci@gmail.com">jabba.laci@gmail.com</a>)
+ */
+public class GraphViz
+{
+   /**
+    * The dir. where temporary files will be created.
+    */
+   private static String TEMP_DIR = "/tmp";	// Linux
+ //  private static String TEMP_DIR = "c:/temp";	// Windows
+
+   /**
+    * Where is your dot program located? It will be called externally.
+    */
+   private static String DOT = "/usr/bin/dot";	// Linux
+//   private static String DOT = "c:/Program Files/Graphviz2.26.3/bin/dot.exe";	// Windows
+
+   /**
+    * The source of the graph written in dot language.
+    */
+	private StringBuilder graph = new StringBuilder();
+
+   /**
+    * Constructor: creates a new GraphViz object that will contain
+    * a graph.
+    */
+   public GraphViz() {
+   }
+
+   /**
+    * Returns the graph's source description in dot language.
+    * @return Source of the graph in dot language.
+    */
+   public String getDotSource() {
+      return graph.toString();
+   }
+
+   /**
+    * Adds a string to the graph's source (without newline).
+    */
+   public void add(String line) {
+      graph.append(line);
+   }
+
+   /**
+    * Adds a string to the graph's source (with newline).
+    */
+   public void addln(String line) {
+      graph.append(line + "\n");
+   }
+
+   /**
+    * Adds a newline to the graph's source.
+    */
+   public void addln() {
+      graph.append('\n');
+   }
+
+   /**
+    * Returns the graph as an image in binary format.
+    * @param dot_source Source of the graph to be drawn.
+    * @param type Type of the output image to be produced, e.g.: gif, dot, fig, pdf, ps, svg, png.
+    * @return A byte array containing the image of the graph.
+    */
+   public byte[] getGraph(String dot_source, String type)
+   {
+      File dot;
+      byte[] img_stream = null;
+   
+      try {
+         dot = writeDotSourceToFile(dot_source);
+         if (dot != null)
+         {
+            img_stream = get_img_stream(dot, type);
+            if (dot.delete() == false) 
+               System.err.println("Warning: " + dot.getAbsolutePath() + " could not be deleted!");
+            return img_stream;
+         }
+         return null;
+      } catch (java.io.IOException ioe) { return null; }
+   }
+
+   /**
+    * Writes the graph's image in a file.
+    * @param img   A byte array containing the image of the graph.
+    * @param file  Name of the file to where we want to write.
+    * @return Success: 1, Failure: -1
+    */
+   public int writeGraphToFile(byte[] img, String file)
+   {
+      File to = new File(file);
+      return writeGraphToFile(img, to);
+   }
+
+   /**
+    * Writes the graph's image in a file.
+    * @param img   A byte array containing the image of the graph.
+    * @param to    A File object to where we want to write.
+    * @return Success: 1, Failure: -1
+    */
+   public int writeGraphToFile(byte[] img, File to)
+   {
+      try {
+         FileOutputStream fos = new FileOutputStream(to);
+         fos.write(img);
+         fos.close();
+      } catch (java.io.IOException ioe) { return -1; }
+      return 1;
+   }
+
+   /**
+    * It will call the external dot program, and return the image in
+    * binary format.
+    * @param dot Source of the graph (in dot language).
+    * @param type Type of the output image to be produced, e.g.: gif, dot, fig, pdf, ps, svg, png.
+    * @return The image of the graph in .gif format.
+    */
+   private byte[] get_img_stream(File dot, String type)
+   {
+      File img;
+      byte[] img_stream = null;
+
+      try {
+         img = File.createTempFile("graph_", "."+type, new File(GraphViz.TEMP_DIR));
+         Runtime rt = Runtime.getRuntime();
+         
+         // patch by Mike Chenault
+         String[] args = {DOT, "-T"+type, dot.getAbsolutePath(), "-o", img.getAbsolutePath()};
+         Process p = rt.exec(args);
+         
+         p.waitFor();
+
+         FileInputStream in = new FileInputStream(img.getAbsolutePath());
+         img_stream = new byte[in.available()];
+         in.read(img_stream);
+         // Close it if we need to
+         if( in != null ) in.close();
+
+         if (img.delete() == false) 
+            System.err.println("Warning: " + img.getAbsolutePath() + " could not be deleted!");
+      }
+      catch (java.io.IOException ioe) {
+         System.err.println("Error:    in I/O processing of tempfile in dir " + GraphViz.TEMP_DIR+"\n");
+         System.err.println("       or in calling external command");
+         ioe.printStackTrace();
+      }
+      catch (java.lang.InterruptedException ie) {
+         System.err.println("Error: the execution of the external program was interrupted");
+         ie.printStackTrace();
+      }
+
+      return img_stream;
+   }
+
+   /**
+    * Writes the source of the graph in a file, and returns the written file
+    * as a File object.
+    * @param str Source of the graph (in dot language).
+    * @return The file (as a File object) that contains the source of the graph.
+    */
+   private File writeDotSourceToFile(String str) throws java.io.IOException
+   {
+      File temp;
+      try {
+         temp = File.createTempFile("graph_", ".dot.tmp", new File(GraphViz.TEMP_DIR));
+         FileWriter fout = new FileWriter(temp);
+         fout.write(str);
+         fout.close();
+      }
+      catch (Exception e) {
+         System.err.println("Error: I/O error while writing the dot source to temp file!");
+         return null;
+      }
+      return temp;
+   }
+
+   /**
+    * Returns a string that is used to start a graph.
+    * @return A string to open a graph.
+    */
+   public String start_graph() {
+      return "digraph G {";
+   }
+
+   /**
+    * Returns a string that is used to end a graph.
+    * @return A string to close a graph.
+    */
+   public String end_graph() {
+      return "}";
+   }
+
+   /**
+    * Read a DOT graph from a text file.
+    * 
+    * @param input Input text file containing the DOT graph
+    * source.
+    */
+   public void readSource(String input)
+   {
+	   StringBuilder sb = new StringBuilder();
+	   
+	   try
+	   {
+		   FileInputStream fis = new FileInputStream(input);
+		   DataInputStream dis = new DataInputStream(fis);
+		   BufferedReader br = new BufferedReader(new InputStreamReader(dis));
+		   String line;
+		   while ((line = br.readLine()) != null) {
+			   sb.append(line);
+		   }
+		   dis.close();
+	   } 
+	   catch (Exception e) {
+		   System.err.println("Error: " + e.getMessage());
+	   }
+	   
+	   this.graph = sb;
+   }
+   
+} // end of class GraphViz
+
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/LogAlgorithmMessageWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/LogAlgorithmMessageWritable.java
new file mode 100644
index 0000000..42f1269
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/LogAlgorithmMessageWritable.java
@@ -0,0 +1,176 @@
+package edu.uci.ics.genomix.pregelix.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+import edu.uci.ics.genomix.pregelix.LogAlgorithmForMergeGraphVertex;
+
+public class LogAlgorithmMessageWritable implements WritableComparable<LogAlgorithmMessageWritable>{
+	/**
+	 * sourceVertexId stores source vertexId when headVertex sends the message
+	 * 				  stores the neighbor's vertexValue when pathVertex sends the message
+	 * chainVertexId stores the chain of connected DNA
+	 * file stores a pointer to the file that stores the chain of connected DNA
+	 */
+	private byte[] sourceVertexId;
+	private byte neighberInfo;
+	private int lengthOfChain;
+	private byte[] chainVertexId;
+	private File file;
+	private int message;
+	private int sourceVertexState;
+	
+	public LogAlgorithmMessageWritable(){
+		sourceVertexId = new byte[(LogAlgorithmForMergeGraphVertex.kmerSize-1)/4 + 1];
+	}
+	
+	public void set(byte[] sourceVertexId,byte neighberInfo, byte[] chainVertexId, File file){
+		this.sourceVertexId = sourceVertexId;
+		this.chainVertexId = chainVertexId;
+		this.file = file;
+		this.message = 0;
+		this.lengthOfChain = 0;
+	}
+	
+	public void reset(){
+		sourceVertexId = new byte[(LogAlgorithmForMergeGraphVertex.kmerSize-1)/4 + 1];
+		neighberInfo = 0;
+		lengthOfChain = 0;
+		chainVertexId = null;
+		message = 0;
+		sourceVertexState = 0;
+	}
+
+	public byte[] getSourceVertexId() {
+		return sourceVertexId;
+	}
+
+	public void setSourceVertexId(byte[] sourceVertexId) {
+		this.sourceVertexId = sourceVertexId;
+	}
+
+	public byte getNeighberInfo() {
+		return neighberInfo;
+	}
+
+	public void setNeighberInfo(byte neighberInfo) {
+		this.neighberInfo = neighberInfo;
+	}
+
+	public byte[] getChainVertexId() {
+		return chainVertexId;
+	}
+
+	public void setChainVertexId(byte[] chainVertexId) {
+		this.chainVertexId = chainVertexId;
+	}
+
+	public File getFile() {
+		return file;
+	}
+
+	public void setFile(File file) {
+		this.file = file;
+	}
+
+	public int getMessage() {
+		return message;
+	}
+
+	public void setMessage(int message) {
+		this.message = message;
+	}
+
+	public int getSourceVertexState() {
+		return sourceVertexState;
+	}
+
+	public void setSourceVertexState(int sourceVertexState) {
+		this.sourceVertexState = sourceVertexState;
+	}
+
+	public int getLengthOfChain() {
+		return lengthOfChain;
+	}
+
+	public void setLengthOfChain(int lengthOfChain) {
+		this.lengthOfChain = lengthOfChain;
+	}
+
+	public void incrementLength(){
+		this.lengthOfChain++;
+	}
+	
+	@Override
+	public void write(DataOutput out) throws IOException {
+		// TODO Auto-generated method stub
+		out.writeInt(lengthOfChain);
+		if(lengthOfChain != 0)
+			out.write(chainVertexId);
+
+		out.writeInt(message);
+		out.writeInt(sourceVertexState);
+		
+		out.write(sourceVertexId); 
+		out.write(neighberInfo);
+	}
+
+	@Override
+	public void readFields(DataInput in) throws IOException {
+		// TODO Auto-generated method stub
+		lengthOfChain = in.readInt();
+		if(lengthOfChain != 0){
+			chainVertexId = new byte[(lengthOfChain-1)/4 + 1];
+			in.readFully(chainVertexId);
+		}
+		else
+			chainVertexId = new byte[0];
+
+		message = in.readInt();
+		sourceVertexState = in.readInt();
+		
+		sourceVertexId = new byte[(LogAlgorithmForMergeGraphVertex.kmerSize-1)/4 + 1];
+		in.readFully(sourceVertexId);
+		neighberInfo = in.readByte();
+	}
+
+    @Override
+    public int hashCode() {
+    	int hashCode = 0;
+    	for(int i = 0; i < chainVertexId.length; i++)
+    		hashCode = 31 * hashCode + chainVertexId[i];
+        return hashCode;
+    }
+    @Override
+    public boolean equals(Object o) {
+        if (o instanceof LogAlgorithmMessageWritable) {
+        	LogAlgorithmMessageWritable tp = (LogAlgorithmMessageWritable) o;
+            return chainVertexId == tp.chainVertexId && file == tp.file;
+        }
+        return false;
+    }
+    @Override
+    public String toString() {
+        return java.util.Arrays.toString(chainVertexId) + "\t" + (file == null ? "" : file.getAbsolutePath());
+    }
+    @Override
+	public int compareTo(LogAlgorithmMessageWritable tp) {
+		// distinguishes equal from unequal messages only; this is not a total ordering
+        if (!java.util.Arrays.equals(chainVertexId, tp.chainVertexId))
+            return 1;
+        if (file == null ? tp.file == null : file.equals(tp.file))
+            return 0;
+        else
+            return 1;
+	}
+
+
+}
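
A side note, not part of the patch: both message writables size their byte buffers as (kmerSize - 1) / 4 + 1, which is ceil(kmerSize / 4) and only makes sense if each DNA base occupies 2 bits (four bases per byte), as the packing in the genomix Kmer type implies. The sketch below illustrates that arithmetic; the packBases helper and its bit layout are purely illustrative and are not the real Kmer implementation.

    public class KmerSizeSketch {
        // same expression the writables above use to size their byte buffers
        static int packedBytes(int k) {
            return (k - 1) / 4 + 1;
        }

        // pack a base string into 2-bit codes, four bases per byte
        // (illustrative bit layout, not necessarily the one the real Kmer class uses)
        static byte[] packBases(String bases) {
            byte[] buf = new byte[packedBytes(bases.length())];
            for (int i = 0; i < bases.length(); i++) {
                int code = "ACGT".indexOf(bases.charAt(i)); // A=0, C=1, G=2, T=3
                buf[i / 4] |= code << ((i % 4) * 2);
            }
            return buf;
        }

        public static void main(String[] args) {
            System.out.println(packedBytes(55));           // 14 bytes for the k = 55 jobs in this patch
            System.out.println(packBases("ACGTA").length); // 2 bytes for a 5-mer
        }
    }
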
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MessageWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MessageWritable.java
new file mode 100644
index 0000000..2976c87
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MessageWritable.java
@@ -0,0 +1,167 @@
+package edu.uci.ics.genomix.pregelix.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+import edu.uci.ics.genomix.pregelix.MergeGraphVertex;
+
+public class MessageWritable implements WritableComparable<MessageWritable>{
+	/**
+	 * sourceVertexId stores the source vertexId when the head vertex sends the message,
+	 *                and the neighbor vertexValue when a path vertex sends the message
+	 * chainVertexId stores the chain of connected DNA
+	 * file stores a pointer to the file that holds the chain of connected DNA
+	 */
+	private byte[] sourceVertexId;
+	private byte neighberInfo;
+	private byte[] chainVertexId;
+	private File file;
+	private boolean isRear;
+	private int lengthOfChain;
+	private byte[] head;
+	
+	public MessageWritable(){		
+	}
+	
+	public void set(byte[] sourceVertexId, byte neighberInfo, byte[] chainVertexId, File file, byte[] head){
+		this.sourceVertexId = sourceVertexId;
+		this.neighberInfo = neighberInfo;
+		this.chainVertexId = chainVertexId;
+		this.file = file;
+		this.isRear = false;
+		this.lengthOfChain = 0;
+		this.head = head;
+	}
+
+	public byte[] getSourceVertexId() {
+		return sourceVertexId;
+	}
+
+	public void setSourceVertexId(byte[] sourceVertexId) {
+		this.sourceVertexId = sourceVertexId;
+	}
+
+	public byte getNeighberInfo() {
+		return neighberInfo;
+	}
+
+	public void setNeighberInfo(byte neighberInfo) {
+		this.neighberInfo = neighberInfo;
+	}
+
+	public byte[] getChainVertexId() {
+		return chainVertexId;
+	}
+
+	public void setChainVertexId(byte[] chainVertexId) {
+		this.chainVertexId = chainVertexId;
+	}
+
+	public File getFile() {
+		return file;
+	}
+
+	public void setFile(File file) {
+		this.file = file;
+	}
+
+	public boolean isRear() {
+		return isRear;
+	}
+
+	public void setRear(boolean isRear) {
+		this.isRear = isRear;
+	}
+
+	public int getLengthOfChain() {
+		return lengthOfChain;
+	}
+
+	public void setLengthOfChain(int lengthOfChain) {
+		this.lengthOfChain = lengthOfChain;
+	}
+	
+
+	public byte[] getHead() {
+		return head;
+	}
+
+	public void setHead(byte[] head) {
+		this.head = head;
+	}
+
+	public void incrementLength(){
+		this.lengthOfChain++;
+	}
+	
+	@Override
+	public void write(DataOutput out) throws IOException {
+		// write the chain length first so readFields knows how many bytes to read back
+		out.writeInt(lengthOfChain);
+		if(lengthOfChain != 0)
+			out.write(chainVertexId);
+		out.write(sourceVertexId);
+		out.write(head);
+		out.write(neighberInfo);
+		out.writeBoolean(isRear);
+	}
+
+	@Override
+	public void readFields(DataInput in) throws IOException {
+		// read fields back in the same order write() emitted them
+		lengthOfChain = in.readInt();
+		if(lengthOfChain != 0){
+			chainVertexId = new byte[(lengthOfChain-1)/4 + 1];
+			in.readFully(chainVertexId);
+		}
+		else
+			chainVertexId = new byte[0];
+		sourceVertexId = new byte[(MergeGraphVertex.kmerSize-1)/4 + 1];
+		in.readFully(sourceVertexId);
+		head = new byte[(MergeGraphVertex.kmerSize-1)/4 + 1];
+		in.readFully(head);
+		neighberInfo = in.readByte();
+		isRear = in.readBoolean();
+
+	}
+
+    @Override
+    public int hashCode() {
+    	int hashCode = 0;
+    	for(int i = 0; i < chainVertexId.length; i++)
+    		hashCode = 31 * hashCode + chainVertexId[i];
+        return hashCode;
+    }
+    @Override
+    public boolean equals(Object o) {
+        if (o instanceof MessageWritable) {
+        	MessageWritable tp = (MessageWritable) o;
+            return java.util.Arrays.equals(chainVertexId, tp.chainVertexId)
+                    && (file == null ? tp.file == null : file.equals(tp.file));
+        }
+        return false;
+    }
+    @Override
+    public String toString() {
+        return java.util.Arrays.toString(chainVertexId) + "\t" + (file == null ? "" : file.getAbsolutePath());
+    }
+    
+	@Override
+	public int compareTo(MessageWritable tp) {
+		// distinguishes equal from unequal messages only; this is not a total ordering
+        if (!java.util.Arrays.equals(chainVertexId, tp.chainVertexId))
+            return 1;
+        if (file == null ? tp.file == null : file.equals(tp.file))
+            return 0;
+        else
+            return 1;
+	}
+
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueStateWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueStateWritable.java
new file mode 100644
index 0000000..9acda4e
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueStateWritable.java
@@ -0,0 +1,89 @@
+package edu.uci.ics.genomix.pregelix.io;
+
+import java.io.*;
+
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.pregelix.type.State;
+
+
+public class ValueStateWritable implements WritableComparable<ValueStateWritable> {
+
+	private byte value;
+	private int state;
+	private int lengthOfMergeChain;
+	private byte[] mergeChain;
+
+	public ValueStateWritable() {
+		state = State.NON_VERTEX;
+		lengthOfMergeChain = 0;
+	}
+
+	public ValueStateWritable(byte value, int state, int lengthOfMergeChain, byte[] mergeChain) {
+		this.value = value;
+		this.state = state;
+		this.lengthOfMergeChain = lengthOfMergeChain;
+		this.mergeChain = mergeChain;
+	}
+
+	public byte getValue() {
+		return value;
+	}
+
+	public void setValue(byte value) {
+		this.value = value;
+	}
+
+	public int getState() {
+		return state;
+	}
+
+	public void setState(int state) {
+		this.state = state;
+	}
+
+	public int getLengthOfMergeChain() {
+		return lengthOfMergeChain;
+	}
+
+	public void setLengthOfMergeChain(int lengthOfMergeChain) {
+		this.lengthOfMergeChain = lengthOfMergeChain;
+	}
+
+	public byte[] getMergeChain() {
+		return mergeChain;
+	}
+
+	public void setMergeChain(byte[] mergeChain) {
+		this.mergeChain = mergeChain;
+	}
+
+	@Override
+	public void readFields(DataInput in) throws IOException {
+		value = in.readByte();
+		state = in.readInt();
+		lengthOfMergeChain = in.readInt();
+		if(lengthOfMergeChain != 0){
+			mergeChain = new byte[(lengthOfMergeChain-1)/4 + 1];
+			in.readFully(mergeChain);
+		}
+		else
+			mergeChain = new byte[0];
+	}
+
+	@Override
+	public void write(DataOutput out) throws IOException {
+		out.writeByte(value);
+		out.writeInt(state);
+		out.writeInt(lengthOfMergeChain);
+		if(lengthOfMergeChain != 0)
+			out.write(mergeChain);
+	}
+
+	@Override
+	public int compareTo(ValueStateWritable o) {
+		// every instance compares as equal; no meaningful ordering is defined
+		return 0;
+	}
+	
+}
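
For reference, a minimal round-trip sketch (not part of the patch) of how the length-prefixed mergeChain survives write/readFields. It uses only ValueStateWritable and State as added in this patch plus standard java.io streams; the field values are made up.

    import java.io.*;

    import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
    import edu.uci.ics.genomix.pregelix.type.State;

    public class ValueStateRoundTrip {
        public static void main(String[] args) throws IOException {
            // a 5-mer packs into (5 - 1) / 4 + 1 = 2 bytes; the chain bytes here are arbitrary
            ValueStateWritable before = new ValueStateWritable((byte) 0x0F, State.MID_VERTEX, 5,
                    new byte[] { 0x1B, 0x02 });

            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            before.write(new DataOutputStream(bos)); // value, state, chain length, then the chain bytes

            ValueStateWritable after = new ValueStateWritable();
            after.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));

            System.out.println(after.getLengthOfMergeChain()); // 5
            System.out.println(after.getMergeChain().length);  // 2
        }
    }
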
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueWritable.java
new file mode 100644
index 0000000..a3f0b9f
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueWritable.java
@@ -0,0 +1,73 @@
+package edu.uci.ics.genomix.pregelix.io;
+
+import java.io.*;
+
+import org.apache.hadoop.io.WritableComparable;
+
+public class ValueWritable implements WritableComparable<ValueWritable> {
+
+	private byte value;
+	private int lengthOfMergeChain;
+	private byte[] mergeChain;
+
+	public ValueWritable() {
+		lengthOfMergeChain = 0;
+	}
+
+	public ValueWritable(byte value, int lengthOfMergeChain, byte[] mergeChain) {
+		this.value = value;
+		this.lengthOfMergeChain = lengthOfMergeChain;
+		this.mergeChain = mergeChain;
+	}
+
+	public byte getValue() {
+		return value;
+	}
+
+	public void setValue(byte value) {
+		this.value = value;
+	}
+
+	public int getLengthOfMergeChain() {
+		return lengthOfMergeChain;
+	}
+
+	public void setLengthOfMergeChain(int lengthOfMergeChain) {
+		this.lengthOfMergeChain = lengthOfMergeChain;
+	}
+
+	public byte[] getMergeChain() {
+		return mergeChain;
+	}
+
+	public void setMergeChain(byte[] mergeChain) {
+		this.mergeChain = mergeChain;
+	}
+
+	@Override
+	public void readFields(DataInput in) throws IOException {
+		value = in.readByte();
+		lengthOfMergeChain = in.readInt();
+		if(lengthOfMergeChain != 0){
+			mergeChain = new byte[(lengthOfMergeChain-1)/4 + 1];
+			in.readFully(mergeChain);
+		}
+		else
+			mergeChain = new byte[0];
+	}
+
+	@Override
+	public void write(DataOutput out) throws IOException {
+		out.writeByte(value);
+		out.writeInt(lengthOfMergeChain);
+		if(lengthOfMergeChain != 0)
+			out.write(mergeChain);
+	}
+
+	@Override
+	public int compareTo(ValueWritable o) {
+		// every instance compares as equal; no meaningful ordering is defined
+		return 0;
+	}
+	
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/DataLoadLogFormatter.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/DataLoadLogFormatter.java
new file mode 100644
index 0000000..f5ddfc6
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/DataLoadLogFormatter.java
@@ -0,0 +1,42 @@
+package edu.uci.ics.genomix.pregelix.log;
+
+import java.util.logging.Formatter;
+import java.util.logging.Handler;
+import java.util.logging.LogRecord;
+
+import org.apache.hadoop.io.BytesWritable;
+
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+public class DataLoadLogFormatter extends Formatter{
+    private BytesWritable key;
+    private KmerCountValue value;
+    private int k;
+
+    public void set(BytesWritable key, 
+    		KmerCountValue value, int k){
+    	this.key = key;
+    	this.value = value;
+    	this.k = k;
+    }
+    public String format(LogRecord record) {
+        StringBuilder builder = new StringBuilder(1000);
+        
+        builder.append(Kmer.recoverKmerFrom(k, key.getBytes(), 0,
+							key.getLength())
+							+ "\t" + value.toString() + "\r\n");
+
+        if(!formatMessage(record).equals(""))
+        	builder.append(formatMessage(record) + "\r\n");
+        return builder.toString();
+    }
+
+    public String getHead(Handler h) {
+        return super.getHead(h);
+    }
+
+    public String getTail(Handler h) {
+        return super.getTail(h);
+    }
+}
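
A hedged sketch of how this formatter could be wired into java.util.logging; the logger name, log file name, and the key/value contents below are placeholders, since in practice they come from the data-load vertex code.

    import java.util.logging.FileHandler;
    import java.util.logging.Logger;

    import org.apache.hadoop.io.BytesWritable;

    import edu.uci.ics.genomix.pregelix.log.DataLoadLogFormatter;
    import edu.uci.ics.genomix.type.KmerCountValue;

    public class DataLoadLoggingSketch {
        public static void main(String[] args) throws Exception {
            Logger logger = Logger.getLogger("dataload");          // placeholder logger name
            FileHandler handler = new FileHandler("dataload.log"); // placeholder log file
            DataLoadLogFormatter formatter = new DataLoadLogFormatter();
            handler.setFormatter(formatter);
            logger.addHandler(handler);

            BytesWritable key = new BytesWritable(new byte[] { 0x1B, 0x02 }); // made-up packed 5-mer
            KmerCountValue value = new KmerCountValue();
            formatter.set(key, value, 5); // format() reads these fields when the record is published
            logger.info("");              // the record message is only appended when non-empty
            handler.close();
        }
    }
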
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/LogAlgorithmLogFormatter.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/LogAlgorithmLogFormatter.java
new file mode 100644
index 0000000..e0d241a
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/LogAlgorithmLogFormatter.java
@@ -0,0 +1,116 @@
+package edu.uci.ics.genomix.pregelix.log;
+
+import java.util.logging.*;
+
+import edu.uci.ics.genomix.pregelix.io.LogAlgorithmMessageWritable;
+import edu.uci.ics.genomix.pregelix.type.Message;
+import edu.uci.ics.genomix.pregelix.type.State;
+import edu.uci.ics.genomix.type.Kmer;
+
+public class LogAlgorithmLogFormatter extends Formatter {
+	//
+    // Create a DateFormat to format the logger timestamp.
+    //
+    //private static final DateFormat df = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss.SSS");
+    private long step;
+    private byte[] sourceVertexId;
+    private byte[] destVertexId;
+    private LogAlgorithmMessageWritable msg;
+    private int state;
+    private int k;
+    private byte[] mergeChain;
+    private int lengthOfMergeChain;
+    //private boolean testDelete = false;
+    /** 0: general operation 
+     *  1: testDelete 
+     *  2: testMergeChain
+     *  3: testVoteToHalt
+     */ 
+    private int operation; 
+
+    public void set(long step, byte[] sourceVertexId, 
+    		byte[] destVertexId, LogAlgorithmMessageWritable msg, int state, int k){
+    	this.step = step;
+    	this.sourceVertexId = sourceVertexId;
+    	this.destVertexId = destVertexId;
+    	this.msg = msg;
+    	this.state = state;
+    	this.k = k;
+    	this.operation = 0;
+    }
+    public void setMergeChain(long step, byte[] sourceVertexId, 
+    		int lengthOfMergeChain, byte[] mergeChain, int k){
+    	this.reset();
+    	this.step = step;
+    	this.sourceVertexId = sourceVertexId;
+    	this.lengthOfMergeChain = lengthOfMergeChain;
+    	this.mergeChain = mergeChain;
+    	this.k = k;
+    	this.operation = 2;
+    }
+    public void setVotoToHalt(long step, byte[] sourceVertexId, int k){
+    	this.reset();
+    	this.step = step;
+    	this.sourceVertexId = sourceVertexId;
+    	this.k = k;
+    	this.operation = 3;
+    }
+    public void reset(){
+    	this.sourceVertexId = null;
+    	this.destVertexId = null;
+    	this.msg = null;
+    	this.state = 0;
+    	this.k = 0;
+    	this.mergeChain = null;
+    	this.lengthOfMergeChain = 0;
+    }
+    public String format(LogRecord record) {
+        StringBuilder builder = new StringBuilder(1000);
+        String source = Kmer.recoverKmerFrom(k, sourceVertexId, 0, sourceVertexId.length);
+        String chain = "";
+        
+        builder.append("Step: " + step + "\r\n");
+        builder.append("Source Code: " + source + "\r\n");
+        if(operation == 0){
+	        if(destVertexId != null){
+	        	String dest = Kmer.recoverKmerFrom(k, destVertexId, 0, destVertexId.length);
+		        builder.append("Send message to " + "\r\n");
+		        builder.append("Destination Code: " + dest + "\r\n");
+	        }
+	        builder.append("Message is: " + Message.MESSAGE_CONTENT.getContentFromCode(msg.getMessage()) + "\r\n");
+	        	
+	        if(msg.getLengthOfChain() != 0){
+	        	chain = Kmer.recoverKmerFrom(msg.getLengthOfChain(), msg.getChainVertexId(), 0, msg.getChainVertexId().length);
+	        	builder.append("Chain Message: " + chain + "\r\n");
+	        	builder.append("Chain Length: " + msg.getLengthOfChain() + "\r\n");
+	        }
+	        
+	        builder.append("State is: " + State.STATE_CONTENT.getContentFromCode(state) + "\r\n");
+        }
+        if(operation == 2){
+        	chain = Kmer.recoverKmerFrom(lengthOfMergeChain, mergeChain, 0, mergeChain.length);
+        	builder.append("Merge Chain: " + chain + "\r\n");
+        	builder.append("Merge Chain Length: " + lengthOfMergeChain + "\r\n");
+        }
+        if(operation == 3)
+        	builder.append("Vote to halt!");
+        if(!formatMessage(record).equals(""))
+        	builder.append(formatMessage(record) + "\r\n");
+        builder.append("\n");
+        return builder.toString();
+    }
+
+    public String getHead(Handler h) {
+        return super.getHead(h);
+    }
+
+    public String getTail(Handler h) {
+        return super.getTail(h);
+    }
+	public int getOperation() {
+		return operation;
+	}
+	public void setOperation(int operation) {
+		this.operation = operation;
+	}
+}
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/NaiveAlgorithmLogFormatter.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/NaiveAlgorithmLogFormatter.java
new file mode 100644
index 0000000..ef0d96f
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/NaiveAlgorithmLogFormatter.java
@@ -0,0 +1,59 @@
+package edu.uci.ics.genomix.pregelix.log;
+
+import java.util.logging.*;
+
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.type.Kmer;
+
+public class NaiveAlgorithmLogFormatter extends Formatter {
+	//
+    // Create a DateFormat to format the logger timestamp.
+    //
+    //private static final DateFormat df = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss.SSS");
+    private long step;
+    private byte[] sourceVertexId;
+    private byte[] destVertexId;
+    private MessageWritable msg;
+    private int k;
+
+    public void set(long step, byte[] sourceVertexId, 
+    		byte[] destVertexId, MessageWritable msg, int k){
+    	this.step = step;
+    	this.sourceVertexId = sourceVertexId;
+    	this.destVertexId = destVertexId;
+    	this.msg = msg;
+    	this.k = k;
+    }
+    public String format(LogRecord record) {
+        StringBuilder builder = new StringBuilder(1000);
+        String source = Kmer.recoverKmerFrom(k, sourceVertexId, 0, sourceVertexId.length);
+        
+        String chain = "";
+        
+        builder.append("Step: " + step + "\r\n");
+        builder.append("Source Code: " + source + "\r\n");
+        
+        if(destVertexId != null){
+        	builder.append("Send message to " + "\r\n");
+        	String dest = Kmer.recoverKmerFrom(k, destVertexId, 0, destVertexId.length);
+        	builder.append("Destination Code: " + dest + "\r\n");
+        }
+        if(msg.getLengthOfChain() != 0){
+        	chain = Kmer.recoverKmerFrom(msg.getLengthOfChain(), msg.getChainVertexId(), 0, msg.getChainVertexId().length);
+        	builder.append("Chain Message: " + chain + "\r\n");
+        	builder.append("Chain Length: " + msg.getLengthOfChain() + "\r\n");
+        }
+        if(!formatMessage(record).equals(""))
+        	builder.append(formatMessage(record) + "\r\n");
+        builder.append("\n");
+        return builder.toString();
+    }
+
+    public String getHead(Handler h) {
+        return super.getHead(h);
+    }
+
+    public String getTail(Handler h) {
+        return super.getTail(h);
+    }
+}
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/CombineSequenceFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/CombineSequenceFile.java
new file mode 100644
index 0000000..8924302
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/CombineSequenceFile.java
@@ -0,0 +1,63 @@
+package edu.uci.ics.genomix.pregelix.sequencefile;
+
+import java.io.File;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
+
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+
+public class CombineSequenceFile {
+
+	/**
+	 * @param args
+	 * @throws Exception 
+	 */
+	public static void main(String[] args) throws Exception {
+		// merge every sequence file under data/SinglePath_55 into data/result/output, then dump it for verification
+		int kmerSize = 5;
+		Configuration conf = new Configuration();
+		FileSystem fileSys = FileSystem.get(conf);
+		
+		Path p = new Path("data/SinglePath_55");
+		Path p2 = new Path("data/result");
+		Path outFile = new Path(p2, "output"); 
+		SequenceFile.Reader reader;
+	    SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
+	         outFile, BytesWritable.class, KmerCountValue.class, 
+	         CompressionType.NONE);
+	    BytesWritable key = new BytesWritable();
+	    KmerCountValue value = new KmerCountValue();
+	    
+	    File dir = new File("data/SinglePath_55");
+		for(File child : dir.listFiles()){
+			String name = child.getAbsolutePath();
+			Path inFile = new Path(p, name);
+			reader = new SequenceFile.Reader(fileSys, inFile, conf);
+			while (reader.next(key, value)) {
+				System.out.println(Kmer.recoverKmerFrom(kmerSize, key.getBytes(), 0,
+						key.getLength())
+						+ "\t" + value.toString());
+				writer.append(key, value);
+			}
+			reader.close();
+		}
+		writer.close();
+		System.out.println();
+		
+		reader = new SequenceFile.Reader(fileSys, outFile, conf);
+		while (reader.next(key, value)) {
+			System.err.println(Kmer.recoverKmerFrom(kmerSize, key.getBytes(), 0,
+					key.getLength())
+					+ "\t" + value.toString());
+		}
+		reader.close();
+	}
+
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/ConvertToSequenceFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/ConvertToSequenceFile.java
new file mode 100644
index 0000000..d64b279
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/ConvertToSequenceFile.java
@@ -0,0 +1,42 @@
+package edu.uci.ics.genomix.pregelix.sequencefile;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+
+public class ConvertToSequenceFile {
+	public static void main(String[] args) throws IOException,
+    InterruptedException, ClassNotFoundException {
+
+		Configuration conf = new Configuration();
+		Job job = new Job(conf);
+		job.setJobName("Convert Text");
+		job.setJarByClass(Mapper.class);
+		
+		job.setMapperClass(Mapper.class);
+		job.setReducerClass(Reducer.class);
+		
+		// increase if you need sorting or a special number of files
+		job.setNumReduceTasks(0);
+		
+		job.setOutputKeyClass(LongWritable.class);
+		job.setOutputValueClass(Text.class);
+		
+		job.setOutputFormatClass(SequenceFileOutputFormat.class);
+		job.setInputFormatClass(TextInputFormat.class);
+		
+		TextInputFormat.addInputPath(job, new Path("data/webmap/part-00000"));
+		SequenceFileOutputFormat.setOutputPath(job, new Path("folder_seq"));
+		
+		// submit and wait for completion
+		job.waitForCompletion(true);
+	}
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/generateSmallFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/generateSmallFile.java
new file mode 100644
index 0000000..45329d0
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/generateSmallFile.java
@@ -0,0 +1,48 @@
+package edu.uci.ics.genomix.pregelix.sequencefile;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
+
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+public class generateSmallFile {
+
+	public static void generateNumOfLinesFromBigFile(Path inFile, Path outFile, int numOfLines) throws IOException{
+		Configuration conf = new Configuration();
+		FileSystem fileSys = FileSystem.get(conf);
+
+		SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
+	    SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
+	         outFile, BytesWritable.class, KmerCountValue.class, 
+	         CompressionType.NONE);
+	    BytesWritable outKey = new BytesWritable();
+	    KmerCountValue outValue = new KmerCountValue();
+	    int i = 0;
+	    
+	    for(i = 0; i < numOfLines; i++){
+	    	 //System.out.println(i);
+	    	 reader.next(outKey, outValue);
+	    	 writer.append(outKey, outValue);
+	    }
+	    writer.close();
+	    reader.close();
+	}
+	/**
+	 * @param args
+	 * @throws IOException 
+	 */
+	public static void main(String[] args) throws IOException {
+		// cut the first 20,000 records of data/webmap/part-2 into a smaller test file
+		Path dir = new Path("data/webmap");
+		Path inFile = new Path(dir, "part-2");
+		Path outFile = new Path(dir, "part-2-out-20000");
+		generateNumOfLinesFromBigFile(inFile,outFile,20000);
+	}
+
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/GenerateTestInput.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/GenerateTestInput.java
new file mode 100644
index 0000000..8ff5589
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/GenerateTestInput.java
@@ -0,0 +1,77 @@
+package edu.uci.ics.genomix.pregelix.testcase;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+
+public class GenerateTestInput {
+
+	/**
+	 * Simple Path
+	 */
+	public static String simplePath(int k, int length, int numLines){
+		RandomString rs = new RandomString(k, length);
+		String output = "";
+		for(int i = 0; i < numLines; i++)
+			output += rs.nextString(0) + "\r\n";
+		return output;
+	}
+	/**
+	 * Tree Path
+	 */
+	public static String treePath(int k, int x, int y, int z){
+		RandomString rs = new RandomString(k, x + y + k - 1);
+		String s1 = rs.nextString(0);
+		rs.setLength(x + y + z + k - 1);
+		rs.addString(s1.substring(0, x));
+		String s2 = rs.nextString(x);
+		rs.setLength(x + y + z + k - 1);
+		rs.addString(s2.substring(0,x + y));
+		String s3 = rs.nextString(x + y);
+		return s1 + "\r\n" + s2 + "\r\n" + s3;
+	}
+	/**
+	 * Cycle Path  
+	 */
+	public static String cyclePath(int k, int length){
+		RandomString rs = new RandomString(k, length);
+		String s1 = rs.nextString(0);
+		String s2 = s1 + s1.substring(1, k + 1);
+		return s2;
+	}
+	/**
+	 * Bridge Path
+	 */
+	public static String bridgePath(int k, int x){
+		RandomString rs = new RandomString(k, x + k + 2 + k - 1);
+		String s1 = rs.nextString(0);
+		rs.setLength(x + k + 2);
+		rs.addString(s1.substring(0, k + 2));
+		String s2 = rs.nextString(k + 2) + s1.substring(x + k + 2, x + k + 2 + k - 1);
+		return s1 + "\r\n" + s2;
+	}
+
+	public static void main(String[] args) {
+		// generate the k = 55 single-path and simple-path test inputs; the other cases stay commented out
+		OutputStreamWriter writer;
+		try {
+			writer = new OutputStreamWriter(new FileOutputStream("graph/55/SinglePath_55"));
+			writer.write(simplePath(55,60,1));
+			writer.close();
+			writer = new OutputStreamWriter(new FileOutputStream("graph/55/SimplePath_55"));
+			writer.write(simplePath(55,60,3));
+			writer.close();
+			/*writer = new OutputStreamWriter(new FileOutputStream("TreePath"));
+			writer.write(treePath(5, 5, 5, 3));
+			writer.close();
+			writer = new OutputStreamWriter(new FileOutputStream("CyclePath"));
+			writer.write(cyclePath(5,10));
+			writer.close();
+			writer = new OutputStreamWriter(new FileOutputStream("BridgePath"));
+			writer.write(bridgePath(5,2));
+			writer.close();*/
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+	}
+}
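
For orientation, a small sketch (not part of the patch) showing how a generated read decomposes into overlapping k-mers; consecutive k-mers share k - 1 characters, which is why RandomString only starts its duplicate-k-mer check once idx reaches k - 1. The read and k below are arbitrary.

    import java.util.ArrayList;
    import java.util.List;

    public class KmerDecompositionSketch {
        // all k-length substrings of a read, in order
        static List<String> kmersOf(String read, int k) {
            List<String> kmers = new ArrayList<String>();
            for (int i = 0; i + k <= read.length(); i++)
                kmers.add(read.substring(i, i + k));
            return kmers;
        }

        public static void main(String[] args) {
            // a length-8 read with k = 5 yields 8 - 5 + 1 = 4 overlapping k-mers
            for (String kmer : kmersOf("ACGTACGT", 5))
                System.out.println(kmer); // ACGTA, CGTAC, GTACG, TACGT
        }
    }
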
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/RandomString.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/RandomString.java
new file mode 100644
index 0000000..337c5d8
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/RandomString.java
@@ -0,0 +1,63 @@
+package edu.uci.ics.genomix.pregelix.testcase;
+
+import java.util.ArrayList;
+import java.util.Random;
+
+public class RandomString
+{
+
+  private static final char[] symbols = new char[4];
+
+  static {
+	  symbols[0] = 'A';
+	  symbols[1] = 'C';
+	  symbols[2] = 'G';
+	  symbols[3] = 'T';
+  }
+
+  private final Random random = new Random(); 
+
+  private char[] buf;
+  
+  private ArrayList<String> existKmer = new ArrayList<String>();
+  
+  private int k;
+
+  public RandomString(int k, int length)
+  {
+    if (length < 1)
+      throw new IllegalArgumentException("length < 1: " + length);
+    buf = new char[length];
+    this.k = k;
+  }
+
+  public String nextString(int startIdx)
+  {
+	String tmp = "";
+    for (int idx = startIdx; idx < buf.length;){ 
+    	buf[idx] = symbols[random.nextInt(4)]; 
+    	if(idx >= k - 1){
+    		tmp = new String(buf, idx-k+1, k);
+    		if(!existKmer.contains(tmp)){
+    			existKmer.add(tmp);
+    			idx++;
+    		}
+    	}
+    	else
+    		idx++;
+    }
+    	
+    return new String(buf);
+  }
+  
+  public void setLength(int length){
+	  buf = new char[length];
+  }
+  
+  public void addString(String s){
+	  char[] tmp = s.toCharArray();
+	  for(int i = 0; i < tmp.length; i++)
+		  buf[i] = tmp[i];
+  }
+
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/Message.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/Message.java
new file mode 100644
index 0000000..f556a73
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/Message.java
@@ -0,0 +1,27 @@
+package edu.uci.ics.genomix.pregelix.type;
+
+public class Message {
+	
+	public static final int NON = 0;
+	public static final int START = 1;
+	public static final int END = 2;
+	
+	public final static class MESSAGE_CONTENT{
+		
+		public static String getContentFromCode(int code){
+			String r = "";
+			switch(code){
+			case NON:
+				r = "NON";
+				break;
+			case START:
+				r = "START";
+				break;
+			case END:
+				r = "END";
+				break;
+			}
+			return r;
+		}
+	}
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/State.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/State.java
new file mode 100644
index 0000000..5447d26
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/State.java
@@ -0,0 +1,42 @@
+package edu.uci.ics.genomix.pregelix.type;
+
+public class State {
+	public static final int NON_VERTEX = 0;
+	public static final int START_VERTEX = 1;
+	public static final int END_VERTEX = 2;
+	public static final int MID_VERTEX = 3;
+	public static final int TODELETE = 4;
+	public static final int FINAL_VERTEX = 5;
+	public static final int FINAL_DELETE = 6;
+	
+	public final static class STATE_CONTENT{
+
+		public static String getContentFromCode(int code){
+			String r = "";
+			switch(code){
+			case NON_VERTEX:
+				r = "NON_VERTEX";
+				break;
+			case START_VERTEX:
+				r = "START_VERTEX";
+				break;
+			case END_VERTEX:
+				r = "END_VERTEX";
+				break;
+			case MID_VERTEX:
+				r = "MID_VERTEX";
+				break;
+			case TODELETE:
+				r = "TODELETE";
+				break;
+			case FINAL_VERTEX:
+				r = "FINAL_VERTEX";
+				break;
+			case FINAL_DELETE:
+				r = "FINAL_DELETE";
+				break;
+			}
+			return r;
+		}
+	}
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
new file mode 100644
index 0000000..53340dc
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
@@ -0,0 +1,95 @@
+package edu.uci.ics.genomix.pregelix.JobGen;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+import edu.uci.ics.genomix.pregelix.LoadGraphVertex;
+import edu.uci.ics.genomix.pregelix.format.BinaryLoadGraphInputFormat;
+import edu.uci.ics.genomix.pregelix.format.BinaryLoadGraphOutputFormat;
+import edu.uci.ics.genomix.pregelix.format.LogAlgorithmForMergeGraphInputFormat;
+import edu.uci.ics.genomix.pregelix.format.LogAlgorithmForMergeGraphOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.genomix.pregelix.LogAlgorithmForMergeGraphVertex;
+import edu.uci.ics.genomix.pregelix.MergeGraphVertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+
+
+public class JobGenerator {
+
+    private static String outputBase = "src/test/resources/jobs/";
+    private static String HDFS_INPUTPATH = "/webmap";
+    private static String HDFS_OUTPUTPAH = "/result";
+    
+    private static void generateLoadGraphJob(String jobName, String outputPath) throws IOException {
+    	PregelixJob job = new PregelixJob(jobName);
+    	job.setVertexClass(LoadGraphVertex.class);
+    	job.setVertexInputFormatClass(BinaryLoadGraphInputFormat.class);
+        job.setVertexOutputFormatClass(BinaryLoadGraphOutputFormat.class);
+        job.setOutputKeyClass(BytesWritable.class);
+        job.setOutputValueClass(ByteWritable.class);
+        FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
+        FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
+        job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+    }
+    
+    private static void genLoadGraph() throws IOException {
+    	generateLoadGraphJob("LoadGraph", outputBase + "LoadGraph.xml");
+    }
+    
+    private static void generateMergeGraphJob(String jobName, String outputPath) throws IOException {
+    	PregelixJob job = new PregelixJob(jobName);
+    	job.setVertexClass(MergeGraphVertex.class);
+    	job.setVertexInputFormatClass(BinaryLoadGraphInputFormat.class);
+        job.setVertexOutputFormatClass(BinaryLoadGraphOutputFormat.class);
+        job.setDynamicVertexValueSize(true);
+        job.setOutputKeyClass(BytesWritable.class);
+        job.setOutputValueClass(ValueStateWritable.class);
+        FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
+        FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
+        job.getConfiguration().setInt(MergeGraphVertex.KMER_SIZE, 55);
+        job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+    }
+    
+    private static void genMergeGraph() throws IOException {
+    	generateMergeGraphJob("MergeGraph", outputBase + "MergeGraph.xml");
+    }
+    
+    private static void generateLogAlgorithmForMergeGraphJob(String jobName, String outputPath) throws IOException {
+    	PregelixJob job = new PregelixJob(jobName);
+    	job.setVertexClass(LogAlgorithmForMergeGraphVertex.class);
+        job.setVertexInputFormatClass(LogAlgorithmForMergeGraphInputFormat.class); 
+        job.setVertexOutputFormatClass(LogAlgorithmForMergeGraphOutputFormat.class);
+        job.setDynamicVertexValueSize(true);
+        job.setOutputKeyClass(BytesWritable.class);
+        job.setOutputValueClass(ValueStateWritable.class);
+        FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
+        FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
+        job.getConfiguration().setInt(LogAlgorithmForMergeGraphVertex.KMER_SIZE, 55);
+        job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+    }
+    
+    private static void genLogAlgorithmForMergeGraph() throws IOException {
+    	generateLogAlgorithmForMergeGraphJob("LogAlgorithmForMergeGraph", outputBase + "LogAlgorithmForMergeGraph.xml");
+    }
+    
+	/**
+	 * @param args
+	 * @throws IOException 
+	 */
+	public static void main(String[] args) throws IOException {
+		// only the MergeGraph job is generated by default; uncomment the others as needed
+		//genLoadGraph();
+		genMergeGraph();
+		//genLogAlgorithmForMergeGraph();
+		//genSequenceLoadGraph();
+		//genBasicBinaryLoadGraph();
+	}
+
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestCase.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestCase.java
new file mode 100644
index 0000000..a5ddce3
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestCase.java
@@ -0,0 +1,168 @@
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.pregelix.example.util.TestUtils;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.core.jobgen.JobGen;
+import edu.uci.ics.pregelix.core.jobgen.JobGenInnerJoin;
+import edu.uci.ics.pregelix.core.jobgen.JobGenOuterJoin;
+import edu.uci.ics.pregelix.core.jobgen.JobGenOuterJoinSingleSort;
+import edu.uci.ics.pregelix.core.jobgen.JobGenOuterJoinSort;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+import edu.uci.ics.pregelix.dataflow.util.IterationUtils;
+
+public class RunJobTestCase extends TestCase{
+	
+	private static final String NC1 = "nc1";
+    private static final String HYRACKS_APP_NAME = "pregelix";
+    private static String HDFS_INPUTPATH = "/webmap";
+    private static String HDFS_OUTPUTPAH = "/result";
+    
+    private final PregelixJob job;
+    private JobGen[] giraphJobGens;
+    private final String resultFileName;
+    private final String expectedFileName;
+    private final String jobFile;
+    
+
+
+    public RunJobTestCase(String hadoopConfPath, String jobName, String jobFile, String resultFile, String expectedFile)
+            throws Exception {
+        super("test");
+        this.jobFile = jobFile;
+        this.job = new PregelixJob("test");
+        this.job.getConfiguration().addResource(new Path(jobFile));
+        this.job.getConfiguration().addResource(new Path(hadoopConfPath));
+        Path[] inputPaths = FileInputFormat.getInputPaths(job);
+        if (inputPaths[0].toString().endsWith(HDFS_INPUTPATH)) {
+            FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
+            FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
+        } 
+        
+        job.setJobName(jobName);
+        this.resultFileName = resultFile;
+        this.expectedFileName = expectedFile;
+        giraphJobGens = new JobGen[1];
+        giraphJobGens[0] = new JobGenOuterJoin(job);
+        /*waitawhile();
+        giraphJobGens[1] = new JobGenInnerJoin(job);
+        waitawhile();
+        giraphJobGens[2] = new JobGenOuterJoinSort(job);
+        waitawhile();
+        giraphJobGens[3] = new JobGenOuterJoinSingleSort(job);*/
+    }
+    
+    private void waitawhile() throws InterruptedException {
+        synchronized (this) {
+            this.wait(20);
+        }
+    }
+	@Test
+	public void test() throws Exception {
+		setUp();
+		
+        for (JobGen jobGen : giraphJobGens) {
+            FileSystem dfs = FileSystem.get(job.getConfiguration());
+            dfs.delete(new Path(HDFS_OUTPUTPAH), true);
+            runCreate(jobGen);
+            runDataLoad(jobGen);
+            int i = 1;
+            boolean terminate = false;
+            do {
+                runLoopBodyIteration(jobGen, i);
+                terminate = IterationUtils.readTerminationState(job.getConfiguration(), jobGen.getJobId());
+                i++;
+            } while (!terminate);
+            runIndexScan(jobGen);
+            runHDFSWRite(jobGen);
+            runCleanup(jobGen);
+            compareResults();
+        }
+        tearDown();
+        waitawhile();
+	}
+	
+	private void runCreate(JobGen jobGen) throws Exception {
+        try {
+            JobSpecification treeCreateJobSpec = jobGen.generateCreatingJob();
+            PregelixHyracksIntegrationUtil.runJob(treeCreateJobSpec, HYRACKS_APP_NAME);
+        } catch (Exception e) {
+            throw e;
+        }
+    }
+
+    private void runDataLoad(JobGen jobGen) throws Exception {
+        try {
+            JobSpecification bulkLoadJobSpec = jobGen.generateLoadingJob();
+            PregelixHyracksIntegrationUtil.runJob(bulkLoadJobSpec, HYRACKS_APP_NAME);
+        } catch (Exception e) {
+            throw e;
+        }
+    }
+
+    private void runLoopBodyIteration(JobGen jobGen, int iteration) throws Exception {
+        try {
+            JobSpecification loopBody = jobGen.generateJob(iteration);
+            PregelixHyracksIntegrationUtil.runJob(loopBody, HYRACKS_APP_NAME);
+        } catch (Exception e) {
+            throw e;
+        }
+    }
+
+    private void runIndexScan(JobGen jobGen) throws Exception {
+        try {
+            JobSpecification scanSortPrintJobSpec = jobGen.scanIndexPrintGraph(NC1, resultFileName);
+            PregelixHyracksIntegrationUtil.runJob(scanSortPrintJobSpec, HYRACKS_APP_NAME);
+        } catch (Exception e) {
+            throw e;
+        }
+    }
+
+    private void runHDFSWRite(JobGen jobGen) throws Exception {
+        try {
+            JobSpecification scanSortPrintJobSpec = jobGen.scanIndexWriteGraph();
+            PregelixHyracksIntegrationUtil.runJob(scanSortPrintJobSpec, HYRACKS_APP_NAME);
+        } catch (Exception e) {
+            throw e;
+        }
+    }
+
+    private void runCleanup(JobGen jobGen) throws Exception {
+        try {
+            JobSpecification[] cleanups = jobGen.generateCleanup();
+            runJobArray(cleanups);
+        } catch (Exception e) {
+            throw e;
+        }
+    }
+
+    private void runJobArray(JobSpecification[] jobs) throws Exception {
+        for (JobSpecification job : jobs) {
+            PregelixHyracksIntegrationUtil.runJob(job, HYRACKS_APP_NAME);
+        }
+    }
+
+    private void compareResults() throws Exception {
+        TestUtils.compareWithResult(new File(resultFileName), new File(expectedFileName));
+    }
+
+    public String toString() {
+        return jobFile;
+    }
+
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestSuite.java
new file mode 100644
index 0000000..0238ca0
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestSuite.java
@@ -0,0 +1,197 @@
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import junit.framework.TestSuite;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+
+public class RunJobTestSuite extends TestSuite{
+	
+	private static final Logger LOGGER = Logger.getLogger(RunJobTestSuite.class
+			.getName());
+
+	private static final String ACTUAL_RESULT_DIR = "actual";
+	private static final String EXPECTED_RESULT_DIR = "src/test/resources/expected";
+	private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+	private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+	private static final String PATH_TO_CLUSTER_PROPERTIES = "src/test/resources/cluster/cluster.properties";
+	private static final String PATH_TO_JOBS = "src/test/resources/jobs/";
+	private static final String PATH_TO_IGNORE = "src/test/resources/ignore.txt";
+	private static final String PATH_TO_ONLY = "src/test/resources/only.txt";
+	private static final String FILE_EXTENSION_OF_RESULTS = "result";
+
+	private static final String DATA_PATH = "data/input/part-1-out-500000";//sequenceShortFileMergeTest
+	private static final String HDFS_PATH = "/webmap/";
+	
+	private static final String HYRACKS_APP_NAME = "pregelix";
+	private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR
+			+ File.separator + "conf.xml";
+	private MiniDFSCluster dfsCluster;
+
+	private JobConf conf = new JobConf();
+	private int numberOfNC = 2;
+	
+	public void setUp() throws Exception {
+		ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
+		ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
+		cleanupStores();
+		PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
+		PregelixHyracksIntegrationUtil.createApp(HYRACKS_APP_NAME);
+		LOGGER.info("Hyracks mini-cluster started");
+		FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+		FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+		startHDFS();
+	}
+
+	private void cleanupStores() throws IOException {
+		FileUtils.forceMkdir(new File("teststore"));
+		FileUtils.forceMkdir(new File("build"));
+		FileUtils.cleanDirectory(new File("teststore"));
+		FileUtils.cleanDirectory(new File("build"));
+	}
+	
+	private void startHDFS() throws IOException {
+		conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+		conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+		conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+		FileSystem lfs = FileSystem.getLocal(new Configuration());
+		lfs.delete(new Path("build"), true);
+		System.setProperty("hadoop.log.dir", "logs");
+		dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+		FileSystem dfs = FileSystem.get(conf);
+		Path src = new Path(DATA_PATH);
+		Path dest = new Path(HDFS_PATH);
+		dfs.mkdirs(dest);
+		dfs.copyFromLocalFile(src, dest);
+
+		DataOutputStream confOutput = new DataOutputStream(
+				new FileOutputStream(new File(HADOOP_CONF_PATH)));
+		conf.writeXml(confOutput);
+		confOutput.flush();
+		confOutput.close();
+	}
+	
+	/**
+	 * cleanup hdfs cluster
+	 */
+	private void cleanupHDFS() throws Exception {
+		dfsCluster.shutdown();
+	}
+
+	public void tearDown() throws Exception {
+		PregelixHyracksIntegrationUtil.destroyApp(HYRACKS_APP_NAME);
+		PregelixHyracksIntegrationUtil.deinit();
+		LOGGER.info("Hyracks mini-cluster shut down");
+		cleanupHDFS();
+	}
+	
+	public static Test suite() throws Exception {
+		List<String> ignores = getFileList(PATH_TO_IGNORE);
+		List<String> onlys = getFileList(PATH_TO_ONLY);
+		File testData = new File(PATH_TO_JOBS);
+		File[] queries = testData.listFiles();
+		RunJobTestSuite testSuite = new RunJobTestSuite();
+		testSuite.setUp();
+		boolean onlyEnabled = false;
+
+		if (onlys.size() > 0) {
+			onlyEnabled = true;
+		}
+		for (File qFile : queries) {
+			if (isInList(ignores, qFile.getName()))
+				continue;
+
+			if (qFile.isFile()) {
+				if (onlyEnabled && !isInList(onlys, qFile.getName())) {
+					continue;
+				} else {
+					String resultFileName = ACTUAL_RESULT_DIR + File.separator
+							+ jobExtToResExt(qFile.getName());
+					String expectedFileName = EXPECTED_RESULT_DIR
+							+ File.separator + jobExtToResExt(qFile.getName());
+					testSuite.addTest(new RunJobTestCase(HADOOP_CONF_PATH,
+							qFile.getName(),
+							qFile.getAbsolutePath().toString(), resultFileName,
+							expectedFileName));
+				}
+			}
+		}
+		return testSuite;
+	}
+	
+	/**
+	 * Runs the tests and collects their result in a TestResult.
+	 */
+	@Override
+	public void run(TestResult result) {		
+		try {
+			int testCount = countTestCases();
+			for (int i = 0; i < testCount; i++) {
+				// cleanupStores();
+				Test each = this.testAt(i);
+				if (result.shouldStop())
+					break;
+				runTest(each, result);
+			}
+
+			tearDown();
+		} catch (Exception e) {
+			throw new IllegalStateException(e);
+		}
+		
+		
+	}
+
+	protected static List<String> getFileList(String ignorePath)
+			throws FileNotFoundException, IOException {
+		BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+		String s = null;
+		List<String> ignores = new ArrayList<String>();
+		while ((s = reader.readLine()) != null) {
+			ignores.add(s);
+		}
+		reader.close();
+		return ignores;
+	}
+
+	private static String jobExtToResExt(String fname) {
+		int dot = fname.lastIndexOf('.');
+		return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+	}
+
+	private static boolean isInList(List<String> onlys, String name) {
+		for (String only : onlys)
+			if (name.indexOf(only) >= 0)
+				return true;
+		return false;
+	}
+
+	public JobConf getConf() {
+		return conf;
+	}
+
+	public void setConf(JobConf conf) {
+		this.conf = conf;
+	}
+
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/example/util/TestUtils.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/example/util/TestUtils.java
new file mode 100644
index 0000000..4ea3c1d
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/example/util/TestUtils.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.example.util;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+
+public class TestUtils {
+
+    public static void compareWithResult(File expectedFile, File actualFile) throws Exception {
+        BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
+        BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
+        String lineExpected, lineActual;
+        int num = 1;
+        try {
+            while ((lineExpected = readerExpected.readLine()) != null) {
+                lineActual = readerActual.readLine();
+                // Assert.assertEquals(lineExpected, lineActual);
+                if (lineActual == null) {
+                    throw new Exception("Actual result changed at line " + num + ":\n< " + lineExpected + "\n> ");
+                }
+                if (!equalStrings(lineExpected, lineActual)) {
+                    throw new Exception("Result changed at line " + num + ":\n< " + lineExpected + "\n> "
+                            + lineActual);
+                }
+                ++num;
+            }
+            lineActual = readerActual.readLine();
+            if (lineActual != null) {
+                throw new Exception("Actual result changed at line " + num + ":\n< \n> " + lineActual);
+            }
+        } finally {
+            readerExpected.close();
+            readerActual.close();
+        }
+    }
+
+    private static boolean equalStrings(String s1, String s2) {
+        String[] rowsOne = s1.split("\n");
+        String[] rowsTwo = s2.split("\n");
+
+        if (rowsOne.length != rowsTwo.length)
+            return false;
+
+        for (int i = 0; i < rowsOne.length; i++) {
+            String row1 = rowsOne[i];
+            String row2 = rowsTwo[i];
+
+            if (row1.equals(row2))
+                continue;
+
+            String[] fields1 = row1.split(" ");
+            String[] fields2 = row2.split(" ");
+
+            for (int j = 0; j < fields1.length; j++) {
+                if (fields1[j].equals(fields2[j])) {
+                    continue;
+                } else if (fields1[j].indexOf('.') < 0) {
+                    return false;
+                } else {
+                    Double double1 = Double.parseDouble(fields1[j]);
+                    Double double2 = Double.parseDouble(fields2[j]);
+                    float float1 = (float) double1.doubleValue();
+                    float float2 = (float) double2.doubleValue();
+
+                    if (Math.abs(float1 - float2) == 0)
+                        continue;
+                    else {
+                        return false;
+                    }
+                }
+            }
+        }
+        return true;
+    }
+
+}
diff --git a/genomix/genomix-pregelix/src/test/resources/cluster/cluster.properties b/genomix/genomix-pregelix/src/test/resources/cluster/cluster.properties
new file mode 100644
index 0000000..14f8bd4
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/cluster/cluster.properties
@@ -0,0 +1,37 @@
+#The CC port for Hyracks clients
+CC_CLIENTPORT=3099
+
+#The CC port for Hyracks cluster management
+CC_CLUSTERPORT=1099
+
+#The directory of hyracks binaries
+HYRACKS_HOME=../../../../hyracks
+
+#The tmp directory for cc to install jars
+CCTMP_DIR=/tmp/t1
+
+#The tmp directory for nc to install jars
+NCTMP_DIR=/tmp/t2
+
+#The directory to put cc logs
+CCLOGS_DIR=$CCTMP_DIR/logs
+
+#The directory to put nc logs
+NCLOGS_DIR=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS="/tmp/t3,/tmp/t4"
+
+#The JAVA_HOME
+JAVA_HOME=$JAVA_HOME
+
+#The frame size of the internal dataflow engine
+FRAME_SIZE=65536
+
+#CC JAVA_OPTS
+CCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx3g -Djava.util.logging.config.file=logging.properties"
+# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/genomix/genomix-pregelix/src/test/resources/cluster/stores.properties b/genomix/genomix-pregelix/src/test/resources/cluster/stores.properties
new file mode 100644
index 0000000..daf881e
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/cluster/stores.properties
@@ -0,0 +1 @@
+store=teststore
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/src/test/resources/hadoop/conf/core-site.xml b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/core-site.xml
new file mode 100644
index 0000000..47dfac5
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/core-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+<property>
+    <name>fs.default.name</name>
+    <value>hdfs://127.0.0.1:31888</value>
+</property>
+<property>
+    <name>hadoop.tmp.dir</name>
+    <value>/tmp/hadoop</value>
+</property>
+
+
+</configuration>
diff --git a/genomix/genomix-pregelix/src/test/resources/hadoop/conf/hdfs-site.xml b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/hdfs-site.xml
new file mode 100644
index 0000000..8d29b1d
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/hdfs-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+<property>
+   <name>dfs.replication</name>
+   <value>1</value>
+</property>
+
+<property>
+	<name>dfs.block.size</name>
+	<value>65536</value>
+</property>
+
+</configuration>
diff --git a/genomix/genomix-pregelix/src/test/resources/hadoop/conf/log4j.properties b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/log4j.properties
new file mode 100755
index 0000000..d5e6004
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/log4j.properties
@@ -0,0 +1,94 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=FATAL,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=FATAL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this 
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
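
This log4j.properties keeps Hadoop quiet during tests: the root logger is FATAL on the console appender, and the DRFA/TLA/RFA appenders stay defined but effectively unused. A small, hypothetical sketch of the effect on a log4j 1.x logger configured from this file (the explicit path is an assumption; when the file sits on the test classpath, log4j picks it up automatically):

    import org.apache.log4j.Logger;
    import org.apache.log4j.PropertyConfigurator;

    public class Log4jDemo {
        public static void main(String[] args) {
            // Hypothetical explicit configuration; the path mirrors the resource added above.
            PropertyConfigurator.configure("src/test/resources/hadoop/conf/log4j.properties");
            Logger log = Logger.getLogger(Log4jDemo.class);
            log.info("suppressed: the root logger threshold is FATAL");
            log.fatal("printed to System.err by the console appender");
        }
    }
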
diff --git a/genomix/genomix-pregelix/src/test/resources/hadoop/conf/mapred-site.xml b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/mapred-site.xml
new file mode 100644
index 0000000..f75b072
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/mapred-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+  <property>
+    <name>mapred.job.tracker</name>
+    <value>localhost:29007</value>
+  </property>
+  <property>
+     <name>mapred.tasktracker.map.tasks.maximum</name>
+     <value>20</value>
+  </property>
+   <property>
+      <name>mapred.tasktracker.reduce.tasks.maximum</name>
+      <value>20</value>
+   </property>
+   <property>
+      <name>mapred.max.split.size</name>
+      <value>128000</value>
+   </property>
+
+</configuration>
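
Together, core-site.xml, hdfs-site.xml, and mapred-site.xml point the tests at a local single-node setup: namenode at hdfs://127.0.0.1:31888, job tracker at localhost:29007, replication factor 1, a deliberately tiny 64 KB block size, and a 128000-byte maximum split size. A minimal sketch of loading them into a Hadoop Configuration outside the test harness; the relative paths are assumptions:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;

    public class TestConfPeek {
        public static void main(String[] args) {
            // Load only the three test-resource site files, skipping Hadoop's bundled defaults.
            Configuration conf = new Configuration(false);
            conf.addResource(new Path("src/test/resources/hadoop/conf/core-site.xml"));
            conf.addResource(new Path("src/test/resources/hadoop/conf/hdfs-site.xml"));
            conf.addResource(new Path("src/test/resources/hadoop/conf/mapred-site.xml"));
            System.out.println(conf.get("fs.default.name"));     // hdfs://127.0.0.1:31888
            System.out.println(conf.get("dfs.block.size"));      // 65536
            System.out.println(conf.get("mapred.job.tracker"));  // localhost:29007
        }
    }
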
diff --git a/genomix/genomix-pregelix/src/test/resources/hyracks-deployment.properties b/genomix/genomix-pregelix/src/test/resources/hyracks-deployment.properties
new file mode 100644
index 0000000..9c42b89
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/hyracks-deployment.properties
@@ -0,0 +1,2 @@
+#cc.bootstrap.class=edu.uci.ics.asterix.hyracks.bootstrap.CCBootstrapImpl
+nc.bootstrap.class=edu.uci.ics.pregelix.runtime.bootstrap.NCBootstrapImpl
diff --git a/genomix/genomix-pregelix/src/test/resources/ignore.txt b/genomix/genomix-pregelix/src/test/resources/ignore.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/ignore.txt
diff --git a/genomix/genomix-pregelix/src/test/resources/log4j.properties b/genomix/genomix-pregelix/src/test/resources/log4j.properties
new file mode 100755
index 0000000..d5e6004
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/log4j.properties
@@ -0,0 +1,94 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=FATAL,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=FATAL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this 
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
diff --git a/genomix/genomix-pregelix/src/test/resources/logging.properties b/genomix/genomix-pregelix/src/test/resources/logging.properties
new file mode 100644
index 0000000..b8f2be9
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/logging.properties
@@ -0,0 +1,66 @@
+############################################################
+#  	Default Logging Configuration File
+#
+# You can use a different file by specifying a filename
+# with the java.util.logging.config.file system property.  
+# For example java -Djava.util.logging.config.file=myfile
+############################################################
+
+############################################################
+#  	Global properties
+############################################################
+
+# "handlers" specifies a comma separated list of log Handler 
+# classes.  These handlers will be installed during VM startup.
+# Note that these classes must be on the system classpath.
+# By default we only configure a ConsoleHandler, which will only
+# show messages at the INFO and above levels.
+
+handlers= java.util.logging.ConsoleHandler
+
+# To also add the FileHandler, use the following line instead.
+
+# handlers= java.util.logging.FileHandler, java.util.logging.ConsoleHandler
+
+# Default global logging level.
+# This specifies which kinds of events are logged across
+# all loggers.  For any given facility this global level
+# can be overridden by a facility-specific level.
+# Note that the ConsoleHandler also has a separate level
+# setting to limit messages printed to the console.
+
+.level= SEVERE
+# .level= INFO
+# .level= FINE
+# .level = FINEST
+
+############################################################
+# Handler specific properties.
+# Describes specific configuration info for Handlers.
+############################################################
+
+# default file output is in user's home directory.
+
+# java.util.logging.FileHandler.pattern = %h/java%u.log
+# java.util.logging.FileHandler.limit = 50000
+# java.util.logging.FileHandler.count = 1
+# java.util.logging.FileHandler.formatter = java.util.logging.XMLFormatter
+
+# Limit the messages that are printed on the console to FINEST and above.
+
+java.util.logging.ConsoleHandler.level = FINEST
+java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter
+
+
+############################################################
+# Facility specific properties.
+# Provides extra control for each logger.
+############################################################
+
+# For example, set the com.xyz.foo logger to only log SEVERE
+# messages:
+
+#edu.uci.ics.asterix.level = FINE
+#edu.uci.ics.algebricks.level = FINE
+edu.uci.ics.hyracks.level = SEVERE
+#edu.uci.ics.hyracks.control.nc.net.level = FINE
\ No newline at end of file
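
logging.properties drives java.util.logging in the test JVMs (via -Djava.util.logging.config.file): the global level is SEVERE, the ConsoleHandler is opened up to FINEST, and the edu.uci.ics.hyracks namespace is pinned to SEVERE. A small illustrative sketch of what that means for a logger under that namespace (the class and message text are made up):

    import java.util.logging.Logger;

    public class JulDemo {
        public static void main(String[] args) {
            // Run with: java -Djava.util.logging.config.file=src/test/resources/logging.properties JulDemo
            Logger log = Logger.getLogger("edu.uci.ics.hyracks.example");
            log.info("suppressed: edu.uci.ics.hyracks.level is SEVERE");
            log.severe("printed: SEVERE passes both the logger level and the ConsoleHandler level");
        }
    }
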
diff --git a/genomix/genomix-pregelix/src/test/resources/only.txt b/genomix/genomix-pregelix/src/test/resources/only.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/only.txt
diff --git a/genomix/genomix-pregelix/src/test/resources/topology.xml b/genomix/genomix-pregelix/src/test/resources/topology.xml
new file mode 100755
index 0000000..2a6c380
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/topology.xml
@@ -0,0 +1,7 @@
+<cluster-topology>
+    <network-switch name="Global">
+        <network-switch name="local">
+            <terminal name="127.1.0.1"/>
+        </network-switch>
+    </network-switch>
+</cluster-topology>
\ No newline at end of file
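
topology.xml models the cluster as nested network switches with terminals at the leaves; here one local switch under the Global switch holds the single loopback terminal 127.1.0.1. The cluster controller consumes this file through its own topology machinery, so the following is only a hypothetical standalone DOM peek that lists the terminals declared in it:

    import java.io.File;
    import javax.xml.parsers.DocumentBuilderFactory;
    import org.w3c.dom.Document;
    import org.w3c.dom.Element;
    import org.w3c.dom.NodeList;

    public class TopologyPeek {
        public static void main(String[] args) throws Exception {
            // Hypothetical relative path to the test resource added above.
            Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
                    .parse(new File("src/test/resources/topology.xml"));
            NodeList terminals = doc.getElementsByTagName("terminal");
            for (int i = 0; i < terminals.getLength(); i++) {
                System.out.println("terminal: " + ((Element) terminals.item(i)).getAttribute("name"));
            }
        }
    }
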
diff --git a/genomix/pom.xml b/genomix/pom.xml
new file mode 100644
index 0000000..6fd28e8
--- /dev/null
+++ b/genomix/pom.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<groupId>edu.uci.ics.hyracks</groupId>
+	<artifactId>genomix</artifactId>
+	<version>0.2.4-SNAPSHOT</version>
+    <packaging>pom</packaging>
+	<name>genomix</name>
+
+    <distributionManagement>
+		<repository>
+			<id>hyracks-releases</id>
+			<url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-releases/</url>
+		</repository>
+		<snapshotRepository>
+			<id>hyracks-snapshots</id>
+			<url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-snapshots/</url>
+		</snapshotRepository>
+	</distributionManagement>
+
+	<repositories>
+		<repository>
+			<id>hyracks-public</id>
+			<url>http://obelix.ics.uci.edu/nexus/content/groups/hyracks-public/</url>
+		</repository>
+		<repository>
+			<id>jboss-public</id>
+			<url>https://repository.jboss.org/nexus/content/groups/public/</url>
+		</repository>
+	</repositories>
+
+	<pluginRepositories>
+		<pluginRepository>
+			<id>hyracks-public</id>
+			<url>http://obelix.ics.uci.edu/nexus/content/groups/hyracks-public/</url>
+			<releases>
+				<updatePolicy>always</updatePolicy>
+			</releases>
+		</pluginRepository>
+	</pluginRepositories>
+
+    <modules>
+        <module>genomix-data</module>
+        <module>genomix-hyracks</module>
+        <module>genomix-hadoop</module>
+	<module>genomix-pregelix</module>
+    </modules>
+</project>
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksClientInterfaceFunctions.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksClientInterfaceFunctions.java
index 0467eae..48dfb1c 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksClientInterfaceFunctions.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksClientInterfaceFunctions.java
@@ -181,6 +181,74 @@
         }
     }
 
+    public static class GetDatasetDirectoryServiceInfoFunction extends Function {
+        private static final long serialVersionUID = 1L;
+
+        @Override
+        public FunctionId getFunctionId() {
+            return FunctionId.GET_DATASET_DIRECTORY_SERIVICE_INFO;
+        }
+    }
+
+    public static class GetDatasetResultStatusFunction extends Function {
+        private static final long serialVersionUID = 1L;
+
+        private final JobId jobId;
+
+        private final ResultSetId rsId;
+
+        public GetDatasetResultStatusFunction(JobId jobId, ResultSetId rsId) {
+            this.jobId = jobId;
+            this.rsId = rsId;
+        }
+
+        @Override
+        public FunctionId getFunctionId() {
+            return FunctionId.GET_DATASET_RESULT_STATUS;
+        }
+
+        public JobId getJobId() {
+            return jobId;
+        }
+
+        public ResultSetId getResultSetId() {
+            return rsId;
+        }
+    }
+
+    public static class GetDatasetResultLocationsFunction extends Function {
+        private static final long serialVersionUID = 1L;
+
+        private final JobId jobId;
+
+        private final ResultSetId rsId;
+
+        private final DatasetDirectoryRecord[] knownRecords;
+
+        public GetDatasetResultLocationsFunction(JobId jobId, ResultSetId rsId, DatasetDirectoryRecord[] knownRecords) {
+            this.jobId = jobId;
+            this.rsId = rsId;
+            this.knownRecords = knownRecords;
+        }
+
+        @Override
+        public FunctionId getFunctionId() {
+            return FunctionId.GET_DATASET_RESULT_LOCATIONS;
+        }
+
+        public JobId getJobId() {
+            return jobId;
+        }
+
+        public ResultSetId getResultSetId() {
+            return rsId;
+        }
+
+        public DatasetDirectoryRecord[] getKnownRecords() {
+            return knownRecords;
+        }
+    }
+
     public static class WaitForCompletionFunction extends Function {
         private static final long serialVersionUID = 1L;
 
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksConnection.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksConnection.java
index eb25b70..da14e9b 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksConnection.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksConnection.java
@@ -106,6 +106,10 @@
         return hci.getDatasetDirectoryServiceInfo();
     }
 
+    public NetworkAddress getDatasetDirectoryServiceInfo() throws Exception {
+        return hci.getDatasetDirectoryServiceInfo();
+    }
+
     @Override
     public void waitForCompletion(JobId jobId) throws Exception {
         hci.waitForCompletion(jobId);
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientConnection.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientConnection.java
index 41b07d7..4521149 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientConnection.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientConnection.java
@@ -89,6 +89,14 @@
     public NetworkAddress getDatasetDirectoryServiceInfo() throws Exception;
 
     /**
+     * Gets the IP Address and port for the DatasetDirectoryService wrapped in NetworkAddress
+     * 
+     * @return {@link NetworkAddress}
+     * @throws Exception
+     */
+    public NetworkAddress getDatasetDirectoryServiceInfo() throws Exception;
+
+    /**
      * Waits until the specified job has completed, either successfully or has
      * encountered a permanent failure.
      * 
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientInterface.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientInterface.java
index aabc351..e1a4185 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientInterface.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientInterface.java
@@ -35,6 +35,8 @@
 
     public NetworkAddress getDatasetDirectoryServiceInfo() throws Exception;
 
+    public NetworkAddress getDatasetDirectoryServiceInfo() throws Exception;
+
     public void waitForCompletion(JobId jobId) throws Exception;
 
     public Map<String, NodeControllerInfo> getNodeControllersInfo() throws Exception;
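
With getDatasetDirectoryServiceInfo() exposed on the client connection and client interface, an application can ask the cluster controller where the dataset directory service listens before fetching result partitions. A hedged usage sketch; the CC host/port and the NetworkAddress import path are assumptions rather than values taken from this diff:

    import edu.uci.ics.hyracks.api.client.HyracksConnection;
    import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
    import edu.uci.ics.hyracks.api.comm.NetworkAddress;

    public class DatasetDirectoryLookup {
        public static void main(String[] args) throws Exception {
            // Hypothetical CC client address; substitute the real cluster controller host/port.
            IHyracksClientConnection hcc = new HyracksConnection("localhost", 1098);
            NetworkAddress addr = hcc.getDatasetDirectoryServiceInfo();
            System.out.println("dataset directory service at " + addr);
        }
    }
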
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/ClusterControllerService.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/ClusterControllerService.java
index 9b8a996..623c54f 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/ClusterControllerService.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/ClusterControllerService.java
@@ -361,6 +361,27 @@
                     return;
                 }
 
+                case GET_DATASET_DIRECTORY_SERIVICE_INFO: {
+                    workQueue.schedule(new GetDatasetDirectoryServiceInfoWork(ClusterControllerService.this,
+                            new IPCResponder<NetworkAddress>(handle, mid)));
+                    return;
+                }
+
+                case GET_DATASET_RESULT_STATUS: {
+                    HyracksClientInterfaceFunctions.GetDatasetResultStatusFunction gdrlf = (HyracksClientInterfaceFunctions.GetDatasetResultStatusFunction) fn;
+                    workQueue.schedule(new GetResultStatusWork(ClusterControllerService.this, gdrlf.getJobId(), gdrlf
+                            .getResultSetId(), new IPCResponder<Status>(handle, mid)));
+                    return;
+                }
+
+                case GET_DATASET_RESULT_LOCATIONS: {
+                    HyracksClientInterfaceFunctions.GetDatasetResultLocationsFunction gdrlf = (HyracksClientInterfaceFunctions.GetDatasetResultLocationsFunction) fn;
+                    workQueue.schedule(new GetResultPartitionLocationsWork(ClusterControllerService.this, gdrlf
+                            .getJobId(), gdrlf.getResultSetId(), gdrlf.getKnownRecords(),
+                            new IPCResponder<DatasetDirectoryRecord[]>(handle, mid)));
+                    return;
+                }
+
                 case WAIT_FOR_COMPLETION: {
                     HyracksClientInterfaceFunctions.WaitForCompletionFunction wfcf = (HyracksClientInterfaceFunctions.WaitForCompletionFunction) fn;
                     workQueue.schedule(new WaitForJobCompletionWork(ClusterControllerService.this, wfcf.getJobId(),
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/MaterializingPipelinedPartition.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/MaterializingPipelinedPartition.java
index 62320c5..1cd98fb 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/MaterializingPipelinedPartition.java
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/MaterializingPipelinedPartition.java
@@ -139,7 +139,7 @@
         if (LOGGER.isLoggable(Level.INFO)) {
             LOGGER.info("open(" + pid + " by " + taId);
         }
-        fRef = manager.getFileFactory().createUnmanagedWorkspaceFile(pid.toString());
+        fRef = manager.getFileFactory().createUnmanagedWorkspaceFile(pid.toString().replace(':', '_'));
         handle = ctx.getIOManager().open(fRef, IIOManager.FileReadWriteMode.READ_WRITE,
                 IIOManager.FileSyncMode.METADATA_ASYNC_DATA_ASYNC);
         size = 0;
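
The one-line change above sanitizes the partition id before it is used as a workspace file name: PartitionId.toString() contains ':' separators, which are not safe in file names on every platform, so they are mapped to '_'. A trivial illustration with a hypothetical id string:

    public class FileNameSanitizer {
        public static void main(String[] args) {
            // Hypothetical partition id string; the real one comes from PartitionId.toString().
            String pid = "PID:JID:0:2:1";
            System.out.println(pid.replace(':', '_')); // PID_JID_0_2_1
        }
    }
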
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/GroupRunMergingFrameReader.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/GroupRunMergingFrameReader.java
new file mode 100644
index 0000000..d63609e
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/GroupRunMergingFrameReader.java
@@ -0,0 +1,377 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import edu.uci.ics.hyracks.api.comm.IFrameReader;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+
+public class GroupRunMergingFrameReader implements IFrameReader {
+
+    private static final int INT_SIZE = 4;
+
+    private final IHyracksTaskContext ctx;
+    private final IFrameReader[] runCursors;
+    private final List<ByteBuffer> inFrames;
+    private final int[] keyFields;
+    private final int framesLimit;
+    private final int tableSize;
+    private final IBinaryComparator[] comparators;
+    private final RecordDescriptor recordDesc;
+    private final FrameTupleAppender outFrameAppender;
+    private final ITuplePartitionComputer tpc;
+    private ReferencedPriorityQueue topTuples;
+    private int[] tupleIndexes;
+    private int[] currentFrameIndexForRuns, bufferedFramesForRuns;
+    private FrameTupleAccessor[] tupleAccessors;
+    private int framesBuffered;
+
+    private final IAggregatorDescriptor grouper;
+    private final AggregateState groupState;
+
+    private final boolean isLoadBuffered;
+
+    private final boolean isFinalPhase;
+
+    private final ArrayTupleBuilder groupTupleBuilder, outputTupleBuilder;
+
+    private byte[] groupResultCache;
+    private ByteBuffer groupResultCacheBuffer;
+    private IFrameTupleAccessor groupResultCacheAccessor;
+    private FrameTupleAppender groupResultCacheAppender;
+
+    // FIXME
+    long queueCompCounter = 0, mergeCompCounter = 0;
+
+    public GroupRunMergingFrameReader(IHyracksTaskContext ctx, IFrameReader[] runCursors, int framesLimit,
+            int tableSize, int[] keyFields, ITuplePartitionComputer tpc, IBinaryComparator[] comparators,
+            IAggregatorDescriptor grouper, RecordDescriptor recordDesc, boolean isFinalPhase) {
+        this(ctx, runCursors, framesLimit, tableSize, keyFields, tpc, comparators, grouper, recordDesc, isFinalPhase,
+                false);
+    }
+
+    public GroupRunMergingFrameReader(IHyracksTaskContext ctx, IFrameReader[] runCursors, int framesLimit,
+            int tableSize, int[] keyFields, ITuplePartitionComputer tpc, IBinaryComparator[] comparators,
+            IAggregatorDescriptor grouper, RecordDescriptor recordDesc, boolean isFinalPhase, boolean isLoadBuffered) {
+        this.ctx = ctx;
+        this.runCursors = runCursors;
+        this.inFrames = new ArrayList<ByteBuffer>();
+        this.keyFields = keyFields;
+        this.tableSize = tableSize;
+        this.comparators = comparators;
+        this.recordDesc = recordDesc;
+        this.grouper = grouper;
+        this.groupState = grouper.createAggregateStates();
+        this.outFrameAppender = new FrameTupleAppender(ctx.getFrameSize());
+        this.isLoadBuffered = isLoadBuffered;
+        this.isFinalPhase = isFinalPhase;
+        this.framesLimit = framesLimit;
+        this.tpc = tpc;
+
+        this.groupTupleBuilder = new ArrayTupleBuilder(recordDesc.getFieldCount());
+        this.outputTupleBuilder = new ArrayTupleBuilder(recordDesc.getFieldCount());
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.comm.IFrameReader#open()
+     */
+    @Override
+    public void open() throws HyracksDataException {
+        if (isLoadBuffered) {
+            while (inFrames.size() + 1 < framesLimit) {
+                inFrames.add(ctx.allocateFrame());
+            }
+            framesBuffered = inFrames.size() / runCursors.length;
+        } else {
+            while (inFrames.size() < framesLimit - 1 && inFrames.size() < runCursors.length) {
+                inFrames.add(ctx.allocateFrame());
+            }
+            framesBuffered = 1;
+        }
+        tupleAccessors = new FrameTupleAccessor[runCursors.length];
+        currentFrameIndexForRuns = new int[runCursors.length];
+        bufferedFramesForRuns = new int[runCursors.length];
+        Comparator<ReferenceEntryWithBucketID> comparator = createEntryComparator(comparators);
+        topTuples = new ReferencedPriorityQueue(ctx.getFrameSize(), recordDesc, runCursors.length, comparator);
+        tupleIndexes = new int[runCursors.length];
+
+        for (int i = 0; i < runCursors.length; i++) {
+            int runIndex = topTuples.peek().getRunid();
+            tupleIndexes[runIndex] = 0;
+            runCursors[runIndex].open();
+            for (int j = 0; j < framesBuffered; j++) {
+
+                if (runCursors[runIndex].nextFrame(inFrames.get(runIndex * framesBuffered + j))) {
+
+                    bufferedFramesForRuns[runIndex]++;
+                    if (j == 0) {
+                        tupleAccessors[runIndex] = new FrameTupleAccessor(ctx.getFrameSize(), recordDesc);
+                        tupleAccessors[runIndex].reset(inFrames.get(runIndex * framesBuffered + j));
+                        setNextTopTuple(runIndex, tupleIndexes, runCursors, tupleAccessors, topTuples);
+                        currentFrameIndexForRuns[runIndex] = runIndex * framesBuffered;
+                    }
+                } else {
+                    break;
+                }
+            }
+        }
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.comm.IFrameReader#nextFrame(java.nio.ByteBuffer)
+     */
+    @Override
+    public boolean nextFrame(ByteBuffer buffer) throws HyracksDataException {
+        outFrameAppender.reset(buffer, true);
+
+        while (!topTuples.areRunsExhausted()) {
+            ReferenceEntryWithBucketID top = topTuples.peek();
+            int runIndex = top.getRunid();
+            FrameTupleAccessor fta = top.getAccessor();
+            int tupleIndex = top.getTupleIndex();
+
+            // check whether we can do aggregation
+            boolean needInsert = true;
+            if (groupResultCache != null && groupResultCacheAccessor.getTupleCount() > 0) {
+                groupResultCacheAccessor.reset(ByteBuffer.wrap(groupResultCache));
+                if (compareFrameTuples(fta, tupleIndex, groupResultCacheAccessor, 0) == 0) {
+                    needInsert = false;
+                }
+            }
+
+            if (needInsert) {
+
+                // try to flush the group cache into the output buffer, if any
+                if (groupResultCacheAccessor != null && groupResultCacheAccessor.getFieldCount() > 0) {
+                    outputTupleBuilder.reset();
+                    for (int k = 0; k < keyFields.length; k++) {
+                        outputTupleBuilder.addField(groupResultCacheAccessor, 0, k);
+                    }
+                    if (isFinalPhase) {
+                        grouper.outputFinalResult(outputTupleBuilder, groupResultCacheAccessor, 0, groupState);
+                    } else {
+                        grouper.outputPartialResult(outputTupleBuilder, groupResultCacheAccessor, 0, groupState);
+                    }
+
+                    // return if the buffer is full
+                    if (!outFrameAppender.append(outputTupleBuilder.getFieldEndOffsets(),
+                            outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
+                        return true;
+                    }
+                    groupResultCacheBuffer.putInt(groupResultCache.length - 4, 0);
+                }
+
+                groupTupleBuilder.reset();
+                for (int k : keyFields) {
+                    groupTupleBuilder.addField(fta, tupleIndex, k);
+                }
+                grouper.init(groupTupleBuilder, fta, tupleIndex, groupState);
+
+                // enlarge the cache buffer if necessary
+                int requiredSize = groupTupleBuilder.getSize() + groupTupleBuilder.getFieldEndOffsets().length
+                        * INT_SIZE + 2 * INT_SIZE;
+
+                if (groupResultCache == null || groupResultCache.length < requiredSize) {
+                    groupResultCache = new byte[requiredSize];
+                    groupResultCacheAppender = new FrameTupleAppender(groupResultCache.length);
+                    groupResultCacheBuffer = ByteBuffer.wrap(groupResultCache);
+                    groupResultCacheAccessor = new FrameTupleAccessor(groupResultCache.length, recordDesc);
+                }
+
+                // always reset the group cache
+                groupResultCacheAppender.reset(groupResultCacheBuffer, true);
+                if (!groupResultCacheAppender.append(groupTupleBuilder.getFieldEndOffsets(),
+                        groupTupleBuilder.getByteArray(), 0, groupTupleBuilder.getSize())) {
+                    throw new HyracksDataException("The partial result is too large to be initialized in a frame.");
+                }
+
+                groupResultCacheAccessor.reset(groupResultCacheBuffer);
+
+            } else {
+                grouper.aggregate(fta, tupleIndex, groupResultCacheAccessor, 0, groupState);
+            }
+
+            ++tupleIndexes[runIndex];
+            setNextTopTuple(runIndex, tupleIndexes, runCursors, tupleAccessors, topTuples);
+        }
+
+        if (groupResultCacheAccessor != null && groupResultCacheAccessor.getTupleCount() > 0) {
+            outputTupleBuilder.reset();
+            for (int k = 0; k < keyFields.length; k++) {
+                outputTupleBuilder.addField(groupResultCacheAccessor, 0, k);
+            }
+            if (isFinalPhase) {
+                grouper.outputFinalResult(outputTupleBuilder, groupResultCacheAccessor, 0, groupState);
+            } else {
+                grouper.outputPartialResult(outputTupleBuilder, groupResultCacheAccessor, 0, groupState);
+            }
+
+            // return if the buffer is full
+            if (!outFrameAppender.append(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(), 0,
+                    outputTupleBuilder.getSize())) {
+                return true;
+            }
+
+            groupResultCacheAccessor = null;
+            groupResultCache = null;
+            groupResultCacheBuffer = null;
+            groupResultCacheAppender = null;
+        }
+
+        if (outFrameAppender.getTupleCount() > 0) {
+            return true;
+        }
+
+        return false;
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.comm.IFrameReader#close()
+     */
+    @Override
+    public void close() throws HyracksDataException {
+        for (int i = 0; i < runCursors.length; ++i) {
+            closeRun(i, runCursors, tupleAccessors);
+        }
+    }
+
+    private void setNextTopTuple(int runIndex, int[] tupleIndexes, IFrameReader[] runCursors,
+            FrameTupleAccessor[] tupleAccessors, ReferencedPriorityQueue topTuples) throws HyracksDataException {
+        boolean exists = hasNextTuple(runIndex, tupleIndexes, runCursors, tupleAccessors);
+        if (exists) {
+            int h = tpc.partition(tupleAccessors[runIndex], tupleIndexes[runIndex], tableSize);
+            topTuples.popAndReplace(tupleAccessors[runIndex], tupleIndexes[runIndex], h);
+        } else {
+            topTuples.pop();
+            closeRun(runIndex, runCursors, tupleAccessors);
+        }
+    }
+
+    private boolean hasNextTuple(int runIndex, int[] tupleIndexes, IFrameReader[] runCursors,
+            FrameTupleAccessor[] tupleAccessors) throws HyracksDataException {
+        if (tupleAccessors[runIndex] == null || runCursors[runIndex] == null) {
+            return false;
+        } else if (tupleIndexes[runIndex] >= tupleAccessors[runIndex].getTupleCount()) {
+            if (currentFrameIndexForRuns[runIndex] - runIndex * framesBuffered < bufferedFramesForRuns[runIndex] - 1) {
+                currentFrameIndexForRuns[runIndex]++;
+            } else {
+                bufferedFramesForRuns[runIndex] = 0;
+                for (int j = 0; j < framesBuffered; j++) {
+                    if (runCursors[runIndex].nextFrame(inFrames.get(runIndex * framesBuffered + j))) {
+                        bufferedFramesForRuns[runIndex]++;
+                    } else {
+                        break;
+                    }
+                }
+                currentFrameIndexForRuns[runIndex] = runIndex * framesBuffered;
+            }
+            if (bufferedFramesForRuns[runIndex] > 0) {
+                tupleAccessors[runIndex].reset(inFrames.get(currentFrameIndexForRuns[runIndex]));
+                tupleIndexes[runIndex] = 0;
+                return true;
+            } else {
+                return false;
+            }
+        } else {
+            return true;
+        }
+    }
+
+    private void closeRun(int index, IFrameReader[] runCursors, IFrameTupleAccessor[] tupleAccessors)
+            throws HyracksDataException {
+        if (runCursors[index] != null) {
+            runCursors[index].close();
+            runCursors[index] = null;
+            tupleAccessors[index] = null;
+        }
+    }
+
+    private int compareFrameTuples(IFrameTupleAccessor fta1, int j1, IFrameTupleAccessor fta2, int j2) {
+        mergeCompCounter++;
+        byte[] b1 = fta1.getBuffer().array();
+        byte[] b2 = fta2.getBuffer().array();
+        for (int f = 0; f < keyFields.length; ++f) {
+            int fIdx = f;
+            int s1 = fta1.getTupleStartOffset(j1) + fta1.getFieldSlotsLength() + fta1.getFieldStartOffset(j1, fIdx);
+            int l1 = fta1.getFieldLength(j1, fIdx);
+            int s2 = fta2.getTupleStartOffset(j2) + fta2.getFieldSlotsLength() + fta2.getFieldStartOffset(j2, fIdx);
+            int l2_start = fta2.getFieldStartOffset(j2, fIdx);
+            int l2_end = fta2.getFieldEndOffset(j2, fIdx);
+            int l2 = l2_end - l2_start;
+            int c = comparators[f].compare(b1, s1, l1, b2, s2, l2);
+            if (c != 0) {
+                return c;
+            }
+        }
+        return 0;
+    }
+
+    private Comparator<ReferenceEntryWithBucketID> createEntryComparator(final IBinaryComparator[] comparators) {
+        return new Comparator<ReferenceEntryWithBucketID>() {
+            public int compare(ReferenceEntryWithBucketID tp1, ReferenceEntryWithBucketID tp2) {
+
+                queueCompCounter++;
+
+                int cmp = tp1.getBucketID() - tp2.getBucketID();
+
+                if (cmp != 0) {
+                    return cmp;
+                }
+
+                FrameTupleAccessor fta1 = (FrameTupleAccessor) tp1.getAccessor();
+                FrameTupleAccessor fta2 = (FrameTupleAccessor) tp2.getAccessor();
+                int j1 = tp1.getTupleIndex();
+                int j2 = tp2.getTupleIndex();
+                byte[] b1 = fta1.getBuffer().array();
+                byte[] b2 = fta2.getBuffer().array();
+                for (int f = 0; f < keyFields.length; ++f) {
+                    int fIdx = keyFields[f];
+                    int s1 = fta1.getTupleStartOffset(j1) + fta1.getFieldSlotsLength()
+                            + fta1.getFieldStartOffset(j1, fIdx);
+                    int l1 = fta1.getFieldEndOffset(j1, fIdx) - fta1.getFieldStartOffset(j1, fIdx);
+                    int s2 = fta2.getTupleStartOffset(j2) + fta2.getFieldSlotsLength()
+                            + fta2.getFieldStartOffset(j2, fIdx);
+                    int l2 = fta2.getFieldEndOffset(j2, fIdx) - fta2.getFieldStartOffset(j2, fIdx);
+                    int c = comparators[f].compare(b1, s1, l1, b2, s2, l2);
+                    if (c != 0) {
+                        return c;
+                    }
+                }
+
+                return cmp;
+            }
+        };
+    }
+}
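
GroupRunMergingFrameReader performs a k-way merge over sorted, grouped runs: a ReferencedPriorityQueue picks the next smallest (bucket, key) across runs, and a small group-result cache lets consecutive records with the same key be aggregated once before a single output tuple is appended. A simplified, self-contained sketch of that idea, with a hypothetical Rec type and integer-sum aggregation standing in for the frame accessors and IAggregatorDescriptor:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.List;
    import java.util.PriorityQueue;

    public class MergeAggregateSketch {

        static final class Rec {
            final String key;
            final int val;
            Rec(String key, int val) { this.key = key; this.val = val; }
            public String toString() { return key + "=" + val; }
        }

        static List<Rec> merge(final List<List<Rec>> runs) {
            // One cursor {runIndex, position} per run, ordered by the cursor's current key.
            PriorityQueue<int[]> queue = new PriorityQueue<>(
                    Comparator.comparing((int[] c) -> runs.get(c[0]).get(c[1]).key));
            for (int r = 0; r < runs.size(); r++) {
                if (!runs.get(r).isEmpty()) {
                    queue.add(new int[] { r, 0 });
                }
            }
            List<Rec> out = new ArrayList<>();
            Rec cached = null;                      // plays the role of the group-result cache
            while (!queue.isEmpty()) {
                int[] c = queue.poll();
                Rec rec = runs.get(c[0]).get(c[1]);
                if (cached != null && cached.key.equals(rec.key)) {
                    cached = new Rec(cached.key, cached.val + rec.val); // aggregate into the cache
                } else {
                    if (cached != null) {
                        out.add(cached);            // flush the finished group
                    }
                    cached = rec;                   // start a new group
                }
                if (c[1] + 1 < runs.get(c[0]).size()) {
                    queue.add(new int[] { c[0], c[1] + 1 });            // advance this run's cursor
                }
            }
            if (cached != null) {
                out.add(cached);
            }
            return out;
        }

        public static void main(String[] args) {
            List<List<Rec>> runs = Arrays.asList(
                    Arrays.asList(new Rec("a", 1), new Rec("c", 2)),
                    Arrays.asList(new Rec("a", 3), new Rec("b", 1)));
            System.out.println(merge(runs));        // [a=4, b=1, c=2]
        }
    }
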
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGroupHashTable.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGroupHashTable.java
new file mode 100644
index 0000000..6e85cff
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGroupHashTable.java
@@ -0,0 +1,686 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.LinkedList;
+
+import edu.uci.ics.hyracks.api.comm.FrameHelper;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileReader;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileWriter;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.group.hybridhash.FrameTupleAccessorForGroupHashtable;
+import edu.uci.ics.hyracks.dataflow.std.group.hybridhash.FrameTupleAppenderForGroupHashtable;
+import edu.uci.ics.hyracks.dataflow.std.structures.TuplePointer;
+
+public class HybridHashSortGroupHashTable {
+
+    protected static final int INT_SIZE = 4;
+    protected static final int INIT_REF_COUNT = 8;
+    protected static final int PTR_SIZE = 3;
+
+    protected final int tableSize, framesLimit, frameSize;
+
+    protected final ByteBuffer[] headers;
+    protected final ByteBuffer[] contents;
+
+    protected final IHyracksTaskContext ctx;
+
+    protected int currentLargestFrameIndex;
+    protected int totalTupleCount;
+
+    protected final IAggregatorDescriptor aggregator;
+    protected final AggregateState aggState;
+
+    protected final int[] keys, internalKeys;
+
+    private final IBinaryComparator[] comparators;
+
+    protected final ITuplePartitionComputer tpc;
+
+    protected final INormalizedKeyComputer firstNormalizer;
+
+    private ByteBuffer outputBuffer;
+
+    private LinkedList<RunFileReader> runReaders;
+
+    protected TuplePointer matchPointer;
+
+    protected final FrameTupleAccessorForGroupHashtable hashtableRecordAccessor;
+
+    private final FrameTupleAccessorForGroupHashtable compFrameAccessor1, compFrameAccessor2;
+
+    protected final FrameTupleAppenderForGroupHashtable internalAppender;
+
+    private final FrameTupleAppender outputAppender;
+
+    /**
+     * Tuple builder for hash table insertion
+     */
+    protected final ArrayTupleBuilder internalTupleBuilder, outputTupleBuilder;
+
+    /**
+     * pointers for sort records in an entry
+     */
+    protected int[] tPointers;
+
+    protected int usedEntries = 0;
+
+    protected long hashedKeys = 0, hashedRawRec = 0;
+
+    public HybridHashSortGroupHashTable(IHyracksTaskContext ctx, int frameLimits, int tableSize, int[] keys,
+            IBinaryComparator[] comparators, ITuplePartitionComputer tpc,
+            INormalizedKeyComputer firstNormalizerComputer, IAggregatorDescriptor aggregator,
+            RecordDescriptor inRecDesc, RecordDescriptor outRecDesc) {
+        this.ctx = ctx;
+        this.tableSize = tableSize;
+        this.framesLimit = frameLimits;
+        this.frameSize = ctx.getFrameSize();
+
+        this.keys = keys;
+        this.internalKeys = new int[keys.length];
+        for (int i = 0; i < internalKeys.length; i++) {
+            internalKeys[i] = i;
+        }
+
+        this.aggregator = aggregator;
+        this.aggState = aggregator.createAggregateStates();
+
+        this.tpc = tpc;
+        this.comparators = comparators;
+        this.firstNormalizer = firstNormalizerComputer;
+
+        // initialize the hash table
+        int residual = ((tableSize % frameSize) * INT_SIZE * 2) % frameSize == 0 ? 0 : 1;
+        this.headers = new ByteBuffer[tableSize / frameSize * INT_SIZE * 2 + tableSize % frameSize * 2 * INT_SIZE
+                / frameSize + residual];
+
+        this.outputBuffer = ctx.allocateFrame();
+
+        this.contents = new ByteBuffer[framesLimit - 1 - headers.length];
+        this.currentLargestFrameIndex = -1;
+        this.totalTupleCount = 0;
+
+        this.runReaders = new LinkedList<RunFileReader>();
+        this.hashtableRecordAccessor = new FrameTupleAccessorForGroupHashtable(frameSize, outRecDesc);
+        this.compFrameAccessor1 = new FrameTupleAccessorForGroupHashtable(frameSize, outRecDesc);
+        this.compFrameAccessor2 = new FrameTupleAccessorForGroupHashtable(frameSize, outRecDesc);
+
+        this.internalTupleBuilder = new ArrayTupleBuilder(outRecDesc.getFieldCount());
+        this.outputTupleBuilder = new ArrayTupleBuilder(outRecDesc.getFieldCount());
+        this.internalAppender = new FrameTupleAppenderForGroupHashtable(frameSize);
+        this.outputAppender = new FrameTupleAppender(frameSize);
+
+        this.matchPointer = new TuplePointer();
+
+    }
+
+    /**
+     * Reset the header page
+     * 
+     * @param headerFrameIndex
+     */
+    protected void resetHeader(int headerFrameIndex) {
+        for (int i = 0; i < frameSize; i += INT_SIZE) {
+            headers[headerFrameIndex].putInt(i, -1);
+        }
+    }
+
+    /**
+     * Get the header frame index of the given hash table entry
+     * 
+     * @param entry
+     * @return
+     */
+    protected int getHeaderFrameIndex(int entry) {
+        int frameIndex = entry / frameSize * 2 * INT_SIZE + entry % frameSize * 2 * INT_SIZE / frameSize;
+        return frameIndex;
+    }
+
+    /**
+     * Get the tuple index of the given hash table entry
+     * 
+     * @param entry
+     * @return
+     */
+    protected int getHeaderTupleIndex(int entry) {
+        int offset = entry % frameSize * 2 * INT_SIZE % frameSize;
+        return offset;
+    }
+
+    public void insert(FrameTupleAccessor accessor, int tupleIndex) throws HyracksDataException {
+
+        int entry = tpc.partition(accessor, tupleIndex, tableSize);
+
+        hashedRawRec++;
+
+        if (findMatch(entry, accessor, tupleIndex)) {
+            // find match; do aggregation
+            hashtableRecordAccessor.reset(contents[matchPointer.frameIndex]);
+            aggregator.aggregate(accessor, tupleIndex, hashtableRecordAccessor, matchPointer.tupleIndex, aggState);
+        } else {
+
+            internalTupleBuilder.reset();
+            for (int k = 0; k < keys.length; k++) {
+                internalTupleBuilder.addField(accessor, tupleIndex, keys[k]);
+            }
+            aggregator.init(internalTupleBuilder, accessor, tupleIndex, aggState);
+            int insertFrameIndex = -1, insertTupleIndex = -1;
+            boolean inserted = false;
+
+            if (currentLargestFrameIndex < 0) {
+                currentLargestFrameIndex = 0;
+            }
+
+            if (contents[currentLargestFrameIndex] == null) {
+                contents[currentLargestFrameIndex] = ctx.allocateFrame();
+            }
+
+            internalAppender.reset(contents[currentLargestFrameIndex], false);
+            if (internalAppender.append(internalTupleBuilder.getFieldEndOffsets(), internalTupleBuilder.getByteArray(),
+                    0, internalTupleBuilder.getSize())) {
+                inserted = true;
+                insertFrameIndex = currentLargestFrameIndex;
+                insertTupleIndex = internalAppender.getTupleCount() - 1;
+            }
+
+            if (!inserted && currentLargestFrameIndex < contents.length - 1) {
+                currentLargestFrameIndex++;
+                if (contents[currentLargestFrameIndex] == null) {
+                    contents[currentLargestFrameIndex] = ctx.allocateFrame();
+                }
+                internalAppender.reset(contents[currentLargestFrameIndex], true);
+                if (!internalAppender.append(internalTupleBuilder.getFieldEndOffsets(),
+                        internalTupleBuilder.getByteArray(), 0, internalTupleBuilder.getSize())) {
+                    throw new HyracksDataException("Failed to insert an aggregation value.");
+                } else {
+                    insertFrameIndex = currentLargestFrameIndex;
+                    insertTupleIndex = internalAppender.getTupleCount() - 1;
+                    inserted = true;
+                }
+            }
+
+            // memory is full
+            if (!inserted) {
+                // flush hash table and try to insert again
+                flush();
+
+                // update the match point to the header reference
+                matchPointer.frameIndex = -1;
+                matchPointer.tupleIndex = -1;
+                // re-insert
+                currentLargestFrameIndex++;
+                if (contents[currentLargestFrameIndex] == null) {
+                    contents[currentLargestFrameIndex] = ctx.allocateFrame();
+                }
+                internalAppender.reset(contents[currentLargestFrameIndex], true);
+                if (!internalAppender.append(internalTupleBuilder.getFieldEndOffsets(),
+                        internalTupleBuilder.getByteArray(), 0, internalTupleBuilder.getSize())) {
+                    throw new HyracksDataException("Failed to insert an aggregation value.");
+                } else {
+                    insertFrameIndex = currentLargestFrameIndex;
+                    insertTupleIndex = internalAppender.getTupleCount() - 1;
+                }
+            }
+
+            // no match; new insertion
+            if (matchPointer.frameIndex < 0) {
+                // first record for this entry; update the header references
+                int headerFrameIndex = getHeaderFrameIndex(entry);
+                int headerFrameOffset = getHeaderTupleIndex(entry);
+                if (headers[headerFrameIndex] == null) {
+                    headers[headerFrameIndex] = ctx.allocateFrame();
+                    resetHeader(headerFrameIndex);
+                }
+                headers[headerFrameIndex].putInt(headerFrameOffset, insertFrameIndex);
+                headers[headerFrameIndex].putInt(headerFrameOffset + INT_SIZE, insertTupleIndex);
+                usedEntries++;
+
+            } else {
+                // update the previous reference
+                hashtableRecordAccessor.reset(contents[matchPointer.frameIndex]);
+                int refOffset = hashtableRecordAccessor.getTupleHashReferenceOffset(matchPointer.tupleIndex);
+                contents[matchPointer.frameIndex].putInt(refOffset, insertFrameIndex);
+                contents[matchPointer.frameIndex].putInt(refOffset + INT_SIZE, insertTupleIndex);
+            }
+            hashedKeys++;
+            totalTupleCount++;
+        }
+    }
+
+    /**
+     * Flush the hash table directly to the output
+     */
+    public void flushHashtableToOutput(IFrameWriter outputWriter) throws HyracksDataException {
+
+        outputAppender.reset(outputBuffer, true);
+        for (int i = 0; i < contents.length; i++) {
+            if (contents[i] == null) {
+                continue;
+            }
+            hashtableRecordAccessor.reset(contents[i]);
+            int tupleCount = hashtableRecordAccessor.getTupleCount();
+            for (int j = 0; j < tupleCount; j++) {
+                outputTupleBuilder.reset();
+
+                int tupleOffset = hashtableRecordAccessor.getTupleStartOffset(j);
+                int fieldOffset = hashtableRecordAccessor.getFieldCount() * INT_SIZE;
+
+                for (int k = 0; k < internalKeys.length; k++) {
+                    outputTupleBuilder.addField(hashtableRecordAccessor.getBuffer().array(), tupleOffset + fieldOffset
+                            + hashtableRecordAccessor.getFieldStartOffset(j, k),
+                            hashtableRecordAccessor.getFieldLength(j, k));
+                }
+
+                aggregator.outputFinalResult(outputTupleBuilder, hashtableRecordAccessor, j, aggState);
+
+                if (!outputAppender.append(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(),
+                        0, outputTupleBuilder.getSize())) {
+
+                    FrameUtils.flushFrame(outputBuffer, outputWriter);
+
+                    outputAppender.reset(outputBuffer, true);
+                    if (!outputAppender.append(outputTupleBuilder.getFieldEndOffsets(),
+                            outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
+                        throw new HyracksDataException("Failed to flush the hash table to the final output");
+                    }
+                }
+            }
+        }
+
+        if (outputAppender.getTupleCount() > 0) {
+
+            FrameUtils.flushFrame(outputBuffer, outputWriter);
+
+            outputAppender.reset(outputBuffer, true);
+        }
+
+        totalTupleCount = 0;
+        usedEntries = 0;
+    }
+
+    /**
+     * Flush hash table into a run file.
+     * 
+     * @throws HyracksDataException
+     */
+    protected void flush() throws HyracksDataException {
+
+        long methodTimer = System.nanoTime();
+
+        FileReference runFile;
+        try {
+            runFile = ctx.getJobletContext().createManagedWorkspaceFile(
+                    HybridHashSortGroupHashTable.class.getSimpleName());
+        } catch (IOException e) {
+            throw new HyracksDataException(e);
+        }
+        RunFileWriter runWriter = new RunFileWriter(runFile, ctx.getIOManager());
+        runWriter.open();
+        flushEntries(runWriter);
+        runWriter.close();
+        runReaders.add(runWriter.createReader());
+        reset();
+
+        ctx.getCounterContext()
+                .getCounter("optional." + HybridHashSortGroupHashTable.class.getSimpleName() + ".flush.time", true)
+                .update(System.nanoTime() - methodTimer);
+    }
+
+    private void flushEntries(IFrameWriter writer) throws HyracksDataException {
+
+        outputAppender.reset(outputBuffer, true);
+        for (int i = 0; i < tableSize; i++) {
+            int tupleInEntry = sortEntry(i);
+
+            for (int ptr = 0; ptr < tupleInEntry; ptr++) {
+                int frameIndex = tPointers[ptr * PTR_SIZE];
+                int tupleIndex = tPointers[ptr * PTR_SIZE + 1];
+
+                hashtableRecordAccessor.reset(contents[frameIndex]);
+                outputTupleBuilder.reset();
+
+                int tupleOffset = hashtableRecordAccessor.getTupleStartOffset(tupleIndex);
+                int fieldOffset = hashtableRecordAccessor.getFieldCount() * INT_SIZE;
+
+                for (int k = 0; k < internalKeys.length; k++) {
+                    outputTupleBuilder.addField(hashtableRecordAccessor.getBuffer().array(), tupleOffset + fieldOffset
+                            + hashtableRecordAccessor.getFieldStartOffset(tupleIndex, k),
+                            hashtableRecordAccessor.getFieldLength(tupleIndex, k));
+                }
+
+                aggregator.outputPartialResult(outputTupleBuilder, hashtableRecordAccessor, tupleIndex, aggState);
+
+                if (!outputAppender.append(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(),
+                        0, outputTupleBuilder.getSize())) {
+
+                    FrameUtils.flushFrame(outputBuffer, writer);
+
+                    outputAppender.reset(outputBuffer, true);
+                    if (!outputAppender.append(outputTupleBuilder.getFieldEndOffsets(),
+                            outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
+                        throw new HyracksDataException("Failed to flush an aggregation result.");
+                    }
+                }
+                totalTupleCount--;
+            }
+
+            if (tupleInEntry > 0) {
+                usedEntries--;
+            }
+        }
+
+        if (outputAppender.getTupleCount() > 0) {
+
+            FrameUtils.flushFrame(outputBuffer, writer);
+
+            outputAppender.reset(outputBuffer, true);
+        }
+    }
+
+    protected int sortEntry(int entryID) {
+
+        if (tPointers == null)
+            tPointers = new int[INIT_REF_COUNT * PTR_SIZE];
+        int ptr = 0;
+
+        int headerFrameIndex = entryID / frameSize * 2 * INT_SIZE + (entryID % frameSize) * 2 * INT_SIZE / frameSize;
+        int headerFrameOffset = (entryID % frameSize) * 2 * INT_SIZE % frameSize;
+
+        if (headers[headerFrameIndex] == null) {
+            return 0;
+        }
+
+        int entryFrameIndex = headers[headerFrameIndex].getInt(headerFrameOffset);
+        int entryTupleIndex = headers[headerFrameIndex].getInt(headerFrameOffset + INT_SIZE);
+
+        do {
+            if (entryFrameIndex < 0) {
+                break;
+            }
+            hashtableRecordAccessor.reset(contents[entryFrameIndex]);
+            tPointers[ptr * PTR_SIZE] = entryFrameIndex;
+            tPointers[ptr * PTR_SIZE + 1] = entryTupleIndex;
+            int tStart = hashtableRecordAccessor.getTupleStartOffset(entryTupleIndex);
+            int f0StartRel = hashtableRecordAccessor.getFieldStartOffset(entryTupleIndex, internalKeys[0]);
+            int f0EndRel = hashtableRecordAccessor.getFieldEndOffset(entryTupleIndex, internalKeys[0]);
+            int f0Start = f0StartRel + tStart + hashtableRecordAccessor.getFieldSlotsLength();
+            tPointers[ptr * PTR_SIZE + 2] = firstNormalizer == null ? 0 : firstNormalizer.normalize(
+                    hashtableRecordAccessor.getBuffer().array(), f0Start, f0EndRel - f0StartRel);
+
+            ptr++;
+
+            if (ptr * PTR_SIZE >= tPointers.length) {
+                int[] newTPointers = new int[tPointers.length * 2];
+                System.arraycopy(tPointers, 0, newTPointers, 0, tPointers.length);
+                tPointers = newTPointers;
+            }
+
+            // move to the next record
+            int refOffset = hashtableRecordAccessor.getTupleHashReferenceOffset(entryTupleIndex);
+            int prevFrameIndex = entryFrameIndex;
+            entryFrameIndex = contents[prevFrameIndex].getInt(refOffset);
+            entryTupleIndex = contents[prevFrameIndex].getInt(refOffset + INT_SIZE);
+
+        } while (true);
+
+        // sort records
+        if (ptr > 1) {
+            sort(0, ptr);
+        }
+
+        return ptr;
+    }
+
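+    /**
+     * Sort the pointers collected for one entry with a three-way-partitioning quicksort
+     * (in the style of the classic primitive-array sort). The normalized key stored at
+     * tPointers[x * PTR_SIZE + 2] is compared first as an unsigned int; only ties fall
+     * back to a full field-by-field comparison of the grouping keys.
+     */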
+    protected void sort(int offset, int len) {
+        int m = offset + (len >> 1);
+        int mFrameIndex = tPointers[m * PTR_SIZE];
+        int mTupleIndex = tPointers[m * PTR_SIZE + 1];
+        int mNormKey = tPointers[m * PTR_SIZE + 2];
+        compFrameAccessor1.reset(contents[mFrameIndex]);
+
+        int a = offset;
+        int b = a;
+        int c = offset + len - 1;
+        int d = c;
+        while (true) {
+            while (b <= c) {
+                int bFrameIndex = tPointers[b * PTR_SIZE];
+                int bTupleIndex = tPointers[b * PTR_SIZE + 1];
+                int bNormKey = tPointers[b * PTR_SIZE + 2];
+                int cmp = 0;
+                if (bNormKey != mNormKey) {
+                    cmp = ((((long) bNormKey) & 0xffffffffL) < (((long) mNormKey) & 0xffffffffL)) ? -1 : 1;
+                } else {
+                    compFrameAccessor2.reset(contents[bFrameIndex]);
+                    cmp = compare(compFrameAccessor2, bTupleIndex, compFrameAccessor1, mTupleIndex);
+                }
+                if (cmp > 0) {
+                    break;
+                }
+                if (cmp == 0) {
+                    swap(a++, b);
+                }
+                ++b;
+            }
+            while (c >= b) {
+                int cFrameIndex = tPointers[c * PTR_SIZE];
+                int cTupleIndex = tPointers[c * PTR_SIZE + 1];
+                int cNormKey = tPointers[c * PTR_SIZE + 2];
+                int cmp = 0;
+                if (cNormKey != mNormKey) {
+                    cmp = ((((long) cNormKey) & 0xffffffffL) < (((long) mNormKey) & 0xffffffffL)) ? -1 : 1;
+                } else {
+                    compFrameAccessor2.reset(contents[cFrameIndex]);
+                    cmp = compare(compFrameAccessor2, cTupleIndex, compFrameAccessor1, mTupleIndex);
+                }
+                if (cmp < 0) {
+                    break;
+                }
+                if (cmp == 0) {
+                    swap(c, d--);
+                }
+                --c;
+            }
+            if (b > c)
+                break;
+            swap(b++, c--);
+        }
+
+        int s;
+        int n = offset + len;
+        s = Math.min(a - offset, b - a);
+        vecswap(offset, b - s, s);
+        s = Math.min(d - c, n - d - 1);
+        vecswap(b, n - s, s);
+
+        if ((s = b - a) > 1) {
+            sort(offset, s);
+        }
+        if ((s = d - c) > 1) {
+            sort(n - s, s);
+        }
+    }
+
+    private void swap(int a, int b) {
+        for (int i = 0; i < PTR_SIZE; i++) {
+            int t = tPointers[a * PTR_SIZE + i];
+            tPointers[a * PTR_SIZE + i] = tPointers[b * PTR_SIZE + i];
+            tPointers[b * PTR_SIZE + i] = t;
+        }
+    }
+
+    private void vecswap(int a, int b, int n) {
+        for (int i = 0; i < n; i++, a++, b++) {
+            swap(a, b);
+        }
+    }
+
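+    /**
+     * Probe the hash table entry for a record whose grouping keys equal those of the given
+     * input tuple. On success matchPointer identifies the matching record; on failure it is
+     * left at the last record visited in the entry's linked list (or -1 if the entry is empty).
+     */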
+    protected boolean findMatch(int entry, FrameTupleAccessor accessor, int tupleIndex) {
+
+        // reset the match pointer
+        matchPointer.frameIndex = -1;
+        matchPointer.tupleIndex = -1;
+
+        // get reference in the header
+        int headerFrameIndex = getHeaderFrameIndex(entry);
+        int headerFrameOffset = getHeaderTupleIndex(entry);
+
+        if (headers[headerFrameIndex] == null) {
+            return false;
+        }
+
+        // initialize the pointer to the first record 
+        int entryFrameIndex = headers[headerFrameIndex].getInt(headerFrameOffset);
+        int entryTupleIndex = headers[headerFrameIndex].getInt(headerFrameOffset + INT_SIZE);
+
+        while (entryFrameIndex >= 0) {
+            matchPointer.frameIndex = entryFrameIndex;
+            matchPointer.tupleIndex = entryTupleIndex;
+            hashtableRecordAccessor.reset(contents[entryFrameIndex]);
+
+            if (compare(accessor, tupleIndex, hashtableRecordAccessor, entryTupleIndex) == 0) {
+                return true;
+            }
+            // Move to the next record in this entry following the linked list
+            int refOffset = hashtableRecordAccessor.getTupleHashReferenceOffset(entryTupleIndex);
+            int prevFrameIndex = entryFrameIndex;
+            entryFrameIndex = contents[prevFrameIndex].getInt(refOffset);
+            entryTupleIndex = contents[prevFrameIndex].getInt(refOffset + INT_SIZE);
+        }
+
+        return false;
+    }
+
+    public LinkedList<RunFileReader> getRunFileReaders() {
+        return runReaders;
+    }
+
+    private int compare(FrameTupleAccessor accessor, int tupleIndex, FrameTupleAccessorForGroupHashtable hashAccessor,
+            int hashTupleIndex) {
+        int tStart0 = accessor.getTupleStartOffset(tupleIndex);
+        int fStartOffset0 = accessor.getFieldSlotsLength() + tStart0;
+
+        int tStart1 = hashAccessor.getTupleStartOffset(hashTupleIndex);
+        int fStartOffset1 = hashAccessor.getFieldSlotsLength() + tStart1;
+
+        for (int i = 0; i < keys.length; ++i) {
+            int fStart0 = accessor.getFieldStartOffset(tupleIndex, keys[i]);
+            int fEnd0 = accessor.getFieldEndOffset(tupleIndex, keys[i]);
+            int fLen0 = fEnd0 - fStart0;
+
+            int fStart1 = hashAccessor.getFieldStartOffset(hashTupleIndex, internalKeys[i]);
+            int fEnd1 = hashAccessor.getFieldEndOffset(hashTupleIndex, internalKeys[i]);
+            int fLen1 = fEnd1 - fStart1;
+
+            int c = comparators[i].compare(accessor.getBuffer().array(), fStart0 + fStartOffset0, fLen0, hashAccessor
+                    .getBuffer().array(), fStart1 + fStartOffset1, fLen1);
+            if (c != 0) {
+                return c;
+            }
+        }
+        return 0;
+    }
+
+    private int compare(FrameTupleAccessorForGroupHashtable accessor1, int tupleIndex1,
+            FrameTupleAccessorForGroupHashtable accessor2, int tupleIndex2) {
+        int tStart1 = accessor1.getTupleStartOffset(tupleIndex1);
+        int fStartOffset1 = accessor1.getFieldSlotsLength() + tStart1;
+
+        int tStart2 = accessor2.getTupleStartOffset(tupleIndex2);
+        int fStartOffset2 = accessor2.getFieldSlotsLength() + tStart2;
+
+        for (int i = 0; i < internalKeys.length; ++i) {
+            int fStart1 = accessor1.getFieldStartOffset(tupleIndex1, internalKeys[i]);
+            int fEnd1 = accessor1.getFieldEndOffset(tupleIndex1, internalKeys[i]);
+            int fLen1 = fEnd1 - fStart1;
+
+            int fStart2 = accessor2.getFieldStartOffset(tupleIndex2, internalKeys[i]);
+            int fEnd2 = accessor2.getFieldEndOffset(tupleIndex2, internalKeys[i]);
+            int fLen2 = fEnd2 - fStart2;
+
+            int c = comparators[i].compare(accessor1.getBuffer().array(), fStart1 + fStartOffset1, fLen1, accessor2
+                    .getBuffer().array(), fStart2 + fStartOffset2, fLen2);
+            if (c != 0) {
+                return c;
+            }
+        }
+        return 0;
+    }
+
+    public void reset() {
+        for (int i = 0; i < headers.length; i++) {
+            if (headers[i] != null) {
+                resetHeader(i);
+            }
+        }
+        for (int i = 0; i < contents.length; i++) {
+            if (contents[i] != null) {
+                contents[i].putInt(FrameHelper.getTupleCountOffset(frameSize), 0);
+            }
+        }
+
+        usedEntries = 0;
+        totalTupleCount = 0;
+        currentLargestFrameIndex = -1;
+    }
+
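+    /**
+     * Finish the build phase. If runs have already been spilled, the remaining in-memory
+     * groups are flushed into one more run (via flush(), defined earlier in this class) so
+     * that the merge phase only has to consume run files.
+     */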
+    public void finishup() throws HyracksDataException {
+        if (runReaders.size() > 0) {
+            flush();
+        }
+
+        hashedKeys = 0;
+        hashedRawRec = 0;
+    }
+
+    /**
+     * Close the hash table. Note that only the memory held by the frames is released; the
+     * aggregation state maintained in {@link #aggState} and the run file readers in
+     * {@link #runReaders} remain valid for later processing.
+     */
+    public void close() throws HyracksDataException {
+        for (int i = 0; i < headers.length; i++) {
+            headers[i] = null;
+        }
+        for (int i = 0; i < contents.length; i++) {
+            contents[i] = null;
+        }
+        outputBuffer = null;
+        tPointers = null;
+    }
+
+    public int getTupleCount() {
+        return totalTupleCount;
+    }
+
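+    /**
+     * Note: despite its name, this returns the number of frames held by the hash table
+     * (header frames, content frames, plus the output buffer), not the frame size in bytes.
+     */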
+    public int getFrameSize() {
+        return headers.length + contents.length + 1;
+    }
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGroupOperatorDescriptor.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGroupOperatorDescriptor.java
new file mode 100644
index 0000000..5296c9f
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGroupOperatorDescriptor.java
@@ -0,0 +1,275 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.LinkedList;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.ActivityId;
+import edu.uci.ics.hyracks.api.dataflow.IActivityGraphBuilder;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.TaskId;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileReader;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractActivityNode;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractStateObject;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
+
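+/**
+ * Group-by operator that aggregates records into a bounded in-memory hash table, spills
+ * sorted runs when the table fills up, and merges the runs in a second activity. The two
+ * activities are connected by a blocking edge, and the populated hash table is handed over
+ * through task-level state (see {@link AggregateActivityState}).
+ */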
+public class HybridHashSortGroupOperatorDescriptor extends AbstractOperatorDescriptor {
+
+    private static final int AGGREGATE_ACTIVITY_ID = 0;
+
+    private static final int MERGE_ACTIVITY_ID = 1;
+
+    private static final long serialVersionUID = 1L;
+    private final int[] keyFields, storedKeyFields;
+    private final INormalizedKeyComputerFactory firstNormalizerFactory;
+
+    private final IAggregatorDescriptorFactory aggregatorFactory;
+    private final IAggregatorDescriptorFactory mergerFactory;
+
+    private final ITuplePartitionComputerFactory aggTpcf, mergeTpcf;
+
+    private final int framesLimit;
+    private final IBinaryComparatorFactory[] comparatorFactories;
+
+    private final int tableSize;
+
+    private final boolean isLoadOptimized;
+
+    public HybridHashSortGroupOperatorDescriptor(JobSpecification spec, int[] keyFields, int framesLimit,
+            int tableSize, IBinaryComparatorFactory[] comparatorFactories, ITuplePartitionComputerFactory aggTpcf,
+            ITuplePartitionComputerFactory mergeTpcf, IAggregatorDescriptorFactory aggregatorFactory,
+            IAggregatorDescriptorFactory mergerFactory, RecordDescriptor recordDescriptor) {
+        this(spec, keyFields, framesLimit, tableSize, comparatorFactories, aggTpcf, mergeTpcf, null, aggregatorFactory,
+                mergerFactory, recordDescriptor, false);
+    }
+
+    public HybridHashSortGroupOperatorDescriptor(JobSpecification spec, int[] keyFields, int framesLimit,
+            int tableSize, IBinaryComparatorFactory[] comparatorFactories, ITuplePartitionComputerFactory aggTpcf,
+            ITuplePartitionComputerFactory mergeTpcf, INormalizedKeyComputerFactory firstNormalizerFactory,
+            IAggregatorDescriptorFactory aggregatorFactory, IAggregatorDescriptorFactory mergerFactory,
+            RecordDescriptor recordDescriptor) {
+        this(spec, keyFields, framesLimit, tableSize, comparatorFactories, aggTpcf, mergeTpcf, firstNormalizerFactory,
+                aggregatorFactory, mergerFactory, recordDescriptor, false);
+    }
+
+    public HybridHashSortGroupOperatorDescriptor(JobSpecification spec, int[] keyFields, int framesLimit,
+            int tableSize, IBinaryComparatorFactory[] comparatorFactories, ITuplePartitionComputerFactory aggTpcf,
+            ITuplePartitionComputerFactory mergeTpcf, INormalizedKeyComputerFactory firstNormalizerFactory,
+            IAggregatorDescriptorFactory aggregatorFactory, IAggregatorDescriptorFactory mergerFactory,
+            RecordDescriptor recordDescriptor, boolean isLoadOpt) {
+        super(spec, 1, 1);
+        this.framesLimit = framesLimit;
+        if (framesLimit <= 2) {
+            /**
+             * Minimum of 3 frames: 2 for in-memory hash table, and 1 for output
+             * aggregation results.
+             */
+            throw new IllegalStateException("The frame limit must be at least 3, but it is " + framesLimit + "!");
+        }
+
+        storedKeyFields = new int[keyFields.length];
+        for (int i = 0; i < storedKeyFields.length; i++) {
+            storedKeyFields[i] = i;
+        }
+        this.aggregatorFactory = aggregatorFactory;
+        this.mergerFactory = mergerFactory;
+        this.keyFields = keyFields;
+        this.comparatorFactories = comparatorFactories;
+        this.firstNormalizerFactory = firstNormalizerFactory;
+        this.aggTpcf = aggTpcf;
+        this.mergeTpcf = mergeTpcf;
+        this.tableSize = tableSize;
+
+        /**
+         * Set the record descriptor. Note that since this operator is a unary
+         * operator, only the first record descriptor is used here.
+         */
+        recordDescriptors[0] = recordDescriptor;
+
+        this.isLoadOptimized = isLoadOpt;
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor#contributeActivities(edu.uci.ics.hyracks.api.dataflow.
+     * IActivityGraphBuilder)
+     */
+    @Override
+    public void contributeActivities(IActivityGraphBuilder builder) {
+        AggregateActivity aggregateAct = new AggregateActivity(new ActivityId(getOperatorId(), AGGREGATE_ACTIVITY_ID));
+        MergeActivity mergeAct = new MergeActivity(new ActivityId(odId, MERGE_ACTIVITY_ID));
+
+        builder.addActivity(this, aggregateAct);
+        builder.addSourceEdge(0, aggregateAct, 0);
+
+        builder.addActivity(this, mergeAct);
+        builder.addTargetEdge(0, mergeAct, 0);
+
+        builder.addBlockingEdge(aggregateAct, mergeAct);
+    }
+
+    public static class AggregateActivityState extends AbstractStateObject {
+
+        private HybridHashSortGroupHashTable gTable;
+
+        public AggregateActivityState() {
+        }
+
+        private AggregateActivityState(JobId jobId, TaskId tId) {
+            super(jobId, tId);
+        }
+
+        @Override
+        public void toBytes(DataOutput out) throws IOException {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public void fromBytes(DataInput in) throws IOException {
+            throw new UnsupportedOperationException();
+        }
+    }
+
+    private class AggregateActivity extends AbstractActivityNode {
+
+        private static final long serialVersionUID = 1L;
+
+        public AggregateActivity(ActivityId id) {
+            super(id);
+        }
+
+        @Override
+        public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+                final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
+                throws HyracksDataException {
+            return new AbstractUnaryInputSinkOperatorNodePushable() {
+
+                HybridHashSortGroupHashTable serializableGroupHashtable;
+
+                FrameTupleAccessor accessor;
+
+                @Override
+                public void open() throws HyracksDataException {
+
+                    RecordDescriptor inRecDesc = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);
+
+                    IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
+                    for (int i = 0; i < comparatorFactories.length; i++) {
+                        comparators[i] = comparatorFactories[i].createBinaryComparator();
+                    }
+
+                    // The normalized key computer is optional here: the hash table tolerates a
+                    // null normalizer, and one constructor of this descriptor passes no factory.
+                    serializableGroupHashtable = new HybridHashSortGroupHashTable(ctx, framesLimit, tableSize,
+                            keyFields, comparators, aggTpcf.createPartitioner(),
+                            firstNormalizerFactory == null ? null : firstNormalizerFactory.createNormalizedKeyComputer(),
+                            aggregatorFactory.createAggregator(ctx, inRecDesc, recordDescriptors[0], keyFields,
+                                    storedKeyFields), inRecDesc, recordDescriptors[0]);
+                    accessor = new FrameTupleAccessor(ctx.getFrameSize(), inRecDesc);
+                }
+
+                @Override
+                public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+                    accessor.reset(buffer);
+                    int tupleCount = accessor.getTupleCount();
+                    for (int i = 0; i < tupleCount; i++) {
+                        serializableGroupHashtable.insert(accessor, i);
+                    }
+                }
+
+                @Override
+                public void fail() throws HyracksDataException {
+                }
+
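+                // On close, hand the populated hash table (including any spilled runs) to the
+                // merge activity through the task state object keyed by (activity id, partition).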
+                @Override
+                public void close() throws HyracksDataException {
+                    serializableGroupHashtable.finishup();
+                    AggregateActivityState state = new AggregateActivityState(ctx.getJobletContext().getJobId(),
+                            new TaskId(getActivityId(), partition));
+                    state.gTable = serializableGroupHashtable;
+                    ctx.setStateObject(state);
+                }
+            };
+        }
+    }
+
+    private class MergeActivity extends AbstractActivityNode {
+
+        private static final long serialVersionUID = 1L;
+
+        public MergeActivity(ActivityId id) {
+            super(id);
+        }
+
+        @Override
+        public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+                IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
+                throws HyracksDataException {
+
+            return new AbstractUnaryOutputSourceOperatorNodePushable() {
+
+                public void initialize() throws HyracksDataException {
+
+                    AggregateActivityState aggState = (AggregateActivityState) ctx.getStateObject(new TaskId(
+                            new ActivityId(getOperatorId(), AGGREGATE_ACTIVITY_ID), partition));
+
+                    LinkedList<RunFileReader> runs = aggState.gTable.getRunFileReaders();
+
+                    writer.open();
+                    if (runs.size() <= 0) {
+                        aggState.gTable.flushHashtableToOutput(writer);
+                        aggState.gTable.close();
+                    } else {
+                        aggState.gTable.close();
+
+                        IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
+                        for (int i = 0; i < comparatorFactories.length; i++) {
+                            comparators[i] = comparatorFactories[i].createBinaryComparator();
+                        }
+
+                        HybridHashSortRunMerger merger = new HybridHashSortRunMerger(ctx, runs, storedKeyFields,
+                                comparators, recordDescriptors[0], mergeTpcf.createPartitioner(),
+                                mergerFactory.createAggregator(ctx, recordDescriptors[0], recordDescriptors[0],
+                                        storedKeyFields, storedKeyFields), framesLimit, tableSize, writer,
+                                isLoadOptimized);
+
+                        merger.process();
+                    }
+
+                    writer.close();
+                }
+
+            };
+        }
+    }
+
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGrouperBucketMerge.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGrouperBucketMerge.java
new file mode 100644
index 0000000..1de2237
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGrouperBucketMerge.java
@@ -0,0 +1,488 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.LinkedList;
+import java.util.List;
+
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileReader;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileWriter;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.util.ReferenceEntry;
+
+public class HybridHashSortGrouperBucketMerge {
+
+    private final int[] keyFields;
+    private final IBinaryComparator[] comparators;
+
+    private final IAggregatorDescriptor merger;
+    private final AggregateState mergeState;
+
+    private final int framesLimit, tableSize;
+
+    private final RecordDescriptor inRecDesc;
+
+    private final IHyracksTaskContext ctx;
+
+    private final ArrayTupleBuilder tupleBuilder;
+
+    private final IFrameWriter outputWriter;
+
+    private final ITuplePartitionComputer tpc;
+
+    private final boolean isLoadOptimized;
+
+    List<ByteBuffer> inFrames;
+    ByteBuffer outFrame, writerFrame;
+    FrameTupleAppender outAppender, writerAppender;
+    LinkedList<RunFileReader> runs;
+    ArrayTupleBuilder finalTupleBuilder;
+    FrameTupleAccessor outFrameAccessor;
+    int[] currentFrameIndexInRun, currentRunFrames, currentBucketInRun;
+    int runFrameLimit = 1;
+
+    public HybridHashSortGrouperBucketMerge(IHyracksTaskContext ctx, int[] keyFields, int framesLimit, int tableSize,
+            ITuplePartitionComputer tpc, IBinaryComparator[] comparators, IAggregatorDescriptor merger,
+            RecordDescriptor inRecDesc, RecordDescriptor outRecDesc, IFrameWriter outputWriter)
+            throws HyracksDataException {
+        // Delegate to the full constructor; load optimization is enabled by default.
+        this(ctx, keyFields, framesLimit, tableSize, tpc, comparators, merger, inRecDesc, outRecDesc, outputWriter,
+                true);
+    }
+
+    public HybridHashSortGrouperBucketMerge(IHyracksTaskContext ctx, int[] keyFields, int framesLimit, int tableSize,
+            ITuplePartitionComputer tpc, IBinaryComparator[] comparators, IAggregatorDescriptor merger,
+            RecordDescriptor inRecDesc, RecordDescriptor outRecDesc, IFrameWriter outputWriter, boolean loadOptimized)
+            throws HyracksDataException {
+        this.ctx = ctx;
+        this.framesLimit = framesLimit;
+        this.tableSize = tableSize;
+
+        this.keyFields = keyFields;
+        this.comparators = comparators;
+        this.merger = merger;
+        this.mergeState = merger.createAggregateStates();
+
+        this.inRecDesc = inRecDesc;
+
+        this.tupleBuilder = new ArrayTupleBuilder(inRecDesc.getFieldCount());
+
+        this.outAppender = new FrameTupleAppender(ctx.getFrameSize());
+
+        this.outputWriter = outputWriter;
+
+        this.outFrameAccessor = new FrameTupleAccessor(ctx.getFrameSize(), inRecDesc);
+
+        this.tpc = tpc;
+
+        this.isLoadOptimized = loadOptimized;
+    }
+
+    public void initialize(LinkedList<RunFileReader> runFiles) throws HyracksDataException {
+
+        runs = runFiles;
+
+        try {
+            if (runs.size() <= 0) {
+                return;
+            } else {
+                inFrames = new ArrayList<ByteBuffer>();
+                outFrame = ctx.allocateFrame();
+                outAppender.reset(outFrame, true);
+                outFrameAccessor.reset(outFrame);
+                int runProcOffset = 0;
+                while (runs.size() > 0) {
+                    try {
+                        doPass(runs, runProcOffset);
+                        if (runs.size() + 2 <= framesLimit) {
+                            // final phase
+                            runProcOffset = 0;
+                        } else {
+                            // one more merge level
+                            runProcOffset++;
+                        }
+                    } catch (Exception e) {
+                        throw new HyracksDataException(e);
+                    }
+                }
+                inFrames.clear();
+            }
+        } catch (Exception e) {
+            outputWriter.fail();
+            throw new HyracksDataException(e);
+        } finally {
+            mergeState.close();
+        }
+    }
+
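+    /**
+     * Run one merge pass over the runs starting at the given offset. Up to framesLimit - 2
+     * runs are merged through a bucket-based priority queue, so tuples come out ordered by
+     * (hash bucket, grouping keys) and consecutive tuples with equal keys are aggregated
+     * into a single result. If all remaining runs fit in one pass, the result goes straight
+     * to the output writer; otherwise it is written to a new run file that replaces the
+     * merged runs.
+     */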
+    private void doPass(LinkedList<RunFileReader> runs, int offset) throws HyracksDataException {
+        FileReference newRun = null;
+        IFrameWriter writer = outputWriter;
+        boolean finalPass = false;
+
+        int runNumber = runs.size() - offset;
+
+        while (inFrames.size() + 2 < framesLimit) {
+            inFrames.add(ctx.allocateFrame());
+        }
+
+        if (runNumber + 2 <= framesLimit) {
+            finalPass = true;
+            if (isLoadOptimized)
+                runFrameLimit = (framesLimit - 2) / runNumber;
+            else
+                runFrameLimit = 1;
+        } else {
+            runFrameLimit = 1;
+            runNumber = framesLimit - 2;
+            newRun = ctx.getJobletContext().createManagedWorkspaceFile(
+                    HybridHashSortGrouperBucketMerge.class.getSimpleName());
+            writer = new RunFileWriter(newRun, ctx.getIOManager());
+            writer.open();
+        }
+        try {
+            currentFrameIndexInRun = new int[runNumber];
+            currentRunFrames = new int[runNumber];
+            currentBucketInRun = new int[runNumber];
+            /**
+             * Create file readers for the input run files, but only for as many
+             * runs as fit into the available inFrames
+             */
+            RunFileReader[] runFileReaders = new RunFileReader[runNumber];
+            FrameTupleAccessor[] tupleAccessors = new FrameTupleAccessor[inFrames.size()];
+            Comparator<ReferenceHashEntry> comparator = createEntryComparator(comparators);
+            ReferencedBucketBasedPriorityQueue topTuples = new ReferencedBucketBasedPriorityQueue(ctx.getFrameSize(),
+                    inRecDesc, runNumber, comparator, tpc, tableSize);
+            /**
+             * current tuple index in each run
+             */
+            int[] tupleIndices = new int[runNumber];
+
+            for (int i = 0; i < runNumber; i++) {
+                int runIndex = i + offset;
+                tupleIndices[i] = 0;
+                // Load the run file
+                runFileReaders[i] = runs.get(runIndex);
+                runFileReaders[i].open();
+
+                currentRunFrames[i] = 0;
+                currentFrameIndexInRun[i] = i * runFrameLimit;
+                for (int j = 0; j < runFrameLimit; j++) {
+                    int frameIndex = currentFrameIndexInRun[i] + j;
+                    boolean hasNextFrame = runFileReaders[i].nextFrame(inFrames.get(frameIndex));
+                    if (hasNextFrame) {
+                        tupleAccessors[frameIndex] = new FrameTupleAccessor(ctx.getFrameSize(), inRecDesc);
+                        tupleAccessors[frameIndex].reset(inFrames.get(frameIndex));
+                        currentRunFrames[i]++;
+                        if (j == 0) {
+                            currentBucketInRun[i] = tpc.partition(tupleAccessors[frameIndex], tupleIndices[i],
+                                    tableSize);
+                            setNextTopTuple(i, tupleIndices, runFileReaders, tupleAccessors, topTuples);
+                        }
+                    } else {
+                        break;
+                    }
+                }
+            }
+
+            /**
+             * Start merging
+             */
+            while (!topTuples.areRunsExhausted()) {
+                /**
+                 * Get the top record
+                 */
+                ReferenceEntry top = topTuples.peek();
+                int tupleIndex = top.getTupleIndex();
+                int runIndex = topTuples.peek().getRunid();
+
+                FrameTupleAccessor fta = top.getAccessor();
+
+                int currentTupleInOutFrame = outFrameAccessor.getTupleCount() - 1;
+                if (currentTupleInOutFrame < 0
+                        || compareFrameTuples(fta, tupleIndex, outFrameAccessor, currentTupleInOutFrame) != 0) {
+
+                    tupleBuilder.reset();
+
+                    for (int k = 0; k < keyFields.length; k++) {
+                        tupleBuilder.addField(fta, tupleIndex, keyFields[k]);
+                    }
+
+                    merger.init(tupleBuilder, fta, tupleIndex, mergeState);
+
+                    if (!outAppender.appendSkipEmptyField(tupleBuilder.getFieldEndOffsets(),
+                            tupleBuilder.getByteArray(), 0, tupleBuilder.getSize())) {
+                        flushOutFrame(writer, finalPass);
+                        if (!outAppender.appendSkipEmptyField(tupleBuilder.getFieldEndOffsets(),
+                                tupleBuilder.getByteArray(), 0, tupleBuilder.getSize())) {
+                            throw new HyracksDataException(
+                                    "The partial aggregation result is too large to fit into a frame.");
+                        }
+                    }
+
+                } else {
+                    /**
+                     * The new tuple belongs to the same group as the last tuple in
+                     * the outFrame, so merge it into that partial result instead of
+                     * starting a new group.
+                     */
+
+                    merger.aggregate(fta, tupleIndex, outFrameAccessor, currentTupleInOutFrame, mergeState);
+
+                }
+                tupleIndices[runIndex]++;
+                setNextTopTuple(runIndex, tupleIndices, runFileReaders, tupleAccessors, topTuples);
+            }
+
+            if (outAppender.getTupleCount() > 0) {
+                flushOutFrame(writer, finalPass);
+                outAppender.reset(outFrame, true);
+            }
+
+            merger.close();
+
+            // Remove exactly the runs that were merged in this pass: indices [offset, offset + runNumber).
+            runs.subList(offset, offset + runNumber).clear();
+            /**
+             * insert the new run file at the current offset in the run
+             * file list
+             */
+            if (!finalPass) {
+                runs.add(offset, ((RunFileWriter) writer).createReader());
+            }
+        } finally {
+            if (!finalPass) {
+                writer.close();
+            }
+            mergeState.reset();
+        }
+    }
+
+    private void flushOutFrame(IFrameWriter writer, boolean isFinal) throws HyracksDataException {
+
+        if (finalTupleBuilder == null) {
+            finalTupleBuilder = new ArrayTupleBuilder(inRecDesc.getFields().length);
+        }
+
+        if (writerFrame == null) {
+            writerFrame = ctx.allocateFrame();
+        }
+
+        if (writerAppender == null) {
+            writerAppender = new FrameTupleAppender(ctx.getFrameSize());
+        }
+        writerAppender.reset(writerFrame, true);
+
+        outFrameAccessor.reset(outFrame);
+
+        for (int i = 0; i < outFrameAccessor.getTupleCount(); i++) {
+
+            finalTupleBuilder.reset();
+
+            for (int k = 0; k < keyFields.length; k++) {
+                finalTupleBuilder.addField(outFrameAccessor, i, keyFields[k]);
+            }
+
+            if (isFinal) {
+
+                merger.outputFinalResult(finalTupleBuilder, outFrameAccessor, i, mergeState);
+
+            } else {
+
+                merger.outputPartialResult(finalTupleBuilder, outFrameAccessor, i, mergeState);
+            }
+
+            if (!writerAppender.appendSkipEmptyField(finalTupleBuilder.getFieldEndOffsets(),
+                    finalTupleBuilder.getByteArray(), 0, finalTupleBuilder.getSize())) {
+                FrameUtils.flushFrame(writerFrame, writer);
+                writerAppender.reset(writerFrame, true);
+                if (!writerAppender.appendSkipEmptyField(finalTupleBuilder.getFieldEndOffsets(),
+                        finalTupleBuilder.getByteArray(), 0, finalTupleBuilder.getSize())) {
+                    throw new HyracksDataException("The aggregation output is too large to fit into a frame.");
+                }
+            }
+        }
+        if (writerAppender.getTupleCount() > 0) {
+            FrameUtils.flushFrame(writerFrame, writer);
+            writerAppender.reset(writerFrame, true);
+        }
+
+        outAppender.reset(outFrame, true);
+    }
+
+    private void setNextTopTuple(int runIndex, int[] tupleIndices, RunFileReader[] runCursors,
+            FrameTupleAccessor[] tupleAccessors, ReferencedBucketBasedPriorityQueue topTuples)
+            throws HyracksDataException {
+        int runStart = runIndex * runFrameLimit;
+        boolean existNext = false;
+        if (tupleAccessors[currentFrameIndexInRun[runIndex]] == null || runCursors[runIndex] == null) {
+            /**
+             * run already closed
+             */
+            existNext = false;
+        } else if (currentFrameIndexInRun[runIndex] - runStart < currentRunFrames[runIndex] - 1) {
+            /**
+             * not the last frame for this run
+             */
+            existNext = true;
+            if (tupleIndices[runIndex] >= tupleAccessors[currentFrameIndexInRun[runIndex]].getTupleCount()) {
+                tupleIndices[runIndex] = 0;
+                currentFrameIndexInRun[runIndex]++;
+            }
+        } else if (tupleIndices[runIndex] < tupleAccessors[currentFrameIndexInRun[runIndex]].getTupleCount()) {
+            /**
+             * still tuples left in the last loaded frame of this run
+             */
+            existNext = true;
+        } else {
+            /**
+             * All tuples in the currently loaded frames of this run have been
+             * consumed, so reload the next batch of frames.
+             */
+            tupleIndices[runIndex] = 0;
+            currentFrameIndexInRun[runIndex] = runStart;
+            /**
+             * read in batch
+             */
+            currentRunFrames[runIndex] = 0;
+            for (int j = 0; j < runFrameLimit; j++) {
+                int frameIndex = currentFrameIndexInRun[runIndex] + j;
+                if (runCursors[runIndex].nextFrame(inFrames.get(frameIndex))) {
+                    tupleAccessors[frameIndex].reset(inFrames.get(frameIndex));
+                    existNext = true;
+                    currentRunFrames[runIndex]++;
+                } else {
+                    break;
+                }
+            }
+        }
+
+        if (existNext) {
+            topTuples.popAndReplace(tupleAccessors[currentFrameIndexInRun[runIndex]], tupleIndices[runIndex]);
+        } else {
+            topTuples.pop();
+            closeRun(runIndex, runCursors, tupleAccessors);
+        }
+    }
+
+    /**
+     * Close the run file, and also the corresponding readers and
+     * input frame.
+     * 
+     * @param index
+     * @param runCursors
+     * @param tupleAccessor
+     * @throws HyracksDataException
+     */
+    private void closeRun(int index, RunFileReader[] runCursors, IFrameTupleAccessor[] tupleAccessor)
+            throws HyracksDataException {
+        if (runCursors[index] != null) {
+            runCursors[index].close();
+            runCursors[index] = null;
+            int frameOffset = index * runFrameLimit;
+            for (int j = 0; j < runFrameLimit; j++) {
+                tupleAccessor[frameOffset + j] = null;
+            }
+        }
+    }
+
+    private int compareFrameTuples(IFrameTupleAccessor fta1, int j1, IFrameTupleAccessor fta2, int j2) {
+        byte[] b1 = fta1.getBuffer().array();
+        byte[] b2 = fta2.getBuffer().array();
+        for (int f = 0; f < keyFields.length; ++f) {
+            int fIdx = f;
+            int s1 = fta1.getTupleStartOffset(j1) + fta1.getFieldSlotsLength() + fta1.getFieldStartOffset(j1, fIdx);
+            int l1 = fta1.getFieldLength(j1, fIdx);
+            int s2 = fta2.getTupleStartOffset(j2) + fta2.getFieldSlotsLength() + fta2.getFieldStartOffset(j2, fIdx);
+            int l2 = fta2.getFieldLength(j2, fIdx);
+            int c = comparators[f].compare(b1, s1, l1, b2, s2, l2);
+            if (c != 0) {
+                return c;
+            }
+        }
+        return 0;
+    }
+
+    private Comparator<ReferenceHashEntry> createEntryComparator(final IBinaryComparator[] comparators) {
+        return new Comparator<ReferenceHashEntry>() {
+
+            @Override
+            public int compare(ReferenceHashEntry o1, ReferenceHashEntry o2) {
+                int cmp = o1.getHashValue() - o2.getHashValue();
+                if (cmp != 0) {
+                    return cmp;
+                } else {
+                    FrameTupleAccessor fta1 = (FrameTupleAccessor) o1.getAccessor();
+                    FrameTupleAccessor fta2 = (FrameTupleAccessor) o2.getAccessor();
+                    int j1 = o1.getTupleIndex();
+                    int j2 = o2.getTupleIndex();
+                    byte[] b1 = fta1.getBuffer().array();
+                    byte[] b2 = fta2.getBuffer().array();
+                    for (int f = 0; f < keyFields.length; ++f) {
+                        int fIdx = f;
+                        int s1 = fta1.getTupleStartOffset(j1) + fta1.getFieldSlotsLength()
+                                + fta1.getFieldStartOffset(j1, fIdx);
+                        int l1 = fta1.getFieldEndOffset(j1, fIdx) - fta1.getFieldStartOffset(j1, fIdx);
+                        int s2 = fta2.getTupleStartOffset(j2) + fta2.getFieldSlotsLength()
+                                + fta2.getFieldStartOffset(j2, fIdx);
+                        int l2 = fta2.getFieldEndOffset(j2, fIdx) - fta2.getFieldStartOffset(j2, fIdx);
+                        int c = comparators[f].compare(b1, s1, l1, b2, s2, l2);
+                        if (c != 0) {
+                            return c;
+                        }
+                    }
+                    return 0;
+                }
+            }
+
+        };
+    }
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortRunMerger.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortRunMerger.java
new file mode 100644
index 0000000..a846e4a
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortRunMerger.java
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.nio.ByteBuffer;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.comm.IFrameReader;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileReader;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileWriter;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+
+public class HybridHashSortRunMerger {
+
+    private final IHyracksTaskContext ctx;
+    private final List<RunFileReader> runs;
+    private final int[] keyFields;
+    private final IBinaryComparator[] comparators;
+    private final RecordDescriptor recordDesc;
+    private final int framesLimit;
+    private final int tableSize;
+    private final IFrameWriter writer;
+    private final IAggregatorDescriptor grouper;
+    private final ITuplePartitionComputer tpc;
+    private ByteBuffer outFrame;
+    private FrameTupleAppender outFrameAppender;
+    private final boolean isLoadBuffered;
+
+    public HybridHashSortRunMerger(IHyracksTaskContext ctx, LinkedList<RunFileReader> runs, int[] keyFields,
+            IBinaryComparator[] comparators, RecordDescriptor recordDesc, ITuplePartitionComputer tpc,
+            IAggregatorDescriptor grouper, int framesLimit, int tableSize, IFrameWriter writer, boolean isLoadBuffered) {
+        this.ctx = ctx;
+        this.runs = runs;
+        this.keyFields = keyFields;
+        this.comparators = comparators;
+        this.recordDesc = recordDesc;
+        this.framesLimit = framesLimit;
+        this.writer = writer;
+        this.isLoadBuffered = isLoadBuffered;
+        this.tableSize = tableSize;
+        this.tpc = tpc;
+        this.grouper = grouper;
+    }
+
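+    /**
+     * Merge the spilled runs level by level: while more runs remain than can be read at once
+     * (framesLimit - 1 run cursors), groups of runs are merged into new intermediate run
+     * files; the final level is merged directly into the output writer. The number of merge
+     * levels and merged runs is reported through the task's counter context.
+     */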
+    public void process() throws HyracksDataException {
+        
+        // FIXME
+        int mergeLevels = 0, mergeRunCount = 0;
+        try {
+
+            outFrame = ctx.allocateFrame();
+            outFrameAppender = new FrameTupleAppender(ctx.getFrameSize());
+            outFrameAppender.reset(outFrame, true);
+
+            int maxMergeWidth = framesLimit - 1;
+            while (runs.size() > maxMergeWidth) {
+                int generationSeparator = 0;
+                // FIXME
+                int mergeRounds = 0;
+                while (generationSeparator < runs.size() && runs.size() > maxMergeWidth) {
+                    int mergeWidth = Math.min(Math.min(runs.size() - generationSeparator, maxMergeWidth), runs.size()
+                            - maxMergeWidth + 1);
+                    FileReference newRun = null;
+                    IFrameWriter mergeResultWriter = this.writer;
+                    newRun = ctx.createManagedWorkspaceFile(HybridHashSortRunMerger.class.getSimpleName());
+                    mergeResultWriter = new RunFileWriter(newRun, ctx.getIOManager());
+                    mergeResultWriter.open();
+                    IFrameReader[] runCursors = new RunFileReader[mergeWidth];
+                    for (int i = 0; i < mergeWidth; i++) {
+                        runCursors[i] = runs.get(generationSeparator + i);
+                    }
+                    merge(mergeResultWriter, runCursors, false);
+                    runs.subList(generationSeparator, generationSeparator + mergeWidth).clear();
+                    runs.add(generationSeparator++, ((RunFileWriter) mergeResultWriter).createReader());
+                    mergeRounds++;
+                }
+                mergeLevels++;
+                mergeRunCount += mergeRounds;
+            }
+            if (!runs.isEmpty()) {
+                IFrameReader[] runCursors = new RunFileReader[runs.size()];
+                for (int i = 0; i < runCursors.length; i++) {
+                    runCursors[i] = runs.get(i);
+                }
+                merge(writer, runCursors, true);
+            }
+        } catch (Exception e) {
+            writer.fail();
+            throw new HyracksDataException(e);
+        } finally {
+
+            ctx.getCounterContext()
+                    .getCounter("optional." + HybridHashSortRunMerger.class.getSimpleName() + ".merge.runs.count", true)
+                    .set(mergeRunCount);
+
+            ctx.getCounterContext()
+                    .getCounter("optional." + HybridHashSortRunMerger.class.getSimpleName() + ".merge.levels", true)
+                    .set(mergeLevels);
+        }
+    }
+
+    private void merge(IFrameWriter mergeResultWriter, IFrameReader[] runCursors, boolean isFinal)
+            throws HyracksDataException {
+        // FIXME
+        long methodTimer = System.nanoTime();
+
+        IFrameReader merger = new GroupRunMergingFrameReader(ctx, runCursors, framesLimit, tableSize, keyFields, tpc,
+                comparators, grouper, recordDesc, isFinal, isLoadBuffered);
+        merger.open();
+        try {
+            while (merger.nextFrame(outFrame)) {
+                FrameUtils.flushFrame(outFrame, mergeResultWriter);
+            }
+        } finally {
+            merger.close();
+        }
+        ctx.getCounterContext()
+                .getCounter("optional." + HybridHashSortRunMerger.class.getSimpleName() + ".merge.time", true)
+                .update(System.nanoTime() - methodTimer);
+    }
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferenceEntryWithBucketID.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferenceEntryWithBucketID.java
new file mode 100644
index 0000000..3c91fea
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferenceEntryWithBucketID.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.std.util.ReferenceEntry;
+
+public class ReferenceEntryWithBucketID extends ReferenceEntry {
+
+    private int bucketID;
+
+    public ReferenceEntryWithBucketID(int runid, FrameTupleAccessor fta, int tupleIndex, int bucketID) {
+        super(runid, fta, tupleIndex);
+        this.bucketID = bucketID;
+    }
+
+    public int getBucketID() {
+        return bucketID;
+    }
+
+    public void setBucketID(int bucketID) {
+        this.bucketID = bucketID;
+    }
+
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferenceHashEntry.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferenceHashEntry.java
new file mode 100644
index 0000000..394f0a8
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferenceHashEntry.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.std.util.ReferenceEntry;
+
+public class ReferenceHashEntry extends ReferenceEntry {
+
+    private int hashValue;
+
+    public ReferenceHashEntry(int runid, FrameTupleAccessor fta, int tupleIndex, int hashVal) {
+        super(runid, fta, tupleIndex);
+        this.hashValue = hashVal;
+    }
+
+    public int getHashValue() {
+        return hashValue;
+    }
+
+    public void setHashValue(int hashVal) {
+        this.hashValue = hashVal;
+    }
+
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferencedBucketBasedPriorityQueue.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferencedBucketBasedPriorityQueue.java
new file mode 100644
index 0000000..adfbe81
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferencedBucketBasedPriorityQueue.java
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.io.IOException;
+import java.util.BitSet;
+import java.util.Comparator;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.std.util.ReferenceEntry;
+
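+/**
+ * Priority queue over the current head tuple of each run being merged, organized as a
+ * tournament-style tree with one slot per run. Each entry carries the hash bucket of its
+ * current tuple (computed with the supplied tuple partition computer); the supplied
+ * comparator is expected to order entries by bucket first and then by the grouping keys.
+ */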
+public class ReferencedBucketBasedPriorityQueue {
+
+    private final int frameSize;
+    private final RecordDescriptor recordDescriptor;
+    private final ReferenceHashEntry entries[];
+    private final int size;
+    private final BitSet runAvail;
+    private int nItems;
+    private final int tableSize;
+
+    private final Comparator<ReferenceHashEntry> comparator;
+
+    private final ITuplePartitionComputer tpc;
+
+    public ReferencedBucketBasedPriorityQueue(int frameSize, RecordDescriptor recordDescriptor, int initSize,
+            Comparator<ReferenceHashEntry> comparator, ITuplePartitionComputer tpc, int tableSize) {
+        this.frameSize = frameSize;
+        this.recordDescriptor = recordDescriptor;
+        if (initSize < 1)
+            throw new IllegalArgumentException();
+        this.comparator = comparator;
+        nItems = initSize;
+        size = (initSize + 1) & 0xfffffffe;
+        entries = new ReferenceHashEntry[size];
+        runAvail = new BitSet(size);
+        runAvail.set(0, initSize, true);
+        for (int i = 0; i < size; i++) {
+            entries[i] = new ReferenceHashEntry(i, null, -1, -1);
+        }
+        this.tpc = tpc;
+        this.tableSize = tableSize;
+    }
+
+    /**
+     * Retrieve the top entry without removing it
+     * 
+     * @return the top entry
+     */
+    public ReferenceEntry peek() {
+        return entries[0];
+    }
+
+    /**
+     * Replace the top entry's tuple with the given tuple (the next one from the same run)
+     * and sift the entry back into its correct position in the queue.
+     * 
+     * @param fta
+     *            the accessor holding the replacement tuple
+     * @param tIndex
+     *            the index of the replacement tuple within the accessor's frame
+     * @return the run id of the replaced (former top) entry
+     * @throws HyracksDataException
+     */
+    public int popAndReplace(FrameTupleAccessor fta, int tIndex) throws HyracksDataException {
+        ReferenceHashEntry entry = entries[0];
+        if (entry.getAccessor() == null) {
+            entry.setAccessor(new FrameTupleAccessor(frameSize, recordDescriptor));
+        }
+        entry.getAccessor().reset(fta.getBuffer());
+        entry.setTupleIndex(tIndex);
+        entry.setHashValue(tpc.partition(fta, tIndex, tableSize));
+
+        add(entry);
+        return entry.getRunid();
+    }
+
+    /**
+     * Push entry into priority queue
+     * 
+     * @param e
+     *            the new Entry
+     * @throws HyracksDataException
+     */
+    private void add(ReferenceHashEntry e) throws HyracksDataException {
+        ReferenceHashEntry min = entries[0];
+        int slot = (size >> 1) + (min.getRunid() >> 1);
+
+        ReferenceHashEntry curr = e;
+        while (!runAvail.isEmpty() && slot > 0) {
+            int c = 0;
+            if (!runAvail.get(entries[slot].getRunid())) {
+                // run of entries[slot] is exhausted, i.e. not available, curr
+                // wins
+                c = 1;
+            } else if (entries[slot].getAccessor() != null /*
+                                                            * entries[slot] is
+                                                            * not MIN value
+                                                            */
+                    && runAvail.get(curr.getRunid() /* curr run is available */)) {
+
+                if (curr.getAccessor() != null) {
+                    c = comparator.compare(entries[slot], curr);
+                } else {
+                    // curr is MIN value, wins
+                    c = 1;
+                }
+            }
+
+            if (c <= 0) { // curr lost
+                // entries[slot] swaps up
+                ReferenceHashEntry tmp = entries[slot];
+                entries[slot] = curr;
+                curr = tmp;// winner to pass up
+            }// else curr wins
+            slot >>= 1;
+        }
+        // set new entries[0]
+        entries[0] = curr;
+    }
+
+    /**
+     * Pop is called only when a run is exhausted: the top entry's run is marked
+     * as unavailable so the entry no longer competes in the queue.
+     * 
+     * @return the former top entry
+     * @throws HyracksDataException
+     */
+    public ReferenceHashEntry pop() throws HyracksDataException {
+        ReferenceHashEntry min = entries[0];
+        runAvail.clear(min.getRunid());
+        add(min);
+        nItems--;
+        return min;
+    }
+
+    public boolean areRunsExhausted() {
+        return runAvail.isEmpty();
+    }
+
+    public int size() {
+        return nItems;
+    }
+}
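
The queue above is driven by a run merger: peek the smallest head among all runs, emit it, then either feed in the next tuple from the same run (popAndReplace) or retire the run once it is exhausted (pop), until areRunsExhausted() holds. Below is a minimal standalone sketch of that protocol over plain int runs, for illustration only and not part of this patch; the Hyracks class keeps frame references in a tournament tree rather than a JDK heap.

import java.util.Comparator;
import java.util.PriorityQueue;

public class KWayMergeSketch {

    /** current head value of a run plus the cursor for its next value */
    static final class RunHead {
        final int runId;
        int value;
        int nextIndex;

        RunHead(int runId, int value, int nextIndex) {
            this.runId = runId;
            this.value = value;
            this.nextIndex = nextIndex;
        }
    }

    public static void main(String[] args) {
        int[][] runs = { { 1, 4, 9 }, { 2, 3, 10 }, { 5, 6, 7 } };

        PriorityQueue<RunHead> queue = new PriorityQueue<>(Comparator.comparingInt((RunHead h) -> h.value));
        for (int r = 0; r < runs.length; r++) {
            if (runs[r].length > 0) {
                queue.add(new RunHead(r, runs[r][0], 1));
            }
        }

        while (!queue.isEmpty()) {          // analogous to !areRunsExhausted()
            RunHead top = queue.poll();     // analogous to peek()
            System.out.print(top.value + " ");
            int[] run = runs[top.runId];
            if (top.nextIndex < run.length) {
                // analogous to popAndReplace(): reuse the entry for the next tuple of the same run
                top.value = run[top.nextIndex++];
                queue.add(top);
            }
            // else: analogous to pop() -- the run is exhausted and leaves the queue
        }
        System.out.println(); // prints: 1 2 3 4 5 6 7 9 10
    }
}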
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferencedPriorityQueue.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferencedPriorityQueue.java
new file mode 100644
index 0000000..d9d5118
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferencedPriorityQueue.java
@@ -0,0 +1,133 @@
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.io.IOException;
+import java.util.BitSet;
+import java.util.Comparator;
+
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+
+/**
+ * TODO need to be merged with the ReferencedPriorityQueue in the util package
+ */
+public class ReferencedPriorityQueue {
+    private final int frameSize;
+    private final RecordDescriptor recordDescriptor;
+    private final ReferenceEntryWithBucketID entries[];
+    private final int size;
+    private final BitSet runAvail;
+    private int nItems;
+
+    private final Comparator<ReferenceEntryWithBucketID> comparator;
+
+    public ReferencedPriorityQueue(int frameSize, RecordDescriptor recordDescriptor, int initSize,
+            Comparator<ReferenceEntryWithBucketID> comparator) {
+        this.frameSize = frameSize;
+        this.recordDescriptor = recordDescriptor;
+        if (initSize < 1)
+            throw new IllegalArgumentException();
+        this.comparator = comparator;
+        nItems = initSize;
+        size = (initSize + 1) & 0xfffffffe;
+        entries = new ReferenceEntryWithBucketID[size];
+        runAvail = new BitSet(size);
+        runAvail.set(0, initSize, true);
+        for (int i = 0; i < size; i++) {
+            entries[i] = new ReferenceEntryWithBucketID(i, null, -1, -1);
+        }
+    }
+
+    /**
+     * Retrieve the top entry without removing it
+     * 
+     * @return the top entry
+     */
+    public ReferenceEntryWithBucketID peek() {
+        return entries[0];
+    }
+
+    /**
+     * Replace the top entry with the given tuple from the same run and
+     * re-insert it into the queue.
+     * 
+     * @param fta
+     *            accessor of the frame containing the replacement tuple
+     * @param tIndex
+     *            index of the replacement tuple within the frame
+     * @param bucketID
+     *            bucket id to record on the entry
+     * @return the run id of the entry
+     */
+    public int popAndReplace(FrameTupleAccessor fta, int tIndex, int bucketID) {
+        ReferenceEntryWithBucketID entry = entries[0];
+        if (entry.getAccessor() == null) {
+            entry.setAccessor(new FrameTupleAccessor(frameSize, recordDescriptor));
+        }
+        entry.getAccessor().reset(fta.getBuffer());
+        entry.setTupleIndex(tIndex);
+        entry.setBucketID(bucketID);
+
+        add(entry);
+        return entry.getRunid();
+    }
+
+    /**
+     * Push entry into priority queue
+     * 
+     * @param e
+     *            the new Entry
+     */
+    private void add(ReferenceEntryWithBucketID e) {
+        ReferenceEntryWithBucketID min = entries[0];
+        int slot = (size >> 1) + (min.getRunid() >> 1);
+
+        ReferenceEntryWithBucketID curr = e;
+        while (!runAvail.isEmpty() && slot > 0) {
+            int c = 0;
+            if (!runAvail.get(entries[slot].getRunid())) {
+                // run of entries[slot] is exhausted, i.e. not available, curr
+                // wins
+                c = 1;
+            } else if (entries[slot].getAccessor() != null /*
+                                                            * entries[slot] is
+                                                            * not MIN value
+                                                            */
+                    && runAvail.get(curr.getRunid() /* curr run is available */)) {
+
+                if (curr.getAccessor() != null) {
+                    c = comparator.compare(entries[slot], curr);
+                } else {
+                    // curr is MIN value, wins
+                    c = 1;
+                }
+            }
+
+            if (c <= 0) { // curr lost
+                // entries[slot] swaps up
+                ReferenceEntryWithBucketID tmp = entries[slot];
+                entries[slot] = curr;
+                curr = tmp;// winner to pass up
+            }// else curr wins
+            slot >>= 1;
+        }
+        // set new entries[0]
+        entries[0] = curr;
+    }
+
+    /**
+     * Pop is called only when a run is exhausted: the top entry's run is marked
+     * as unavailable so the entry no longer competes in the queue.
+     * 
+     * @return the former top entry
+     */
+    public ReferenceEntryWithBucketID pop() {
+        ReferenceEntryWithBucketID min = entries[0];
+        runAvail.clear(min.getRunid());
+        add(min);
+        nItems--;
+        return min;
+    }
+
+    public boolean areRunsExhausted() {
+        return runAvail.isEmpty();
+    }
+
+    public int size() {
+        return nItems;
+    }
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/FrameTupleAccessorForGroupHashtable.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/FrameTupleAccessorForGroupHashtable.java
new file mode 100644
index 0000000..72bae76
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/FrameTupleAccessorForGroupHashtable.java
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hybridhash;
+
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.comm.FrameHelper;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+
+public class FrameTupleAccessorForGroupHashtable implements IFrameTupleAccessor {
+    private final int frameSize;
+    private final RecordDescriptor recordDescriptor;
+
+    private final static int INT_SIZE = 4;
+
+    private ByteBuffer buffer;
+
+    public FrameTupleAccessorForGroupHashtable(int frameSize, RecordDescriptor recordDescriptor) {
+        this.frameSize = frameSize;
+        this.recordDescriptor = recordDescriptor;
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getFieldCount()
+     */
+    @Override
+    public int getFieldCount() {
+        return recordDescriptor.getFieldCount();
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getFieldSlotsLength()
+     */
+    @Override
+    public int getFieldSlotsLength() {
+        return getFieldCount() * 4;
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getFieldEndOffset(int, int)
+     */
+    @Override
+    public int getFieldEndOffset(int tupleIndex, int fIdx) {
+        return buffer.getInt(getTupleStartOffset(tupleIndex) + fIdx * 4);
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getFieldStartOffset(int, int)
+     */
+    @Override
+    public int getFieldStartOffset(int tupleIndex, int fIdx) {
+        return fIdx == 0 ? 0 : buffer.getInt(getTupleStartOffset(tupleIndex) + (fIdx - 1) * 4);
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getFieldLength(int, int)
+     */
+    @Override
+    public int getFieldLength(int tupleIndex, int fIdx) {
+        return getFieldEndOffset(tupleIndex, fIdx) - getFieldStartOffset(tupleIndex, fIdx);
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getTupleEndOffset(int)
+     */
+    @Override
+    public int getTupleEndOffset(int tupleIndex) {
+        return buffer.getInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleIndex + 1)) - 2 * INT_SIZE;
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getTupleStartOffset(int)
+     */
+    @Override
+    public int getTupleStartOffset(int tupleIndex) {
+        return tupleIndex == 0 ? 0 : buffer.getInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * tupleIndex);
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getTupleCount()
+     */
+    @Override
+    public int getTupleCount() {
+        return buffer.getInt(FrameHelper.getTupleCountOffset(frameSize));
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getBuffer()
+     */
+    @Override
+    public ByteBuffer getBuffer() {
+        return buffer;
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#reset(java.nio.ByteBuffer)
+     */
+    @Override
+    public void reset(ByteBuffer buffer) {
+        this.buffer = buffer;
+    }
+
+    public int getTupleHashReferenceOffset(int tupleIndex) {
+        return getTupleEndOffset(tupleIndex);
+    }
+
+    public int getTupleEndOffsetWithHashReference(int tupleIndex) {
+        return buffer.getInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleIndex + 1));
+    }
+
+    public int getHashReferenceNextFrameIndex(int tupleIndex) {
+        return buffer.getInt(getTupleHashReferenceOffset(tupleIndex));
+    }
+
+    public int getHashReferenceNextTupleIndex(int tupleIndex) {
+        return buffer.getInt(getTupleHashReferenceOffset(tupleIndex) + INT_SIZE);
+    }
+
+}
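
The accessor above assumes the group-hash-table frame layout: tuple data grows from the front of the frame, the per-tuple end offsets and the tuple count grow backwards from the end of the frame, and every tuple is followed by an 8-byte (next frame index, next tuple index) reference that links the bucket's hash chain; getTupleEndOffset() subtracts those two ints so field access never sees the reference. The standalone sketch below replays that arithmetic on a plain ByteBuffer, for illustration only; it assumes the tuple count occupies the last 4 bytes of the frame (the value FrameHelper.getTupleCountOffset is used for here) and writes bare payloads without field slots.

import java.nio.ByteBuffer;

public class GroupHashtableFrameLayoutSketch {

    private static final int INT_SIZE = 4;

    public static void main(String[] args) {
        int frameSize = 256;
        ByteBuffer frame = ByteBuffer.allocate(frameSize);
        int tupleCountOffset = frameSize - INT_SIZE; // assumed value of FrameHelper.getTupleCountOffset(frameSize)

        byte[][] payloads = { { 10, 11, 12 }, { 20, 21, 22, 23 } };
        int dataEnd = 0;
        int tupleCount = 0;
        for (byte[] payload : payloads) {
            frame.position(dataEnd);
            frame.put(payload);
            frame.putInt(dataEnd + payload.length, -1);            // next frame index: end of chain
            frame.putInt(dataEnd + payload.length + INT_SIZE, -1); // next tuple index: end of chain
            dataEnd += payload.length + 2 * INT_SIZE;
            frame.putInt(tupleCountOffset - INT_SIZE * (tupleCount + 1), dataEnd); // end offset incl. reference
            frame.putInt(tupleCountOffset, ++tupleCount);
        }

        // read tuple 1 back with the accessor's arithmetic
        int tIndex = 1;
        int start = frame.getInt(tupleCountOffset - INT_SIZE * tIndex);                    // getTupleStartOffset
        int end = frame.getInt(tupleCountOffset - INT_SIZE * (tIndex + 1)) - 2 * INT_SIZE; // getTupleEndOffset
        int refOffset = end;                                                               // getTupleHashReferenceOffset
        System.out.println("payload bytes = " + (end - start));                      // 4
        System.out.println("next frame    = " + frame.getInt(refOffset));            // -1
        System.out.println("next tuple    = " + frame.getInt(refOffset + INT_SIZE)); // -1
    }
}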
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/FrameTupleAppenderForGroupHashtable.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/FrameTupleAppenderForGroupHashtable.java
new file mode 100644
index 0000000..c5668f5
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/FrameTupleAppenderForGroupHashtable.java
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hybridhash;
+
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.comm.FrameHelper;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+
+public class FrameTupleAppenderForGroupHashtable {
+    private final int frameSize;
+
+    private ByteBuffer buffer;
+
+    private int tupleCount;
+
+    private int tupleDataEndOffset;
+
+    public FrameTupleAppenderForGroupHashtable(int frameSize) {
+        this.frameSize = frameSize;
+    }
+
+    public void reset(ByteBuffer buffer, boolean clear) {
+        this.buffer = buffer;
+        if (clear) {
+            buffer.putInt(FrameHelper.getTupleCountOffset(frameSize), 0);
+            tupleCount = 0;
+            tupleDataEndOffset = 0;
+        } else {
+            tupleCount = buffer.getInt(FrameHelper.getTupleCountOffset(frameSize));
+            tupleDataEndOffset = tupleCount == 0 ? 0 : buffer.getInt(FrameHelper.getTupleCountOffset(frameSize)
+                    - tupleCount * 4);
+        }
+    }
+
+    public boolean append(int[] fieldSlots, byte[] bytes, int offset, int length) {
+        if (tupleDataEndOffset + fieldSlots.length * 4 + length + 2 * 4 + 4 + (tupleCount + 1) * 4 <= frameSize) {
+            for (int i = 0; i < fieldSlots.length; ++i) {
+                buffer.putInt(tupleDataEndOffset + i * 4, fieldSlots[i]);
+            }
+            System.arraycopy(bytes, offset, buffer.array(), tupleDataEndOffset + fieldSlots.length * 4, length);
+            buffer.putInt(tupleDataEndOffset + fieldSlots.length * 4 + length, -1);
+            buffer.putInt(tupleDataEndOffset + fieldSlots.length * 4 + length + 4, -1);
+            tupleDataEndOffset += fieldSlots.length * 4 + length + 2 * 4;
+            buffer.putInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleCount + 1), tupleDataEndOffset);
+            ++tupleCount;
+            buffer.putInt(FrameHelper.getTupleCountOffset(frameSize), tupleCount);
+            return true;
+        }
+        return false;
+    }
+
+    public boolean append(byte[] bytes, int offset, int length) {
+        if (tupleDataEndOffset + length + 2 * 4 + 4 + (tupleCount + 1) * 4 <= frameSize) {
+            System.arraycopy(bytes, offset, buffer.array(), tupleDataEndOffset, length);
+            buffer.putInt(tupleDataEndOffset + length, -1);
+            buffer.putInt(tupleDataEndOffset + length + 4, -1);
+            tupleDataEndOffset += length + 2 * 4;
+            buffer.putInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleCount + 1), tupleDataEndOffset);
+            ++tupleCount;
+            buffer.putInt(FrameHelper.getTupleCountOffset(frameSize), tupleCount);
+            return true;
+        }
+        return false;
+    }
+
+    public boolean appendSkipEmptyField(int[] fieldSlots, byte[] bytes, int offset, int length) {
+        if (tupleDataEndOffset + fieldSlots.length * 4 + length + 2 * 4 + 4 + (tupleCount + 1) * 4 <= frameSize) {
+            int effectiveSlots = 0;
+            for (int i = 0; i < fieldSlots.length; ++i) {
+                if (fieldSlots[i] > 0) {
+                    buffer.putInt(tupleDataEndOffset + i * 4, fieldSlots[i]);
+                    effectiveSlots++;
+                }
+            }
+            System.arraycopy(bytes, offset, buffer.array(), tupleDataEndOffset + effectiveSlots * 4, length);
+            buffer.putInt(tupleDataEndOffset + effectiveSlots * 4 + length, -1);
+            buffer.putInt(tupleDataEndOffset + effectiveSlots * 4 + length + 4, -1);
+            tupleDataEndOffset += effectiveSlots * 4 + length + 2 * 4;
+            buffer.putInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleCount + 1), tupleDataEndOffset);
+            ++tupleCount;
+            buffer.putInt(FrameHelper.getTupleCountOffset(frameSize), tupleCount);
+            return true;
+        }
+        return false;
+    }
+
+    public boolean append(IFrameTupleAccessor tupleAccessor, int tStartOffset, int tEndOffset) {
+        int length = tEndOffset - tStartOffset;
+        if (tupleDataEndOffset + length + 2 * 4 + 4 + (tupleCount + 1) * 4 <= frameSize) {
+            ByteBuffer src = tupleAccessor.getBuffer();
+            System.arraycopy(src.array(), tStartOffset, buffer.array(), tupleDataEndOffset, length);
+            buffer.putInt(tupleDataEndOffset + length, -1);
+            buffer.putInt(tupleDataEndOffset + length + 4, -1);
+            tupleDataEndOffset += length + 2 * 4;
+            buffer.putInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleCount + 1), tupleDataEndOffset);
+            ++tupleCount;
+            buffer.putInt(FrameHelper.getTupleCountOffset(frameSize), tupleCount);
+            return true;
+        }
+        return false;
+    }
+
+    public boolean append(IFrameTupleAccessor tupleAccessor, int tIndex) {
+        int tStartOffset = tupleAccessor.getTupleStartOffset(tIndex);
+        int tEndOffset = tupleAccessor.getTupleEndOffset(tIndex);
+        return append(tupleAccessor, tStartOffset, tEndOffset);
+    }
+
+    public int getTupleCount() {
+        return tupleCount;
+    }
+
+    public ByteBuffer getBuffer() {
+        return buffer;
+    }
+}
+
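
The free-space test repeated in the append methods above reserves, besides the payload, 8 bytes for the hash-chain reference, 4 bytes for the tuple count slot, and one 4-byte end-offset slot per tuple including the new one. A small helper factoring out that test, for illustration only and not part of this patch:

public final class GroupHashtableFrameBudget {

    private static final int INT_SIZE = 4;

    /** mirrors: tupleDataEndOffset + length + 2 * 4 + 4 + (tupleCount + 1) * 4 <= frameSize */
    public static boolean fits(int frameSize, int tupleDataEndOffset, int tupleCount, int payloadLength) {
        return tupleDataEndOffset + payloadLength + 2 * INT_SIZE // payload + hash-chain reference
                + INT_SIZE                                       // tuple count slot
                + (tupleCount + 1) * INT_SIZE                    // end-offset slots
                <= frameSize;
    }

    public static void main(String[] args) {
        // an empty 32KB frame holds a single payload of at most 32768 - 8 - 4 - 4 = 32752 bytes
        System.out.println(fits(32768, 0, 0, 32752)); // true
        System.out.println(fits(32768, 0, 0, 32753)); // false
    }
}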
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashGroupHashTable.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashGroupHashTable.java
new file mode 100644
index 0000000..b325b83
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashGroupHashTable.java
@@ -0,0 +1,609 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hybridhash;
+
+import java.nio.ByteBuffer;
+import java.util.LinkedList;
+import java.util.List;
+
+import edu.uci.ics.hyracks.api.comm.IFrameReader;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFamily;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileWriter;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.structures.TuplePointer;
+
+public class HybridHashGroupHashTable implements IFrameWriter {
+
+    private final static int HEADER_REF_EMPTY = -1;
+
+    private static final int INT_SIZE = 4;
+
+    private IHyracksTaskContext ctx;
+
+    private final int frameSize;
+
+    private final int framesLimit;
+
+    private final int tableSize;
+
+    private final int numOfPartitions;
+
+    private final IFrameWriter outputWriter;
+
+    private final IBinaryComparator[] comparators;
+
+    /**
+     * index for keys
+     */
+    private final int[] inputKeys, internalKeys;
+
+    private final RecordDescriptor inputRecordDescriptor, outputRecordDescriptor;
+
+    /**
+     * hash partitioner for hashing
+     */
+    private final ITuplePartitionComputer hashComputer;
+
+    /**
+     * hash partitioner for partitioning
+     */
+    private final ITuplePartitionComputer partitionComputer;
+
+    /**
+     * Hash table headers
+     */
+    private ByteBuffer[] headers;
+
+    /**
+     * buffers for hash table
+     */
+    private ByteBuffer[] contents;
+
+    /**
+     * output buffers for spilled partitions
+     */
+    private ByteBuffer[] spilledPartOutputBuffers;
+
+    /**
+     * run writers for spilled partitions
+     */
+    private RunFileWriter[] spilledPartRunWriters;
+
+    private int[] spilledPartRunSizeArrayInFrames;
+    private int[] spilledPartRunSizeArrayInTuples;
+
+    private List<IFrameReader> spilledPartRunReaders = null;
+    private List<Integer> spilledRunAggregatedPages = null;
+    private List<Integer> spilledPartRunSizesInFrames = null;
+    private List<Integer> spilledPartRunSizesInTuples = null;
+
+    /**
+     * index of the current working buffer in hash table
+     */
+    private int currentHashTableFrame;
+
+    /**
+     * Aggregation state
+     */
+    private AggregateState htAggregateState;
+
+    /**
+     * the aggregator
+     */
+    private final IAggregatorDescriptor aggregator;
+
+    /**
+     * records inserted into the in-memory hash table (for hashing and aggregation)
+     */
+    private int hashedRawRecords = 0;
+
+    /**
+     * unique keys aggregated in the in-memory hash table
+     */
+    private int hashedKeys = 0;
+
+    /**
+     * Hash table tuple pointer
+     */
+    private TuplePointer matchPointer;
+
+    /**
+     * Frame tuple accessor for input data frames
+     */
+    private FrameTupleAccessor inputFrameTupleAccessor;
+
+    /**
+     * flag for whether the hash table is full
+     */
+    private boolean isHashtableFull;
+
+    /**
+     * flag for partition-only mode (no hashing or aggregation)
+     */
+    private boolean isPartitionOnly;
+
+    /**
+     * Tuple accessor for hash table contents
+     */
+    private FrameTupleAccessorForGroupHashtable hashtableRecordAccessor;
+
+    private ArrayTupleBuilder internalTupleBuilder;
+
+    private FrameTupleAppender spilledPartInsertAppender;
+
+    private FrameTupleAppenderForGroupHashtable htInsertAppender;
+
+    public HybridHashGroupHashTable(IHyracksTaskContext ctx, int framesLimit, int tableSize, int numOfPartitions,
+            int[] keys, int hashSeedOffset, IBinaryComparator[] comparators, ITuplePartitionComputerFamily tpcFamily,
+            IAggregatorDescriptor aggregator, RecordDescriptor inputRecordDescriptor,
+            RecordDescriptor outputRecordDescriptor, IFrameWriter outputWriter) throws HyracksDataException {
+        this.ctx = ctx;
+        this.frameSize = ctx.getFrameSize();
+        this.tableSize = tableSize;
+        this.framesLimit = framesLimit;
+        this.numOfPartitions = numOfPartitions;
+        this.inputKeys = keys;
+        this.internalKeys = new int[keys.length];
+        for (int i = 0; i < internalKeys.length; i++) {
+            internalKeys[i] = i;
+        }
+
+        this.comparators = comparators;
+
+        this.inputRecordDescriptor = inputRecordDescriptor;
+        this.outputRecordDescriptor = outputRecordDescriptor;
+
+        this.outputWriter = outputWriter;
+
+        this.hashComputer = tpcFamily.createPartitioner(hashSeedOffset * 2);
+        this.partitionComputer = tpcFamily.createPartitioner(hashSeedOffset * 2 + 1);
+
+        this.aggregator = aggregator;
+
+    }
+
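+    /**
+     * Estimate how much larger the hash table footprint is than the raw record
+     * bytes it can hold, accounting for the header pages and the per-record
+     * 8-byte hash reference; used by the operator descriptor as the base of
+     * its fudge factor.
+     */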
+    public static double getHashtableOverheadRatio(int tableSize, int frameSize, int framesLimit, int recordSizeInByte) {
+        int pagesForRecord = framesLimit - getHeaderPages(tableSize, frameSize);
+        int recordsInHashtable = (pagesForRecord - 1) * ((int) (frameSize / (recordSizeInByte + 2 * INT_SIZE)));
+
+        return (double) framesLimit * frameSize / recordsInHashtable / recordSizeInByte;
+    }
+
+    public static int getHeaderPages(int tableSize, int frameSize) {
+        return (int) Math.ceil((double)tableSize * INT_SIZE * 2 / frameSize);
+    }
+
+    @Override
+    public void open() throws HyracksDataException {
+        // initialize hash headers
+        int htHeaderCount = getHeaderPages(tableSize, frameSize);
+
+        isPartitionOnly = false;
+        if (numOfPartitions >= framesLimit - htHeaderCount) {
+            isPartitionOnly = true;
+        }
+
+        if (isPartitionOnly) {
+            htHeaderCount = 0;
+        }
+
+        headers = new ByteBuffer[htHeaderCount];
+
+        // initialize hash table contents
+        contents = new ByteBuffer[framesLimit - htHeaderCount - numOfPartitions];
+        currentHashTableFrame = 0;
+        isHashtableFull = false;
+
+        // initialize hash table aggregate state
+        htAggregateState = aggregator.createAggregateStates();
+
+        // initialize partition information
+        spilledPartOutputBuffers = new ByteBuffer[numOfPartitions];
+        spilledPartRunWriters = new RunFileWriter[numOfPartitions];
+        spilledPartRunSizeArrayInFrames = new int[numOfPartitions];
+        spilledPartRunSizeArrayInTuples = new int[numOfPartitions];
+
+        // initialize other helper classes
+        inputFrameTupleAccessor = new FrameTupleAccessor(frameSize, inputRecordDescriptor);
+        internalTupleBuilder = new ArrayTupleBuilder(outputRecordDescriptor.getFieldCount());
+        spilledPartInsertAppender = new FrameTupleAppender(frameSize);
+
+        htInsertAppender = new FrameTupleAppenderForGroupHashtable(frameSize);
+        matchPointer = new TuplePointer();
+        hashtableRecordAccessor = new FrameTupleAccessorForGroupHashtable(frameSize, outputRecordDescriptor);
+    }
+
+    @Override
+    public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+        inputFrameTupleAccessor.reset(buffer);
+        int tupleCount = inputFrameTupleAccessor.getTupleCount();
+        for (int i = 0; i < tupleCount; i++) {
+            insert(inputFrameTupleAccessor, i);
+        }
+    }
+
+    @Override
+    public void fail() throws HyracksDataException {
+        // TODO Auto-generated method stub
+
+    }
+
+    @Override
+    public void close() throws HyracksDataException {
+        for (int i = 0; i < numOfPartitions; i++) {
+            if (spilledPartRunWriters[i] != null) {
+                spilledPartRunWriters[i].close();
+            }
+        }
+        htAggregateState.close();
+    }
+
+    private void insert(FrameTupleAccessor accessor, int tupleIndex) throws HyracksDataException {
+
+        if (isPartitionOnly) {
+            // for partition only
+            int pid = partitionComputer.partition(accessor, tupleIndex, tableSize) % numOfPartitions;
+            insertSpilledPartition(accessor, tupleIndex, pid);
+            spilledPartRunSizeArrayInTuples[pid]++;
+            return;
+        }
+
+        int hid = hashComputer.partition(accessor, tupleIndex, tableSize);
+
+        if (findMatch(hid, accessor, tupleIndex)) {
+            // found a match: do aggregation
+            hashtableRecordAccessor.reset(contents[matchPointer.frameIndex]);
+            aggregator.aggregate(accessor, tupleIndex, hashtableRecordAccessor, matchPointer.tupleIndex,
+                    htAggregateState);
+            hashedRawRecords++;
+        } else {
+            if (isHashtableFull) {
+                // when hash table is full: spill the record
+                int pid = partitionComputer.partition(accessor, tupleIndex, tableSize) % numOfPartitions;
+                insertSpilledPartition(accessor, tupleIndex, pid);
+                spilledPartRunSizeArrayInTuples[pid]++;
+            } else {
+                // insert a new entry into the hash table
+                internalTupleBuilder.reset();
+                for (int k = 0; k < inputKeys.length; k++) {
+                    internalTupleBuilder.addField(accessor, tupleIndex, inputKeys[k]);
+                }
+
+                aggregator.init(internalTupleBuilder, accessor, tupleIndex, htAggregateState);
+
+                if (contents[currentHashTableFrame] == null) {
+                    contents[currentHashTableFrame] = ctx.allocateFrame();
+                }
+
+                htInsertAppender.reset(contents[currentHashTableFrame], false);
+                if (!htInsertAppender.append(internalTupleBuilder.getFieldEndOffsets(),
+                        internalTupleBuilder.getByteArray(), 0, internalTupleBuilder.getSize())) {
+                    // the current hash table frame is full: move on to the next frame
+                    currentHashTableFrame++;
+                    if (currentHashTableFrame >= contents.length) {
+                        // no more frames to allocate: stop expanding the hash table
+                        isHashtableFull = true;
+
+                        // reinsert the record
+                        insert(accessor, tupleIndex);
+
+                        return;
+                    } else {
+                        if (contents[currentHashTableFrame] == null) {
+                            contents[currentHashTableFrame] = ctx.allocateFrame();
+                        }
+
+                        htInsertAppender.reset(contents[currentHashTableFrame], true);
+
+                        if (!htInsertAppender.append(internalTupleBuilder.getFieldEndOffsets(),
+                                internalTupleBuilder.getByteArray(), 0, internalTupleBuilder.getSize())) {
+                            throw new HyracksDataException(
+                                    "Failed to insert an aggregation partial result into the in-memory hash table: it has the length of "
+                                            + internalTupleBuilder.getSize() + " and fields "
+                                            + internalTupleBuilder.getFieldEndOffsets().length);
+                        }
+
+                    }
+                }
+
+                // update hash table reference
+                if (matchPointer.frameIndex < 0) {
+                    // no entry in this bucket yet: point the header at the new tuple
+                    int headerFrameIndex = getHeaderFrameIndex(hid);
+                    int headerFrameOffset = getHeaderTupleIndex(hid);
+
+                    if (headers[headerFrameIndex] == null) {
+                        headers[headerFrameIndex] = ctx.allocateFrame();
+                        resetHeader(headerFrameIndex);
+                    }
+
+                    headers[headerFrameIndex].putInt(headerFrameOffset, currentHashTableFrame);
+                    headers[headerFrameIndex]
+                            .putInt(headerFrameOffset + INT_SIZE, htInsertAppender.getTupleCount() - 1);
+                } else {
+                    // update the previous reference
+                    hashtableRecordAccessor.reset(contents[matchPointer.frameIndex]);
+                    int refOffset = hashtableRecordAccessor.getTupleHashReferenceOffset(matchPointer.tupleIndex);
+                    contents[matchPointer.frameIndex].putInt(refOffset, currentHashTableFrame);
+                    contents[matchPointer.frameIndex]
+                            .putInt(refOffset + INT_SIZE, htInsertAppender.getTupleCount() - 1);
+                }
+
+                hashedKeys++;
+                hashedRawRecords++;
+            }
+        }
+    }
+
+    /**
+     * Insert record into a spilled partition, by directly copying the tuple into the output buffer.
+     * 
+     * @param accessor
+     * @param tupleIndex
+     * @param pid
+     */
+    private void insertSpilledPartition(FrameTupleAccessor accessor, int tupleIndex, int pid)
+            throws HyracksDataException {
+
+        if (spilledPartOutputBuffers[pid] == null) {
+            spilledPartOutputBuffers[pid] = ctx.allocateFrame();
+        }
+
+        spilledPartInsertAppender.reset(spilledPartOutputBuffers[pid], false);
+
+        if (!spilledPartInsertAppender.append(accessor, tupleIndex)) {
+            // the output buffer is full: flush
+            flushSpilledPartitionOutputBuffer(pid);
+            // reset the output buffer
+            spilledPartInsertAppender.reset(spilledPartOutputBuffers[pid], true);
+
+            if (!spilledPartInsertAppender.append(accessor, tupleIndex)) {
+                throw new HyracksDataException("Failed to insert a record into a spilled partition!");
+            }
+        }
+
+    }
+
+    /**
+     * Flush a spilled partition's output buffer.
+     * 
+     * @param pid
+     * @throws HyracksDataException
+     */
+    private void flushSpilledPartitionOutputBuffer(int pid) throws HyracksDataException {
+        if (spilledPartRunWriters[pid] == null) {
+            spilledPartRunWriters[pid] = new RunFileWriter(
+                    ctx.createManagedWorkspaceFile("HashHashPrePartitionHashTable"), ctx.getIOManager());
+            spilledPartRunWriters[pid].open();
+        }
+
+        FrameUtils.flushFrame(spilledPartOutputBuffers[pid], spilledPartRunWriters[pid]);
+
+        spilledPartRunSizeArrayInFrames[pid]++;
+    }
+
+    /**
+     * Hash table lookup
+     * 
+     * @param hid
+     * @param accessor
+     * @param tupleIndex
+     * @return true if a matching entry is found; matchPointer is left on the
+     *         match, or on the last entry of the bucket's chain otherwise
+     */
+    private boolean findMatch(int hid, FrameTupleAccessor accessor, int tupleIndex) {
+
+        matchPointer.frameIndex = -1;
+        matchPointer.tupleIndex = -1;
+
+        // get reference in the header
+        int headerFrameIndex = getHeaderFrameIndex(hid);
+        int headerFrameOffset = getHeaderTupleIndex(hid);
+
+        if (headers[headerFrameIndex] == null) {
+            return false;
+        }
+
+        // initialize the pointer to the first record 
+        int entryFrameIndex = headers[headerFrameIndex].getInt(headerFrameOffset);
+        int entryTupleIndex = headers[headerFrameIndex].getInt(headerFrameOffset + INT_SIZE);
+
+        while (entryFrameIndex >= 0) {
+            matchPointer.frameIndex = entryFrameIndex;
+            matchPointer.tupleIndex = entryTupleIndex;
+            hashtableRecordAccessor.reset(contents[entryFrameIndex]);
+            if (compare(accessor, tupleIndex, hashtableRecordAccessor, entryTupleIndex) == 0) {
+                return true;
+            }
+            // Move to the next record in this entry following the linked list
+            int refOffset = hashtableRecordAccessor.getTupleHashReferenceOffset(entryTupleIndex);
+            int prevFrameIndex = entryFrameIndex;
+            entryFrameIndex = contents[prevFrameIndex].getInt(refOffset);
+            entryTupleIndex = contents[prevFrameIndex].getInt(refOffset + INT_SIZE);
+        }
+
+        return false;
+    }
+
+    public void finishup() throws HyracksDataException {
+        // spill all output buffers
+        for (int i = 0; i < numOfPartitions; i++) {
+            if (spilledPartOutputBuffers[i] != null) {
+                flushSpilledPartitionOutputBuffer(i);
+            }
+        }
+        spilledPartOutputBuffers = null;
+
+        // flush in-memory aggregation results: no more frame cost here as all output buffers are recycled
+        ByteBuffer outputBuffer = ctx.allocateFrame();
+        FrameTupleAppender outputBufferAppender = new FrameTupleAppender(frameSize);
+        outputBufferAppender.reset(outputBuffer, true);
+
+        ArrayTupleBuilder outFlushTupleBuilder = new ArrayTupleBuilder(outputRecordDescriptor.getFieldCount());
+
+        for (ByteBuffer htFrame : contents) {
+            if (htFrame == null) {
+                continue;
+            }
+            hashtableRecordAccessor.reset(htFrame);
+            int tupleCount = hashtableRecordAccessor.getTupleCount();
+            for (int i = 0; i < tupleCount; i++) {
+                outFlushTupleBuilder.reset();
+
+                for (int k = 0; k < internalKeys.length; k++) {
+                    outFlushTupleBuilder.addField(hashtableRecordAccessor, i, internalKeys[k]);
+                }
+
+                aggregator.outputFinalResult(outFlushTupleBuilder, hashtableRecordAccessor, i, htAggregateState);
+
+                if (!outputBufferAppender.append(outFlushTupleBuilder.getFieldEndOffsets(),
+                        outFlushTupleBuilder.getByteArray(), 0, outFlushTupleBuilder.getSize())) {
+                    FrameUtils.flushFrame(outputBuffer, outputWriter);
+                    outputBufferAppender.reset(outputBuffer, true);
+
+                    if (!outputBufferAppender.append(outFlushTupleBuilder.getFieldEndOffsets(),
+                            outFlushTupleBuilder.getByteArray(), 0, outFlushTupleBuilder.getSize())) {
+                        throw new HyracksDataException(
+                                "Failed to flush a record from in-memory hash table: record has length of "
+                                        + outFlushTupleBuilder.getSize() + " and fields "
+                                        + outFlushTupleBuilder.getFieldEndOffsets().length);
+                    }
+                }
+            }
+        }
+
+        if (outputBufferAppender.getTupleCount() > 0) {
+            FrameUtils.flushFrame(outputBuffer, outputWriter);
+        }
+
+        // create run readers and statistic information for spilled runs
+        spilledPartRunReaders = new LinkedList<IFrameReader>();
+        spilledRunAggregatedPages = new LinkedList<Integer>();
+        spilledPartRunSizesInFrames = new LinkedList<Integer>();
+        spilledPartRunSizesInTuples = new LinkedList<Integer>();
+        for (int i = 0; i < numOfPartitions; i++) {
+            if (spilledPartRunWriters[i] != null) {
+                spilledPartRunReaders.add(spilledPartRunWriters[i].createReader());
+                spilledRunAggregatedPages.add(0);
+                spilledPartRunWriters[i].close();
+                spilledPartRunSizesInFrames.add(spilledPartRunSizeArrayInFrames[i]);
+                spilledPartRunSizesInTuples.add(spilledPartRunSizeArrayInTuples[i]);
+            }
+        }
+    }
+
+    /**
+     * Compare an input record with a hash table entry.
+     * 
+     * @param accessor
+     * @param tupleIndex
+     * @param hashAccessor
+     * @param hashTupleIndex
+     * @return 0 if the key fields are equal; a non-zero comparator result otherwise
+     */
+    private int compare(FrameTupleAccessor accessor, int tupleIndex, FrameTupleAccessorForGroupHashtable hashAccessor,
+            int hashTupleIndex) {
+        int tStart0 = accessor.getTupleStartOffset(tupleIndex);
+        int fStartOffset0 = accessor.getFieldSlotsLength() + tStart0;
+
+        int tStart1 = hashAccessor.getTupleStartOffset(hashTupleIndex);
+        int fStartOffset1 = hashAccessor.getFieldSlotsLength() + tStart1;
+
+        for (int i = 0; i < internalKeys.length; ++i) {
+            int fStart0 = accessor.getFieldStartOffset(tupleIndex, inputKeys[i]);
+            int fEnd0 = accessor.getFieldEndOffset(tupleIndex, inputKeys[i]);
+            int fLen0 = fEnd0 - fStart0;
+
+            int fStart1 = hashAccessor.getFieldStartOffset(hashTupleIndex, internalKeys[i]);
+            int fEnd1 = hashAccessor.getFieldEndOffset(hashTupleIndex, internalKeys[i]);
+            int fLen1 = fEnd1 - fStart1;
+
+            int c = comparators[i].compare(accessor.getBuffer().array(), fStart0 + fStartOffset0, fLen0, hashAccessor
+                    .getBuffer().array(), fStart1 + fStartOffset1, fLen1);
+            if (c != 0) {
+                return c;
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * Get the header frame index of the given hash table entry
+     * 
+     * @param entry
+     * @return the index of the header frame containing the entry's header slot
+     */
+    private int getHeaderFrameIndex(int entry) {
+        int frameIndex = (entry / frameSize * 2 * INT_SIZE) + (entry % frameSize * 2 * INT_SIZE / frameSize);
+        return frameIndex;
+    }
+
+    /**
+     * Get the byte offset of the given hash table entry's header slot within
+     * its header frame
+     * 
+     * @param entry
+     * @return the byte offset of the header slot within the header frame
+     */
+    private int getHeaderTupleIndex(int entry) {
+        int offset = (entry % frameSize) * 2 * INT_SIZE % frameSize;
+        return offset;
+    }
+
+    /**
+     * reset the header page.
+     * 
+     * @param headerFrameIndex
+     */
+    private void resetHeader(int headerFrameIndex) {
+        for (int i = 0; i < frameSize; i += INT_SIZE) {
+            headers[headerFrameIndex].putInt(i, HEADER_REF_EMPTY);
+        }
+    }
+
+    public List<Integer> getSpilledRunsSizeInRawTuples() throws HyracksDataException {
+        return spilledPartRunSizesInTuples;
+    }
+
+    public int getHashedUniqueKeys() throws HyracksDataException {
+        return hashedKeys;
+    }
+
+    public int getHashedRawRecords() throws HyracksDataException {
+        return hashedRawRecords;
+    }
+
+    public List<Integer> getSpilledRunsAggregatedPages() throws HyracksDataException {
+        return spilledRunAggregatedPages;
+    }
+
+    public List<IFrameReader> getSpilledRuns() throws HyracksDataException {
+        return spilledPartRunReaders;
+    }
+
+    public List<Integer> getSpilledRunsSizeInPages() throws HyracksDataException {
+        return spilledPartRunSizesInFrames;
+    }
+
+}
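
getHeaderFrameIndex() and getHeaderTupleIndex() above locate a bucket's 8-byte (frame index, tuple index) header slot as if the header frames formed one contiguous byte array: bucket entry starts at flat byte position entry * 2 * INT_SIZE, split into a frame index and an in-frame offset. The standalone check below, for illustration only, verifies that equivalence; it assumes the frame size is a multiple of 8 so a slot never straddles two header frames.

public class HeaderSlotArithmeticSketch {

    private static final int INT_SIZE = 4;

    public static void main(String[] args) {
        int frameSize = 32768;
        for (int entry = 0; entry < 100000; entry++) {
            // formulas used by HybridHashGroupHashTable
            int frameIndex = (entry / frameSize * 2 * INT_SIZE) + (entry % frameSize * 2 * INT_SIZE / frameSize);
            int offset = entry % frameSize * 2 * INT_SIZE % frameSize;

            // equivalent "flat byte address" form
            long flat = (long) entry * 2 * INT_SIZE;
            if (frameIndex != (int) (flat / frameSize) || offset != (int) (flat % frameSize)) {
                throw new AssertionError("mismatch at entry " + entry);
            }
        }
        System.out.println("header slot arithmetic agrees for all tested entries");
    }
}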
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashGroupOperatorDescriptor.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashGroupOperatorDescriptor.java
new file mode 100644
index 0000000..118ca75
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashGroupOperatorDescriptor.java
@@ -0,0 +1,399 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hybridhash;
+
+import java.nio.ByteBuffer;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.comm.IFrameReader;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFamily;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFamily;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileReader;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
+import edu.uci.ics.hyracks.dataflow.std.group.hashsort.HybridHashSortGroupHashTable;
+import edu.uci.ics.hyracks.dataflow.std.group.hashsort.HybridHashSortRunMerger;
+
+public class HybridHashGroupOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
+
+    private static final long serialVersionUID = 1L;
+
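+    // if a spilled partition is estimated to still hold more than this fraction
+    // of its parent's keys, hashing is not reducing the data and the partition
+    // falls back to the hash-sort algorithm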
+    private static final double HYBRID_FALLBACK_THRESHOLD = 0.8;
+
+    // safety margin applied to the input size estimate, on top of the fudge factor
+    private static final double ESTIMATOR_MAGNIFIER = 1.2;
+
+    // input key fields
+    private final int[] keyFields;
+
+    // intermediate and final key fields
+    private final int[] storedKeyFields;
+
+    /**
+     * Input size as the count of raw records.
+     */
+    private final long inputSizeInRawRecords;
+
+    /**
+     * Input size as the count of unique keys.
+     */
+    private final long inputSizeInUniqueKeys;
+
+    // hash table size
+    private final int tableSize;
+
+    // estimated record size: used to compute the fudge factor
+    private final int userProvidedRecordSizeInBytes;
+
+    // aggregator
+    private final IAggregatorDescriptorFactory aggregatorFactory;
+
+    // merger, used when falling back to the hash-sort algorithm due to hash skew
+    private final IAggregatorDescriptorFactory mergerFactory;
+
+    // for the sort fall-back algorithm
+    private final INormalizedKeyComputerFactory firstNormalizerFactory;
+
+    // total memory in pages
+    private final int framesLimit;
+
+    // comparator factories for key fields.
+    private final IBinaryComparatorFactory[] comparatorFactories;
+
+    /**
+     * hash families for each field: a hash function family is needed as we may
+     * have more than one level of hashing
+     */
+    private final IBinaryHashFunctionFamily[] hashFamilies;
+
+    /**
+     * Flag for input adjustment
+     */
+    private final boolean doInputAdjustment;
+
+    private final static double FUDGE_FACTOR_ESTIMATION = 1.2;
+
+    public HybridHashGroupOperatorDescriptor(JobSpecification spec, int[] keyFields, int framesLimit,
+            long inputSizeInRawRecords, long inputSizeInUniqueKeys, int recordSizeInBytes, int tableSize,
+            IBinaryComparatorFactory[] comparatorFactories, IBinaryHashFunctionFamily[] hashFamilies,
+            int hashFuncStartLevel, INormalizedKeyComputerFactory firstNormalizerFactory,
+            IAggregatorDescriptorFactory aggregatorFactory, IAggregatorDescriptorFactory mergerFactory,
+            RecordDescriptor outRecDesc) throws HyracksDataException {
+        this(spec, keyFields, framesLimit, inputSizeInRawRecords, inputSizeInUniqueKeys, recordSizeInBytes, tableSize,
+                comparatorFactories, hashFamilies, hashFuncStartLevel, firstNormalizerFactory, aggregatorFactory,
+                mergerFactory, outRecDesc, true);
+    }
+
+    public HybridHashGroupOperatorDescriptor(JobSpecification spec, int[] keyFields, int framesLimit,
+            long inputSizeInRawRecords, long inputSizeInUniqueKeys, int recordSizeInBytes, int tableSize,
+            IBinaryComparatorFactory[] comparatorFactories, IBinaryHashFunctionFamily[] hashFamilies,
+            int hashFuncStartLevel, INormalizedKeyComputerFactory firstNormalizerFactory,
+            IAggregatorDescriptorFactory aggregatorFactory, IAggregatorDescriptorFactory mergerFactory,
+            RecordDescriptor outRecDesc, boolean doInputAdjustment) throws HyracksDataException {
+        super(spec, 1, 1);
+        this.framesLimit = framesLimit;
+        this.tableSize = tableSize;
+        this.userProvidedRecordSizeInBytes = recordSizeInBytes;
+
+        this.inputSizeInRawRecords = inputSizeInRawRecords;
+        this.inputSizeInUniqueKeys = inputSizeInUniqueKeys;
+
+        if (framesLimit <= 3) {
+            // needs more than 3 frames: at least 2 for the in-memory hash table, and 1 for the output buffer
+            throw new HyracksDataException(
+                    "Not enough memory for Hash-Hash Aggregation algorithm: more than 3 frames are necessary, but only "
+                            + framesLimit + " available.");
+        }
+
+        this.keyFields = keyFields;
+        storedKeyFields = new int[keyFields.length];
+        for (int i = 0; i < storedKeyFields.length; i++) {
+            storedKeyFields[i] = i;
+        }
+
+        this.aggregatorFactory = aggregatorFactory;
+
+        this.mergerFactory = mergerFactory;
+        this.firstNormalizerFactory = firstNormalizerFactory;
+
+        this.comparatorFactories = comparatorFactories;
+
+        this.hashFamilies = hashFamilies;
+
+        recordDescriptors[0] = outRecDesc;
+
+        this.doInputAdjustment = doInputAdjustment;
+    }
+
+    @Override
+    public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
+
+        final IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
+        for (int i = 0; i < comparators.length; i++) {
+            comparators[i] = comparatorFactories[i].createBinaryComparator();
+        }
+
+        final RecordDescriptor inRecDesc = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);
+
+        final int frameSize = ctx.getFrameSize();
+
+        final double fudgeFactor = HybridHashGroupHashTable.getHashtableOverheadRatio(tableSize, frameSize,
+                framesLimit, userProvidedRecordSizeInBytes) * FUDGE_FACTOR_ESTIMATION;
+
+        return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
+
+            HybridHashGroupHashTable topProcessor;
+
+            int observedInputSizeInRawTuples;
+            int observedInputSizeInFrames, maxRecursiveLevels;
+
+            int userProvidedInputSizeInFrames;
+
+            boolean topLevelFallbackCheck = true;
+
+            ITuplePartitionComputerFamily tpcf = new FieldHashPartitionComputerFamily(keyFields, hashFamilies);
+
+            ITuplePartitionComputerFamily tpcfMerge = new FieldHashPartitionComputerFamily(storedKeyFields,
+                    hashFamilies);
+
+            ByteBuffer readAheadBuf;
+
+            /**
+             * Compute the number of partitions using the hybrid-hash formula.
+             * 
+             * @param tableSize
+             * @param framesLimit
+             * @param inputKeySize
+             * @param factor
+             * @return the number of spilled partitions to create
+             */
+            private int getNumberOfPartitions(int tableSize, int framesLimit, long inputKeySize, double factor) {
+
+                int hashtableHeaderPages = HybridHashGroupHashTable.getHeaderPages(tableSize, frameSize);
+
+                int numberOfPartitions = HybridHashUtil.hybridHashPartitionComputer((int) Math.ceil(inputKeySize),
+                        framesLimit, factor);
+
+                // if the partition count exceeds the frames available for hash table contents, use all frames for pure partitioning.
+                if (numberOfPartitions >= framesLimit - hashtableHeaderPages) {
+                    numberOfPartitions = framesLimit;
+                }
+
+                if (numberOfPartitions <= 0) {
+                    numberOfPartitions = 1;
+                }
+
+                return numberOfPartitions;
+            }
+
+            @Override
+            public void open() throws HyracksDataException {
+
+                observedInputSizeInFrames = 0;
+
+                // estimate the number of unique keys for this partition, given the total raw record count and unique record count
+                long estimatedNumberOfUniqueKeys = HybridHashUtil.getEstimatedPartitionSizeOfUniqueKeys(
+                        inputSizeInRawRecords, inputSizeInUniqueKeys, 1);
+
+                userProvidedInputSizeInFrames = (int) Math.ceil(estimatedNumberOfUniqueKeys
+                        * userProvidedRecordSizeInBytes / frameSize);
+
+                int topPartitions = getNumberOfPartitions(tableSize, framesLimit,
+                        (int) Math.ceil(userProvidedInputSizeInFrames * ESTIMATOR_MAGNIFIER), fudgeFactor);
+
+                topProcessor = new HybridHashGroupHashTable(ctx, framesLimit, tableSize, topPartitions, keyFields, 0,
+                        comparators, tpcf, aggregatorFactory.createAggregator(ctx, inRecDesc, recordDescriptors[0],
+                                keyFields, storedKeyFields), inRecDesc, recordDescriptors[0], writer);
+
+                writer.open();
+                topProcessor.open();
+            }
+
+            @Override
+            public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+                observedInputSizeInRawTuples += buffer.getInt(buffer.capacity() - 4);
+                observedInputSizeInFrames++;
+                topProcessor.nextFrame(buffer);
+            }
+
+            @Override
+            public void fail() throws HyracksDataException {
+                // TODO Auto-generated method stub
+
+            }
+
+            @Override
+            public void close() throws HyracksDataException {
+                // estimate the maximum recursive levels
+                maxRecursiveLevels = (int) Math.max(
+                        Math.ceil(Math.log(observedInputSizeInFrames * fudgeFactor) / Math.log(framesLimit)) + 1, 1);
+
+                finishAndRecursion(topProcessor, observedInputSizeInRawTuples, inputSizeInUniqueKeys, 0,
+                        topLevelFallbackCheck);
+
+                writer.close();
+
+            }
+
+            private void processRunFiles(IFrameReader runReader, int inputCardinality, int runLevel)
+                    throws HyracksDataException {
+
+                boolean checkFallback = true;
+
+                int numOfPartitions = getNumberOfPartitions(tableSize, framesLimit, (long)inputCardinality
+                        * userProvidedRecordSizeInBytes / frameSize, fudgeFactor);
+
+                HybridHashGroupHashTable processor = new HybridHashGroupHashTable(ctx, framesLimit, tableSize,
+                        numOfPartitions, keyFields, runLevel, comparators, tpcf, aggregatorFactory.createAggregator(
+                                ctx, inRecDesc, recordDescriptors[0], keyFields, storedKeyFields), inRecDesc,
+                        recordDescriptors[0], writer);
+
+                processor.open();
+
+                runReader.open();
+
+                int inputRunRawSizeInTuples = 0;
+
+                if (readAheadBuf == null) {
+                    readAheadBuf = ctx.allocateFrame();
+                }
+                while (runReader.nextFrame(readAheadBuf)) {
+                    inputRunRawSizeInTuples += readAheadBuf.getInt(readAheadBuf.capacity() - 4);
+                    processor.nextFrame(readAheadBuf);
+                }
+
+                runReader.close();
+
+                finishAndRecursion(processor, inputRunRawSizeInTuples, inputCardinality, runLevel, checkFallback);
+            }
+
+            /**
+             * Finish the hash table processing and start recursive processing on run files.
+             * 
+             * @param ht
+             * @param inputRawRecordCount
+             * @param inputCardinality
+             * @param level
+             * @param checkFallback
+             * @throws HyracksDataException
+             */
+            private void finishAndRecursion(HybridHashGroupHashTable ht, long inputRawRecordCount,
+                    long inputCardinality, int level, boolean checkFallback) throws HyracksDataException {
+
+                ht.finishup();
+
+                List<IFrameReader> generatedRunReaders = ht.getSpilledRuns();
+                List<Integer> partitionRawRecords = ht.getSpilledRunsSizeInRawTuples();
+
+                int directFlushKeysInTuples = ht.getHashedUniqueKeys();
+                int directFlushRawRecordsInTuples = ht.getHashedRawRecords();
+
+                ht.close();
+                ht = null;
+
+                ctx.getCounterContext().getCounter("optional.levels." + level + ".estiInputKeyCardinality", true)
+                        .update(inputCardinality);
+
+                // do adjustment
+                if (doInputAdjustment && directFlushRawRecordsInTuples > 0) {
+                    inputCardinality = (int) Math.ceil((double) directFlushKeysInTuples / directFlushRawRecordsInTuples
+                            * inputRawRecordCount);
+                }
+
+                ctx.getCounterContext()
+                        .getCounter("optional.levels." + level + ".estiInputKeyCardinalityAdjusted", true)
+                        .update(inputCardinality);
+
+                IFrameReader recurRunReader;
+                int subPartitionRawRecords;
+
+                while (!generatedRunReaders.isEmpty()) {
+                    recurRunReader = generatedRunReaders.remove(0);
+                    subPartitionRawRecords = partitionRawRecords.remove(0);
+
+                    int runKeyCardinality = (int) Math.ceil((double) inputCardinality * subPartitionRawRecords
+                            / inputRawRecordCount);
+
+                    if ((checkFallback && runKeyCardinality > HYBRID_FALLBACK_THRESHOLD * inputCardinality)
+                            || level > maxRecursiveLevels) {
+                        Logger.getLogger(HybridHashGroupOperatorDescriptor.class.getSimpleName()).warning(
+                                "Hybrid-hash falls back to hash-sort algorithm! (" + level + ":" + maxRecursiveLevels
+                                        + ")");
+                        fallback(recurRunReader, level);
+                    } else {
+                        processRunFiles(recurRunReader, runKeyCardinality, level + 1);
+                    }
+
+                }
+            }
+
+            private void fallback(IFrameReader recurRunReader, int runLevel) throws HyracksDataException {
+                // fall back
+                FrameTupleAccessor runFrameTupleAccessor = new FrameTupleAccessor(frameSize, inRecDesc);
+                HybridHashSortGroupHashTable hhsTable = new HybridHashSortGroupHashTable(ctx, framesLimit, tableSize,
+                        keyFields, comparators, tpcf.createPartitioner(runLevel + 1),
+                        firstNormalizerFactory.createNormalizedKeyComputer(), aggregatorFactory.createAggregator(ctx,
+                                inRecDesc, recordDescriptors[0], keyFields, storedKeyFields), inRecDesc,
+                        recordDescriptors[0]);
+
+                recurRunReader.open();
+                if (readAheadBuf == null) {
+                    readAheadBuf = ctx.allocateFrame();
+                }
+                while (recurRunReader.nextFrame(readAheadBuf)) {
+                    runFrameTupleAccessor.reset(readAheadBuf);
+                    int tupleCount = runFrameTupleAccessor.getTupleCount();
+                    for (int j = 0; j < tupleCount; j++) {
+                        hhsTable.insert(runFrameTupleAccessor, j);
+                    }
+                }
+
+                recurRunReader.close();
+                hhsTable.finishup();
+
+                LinkedList<RunFileReader> hhsRuns = hhsTable.getRunFileReaders();
+
+                if (hhsRuns.isEmpty()) {
+                    hhsTable.flushHashtableToOutput(writer);
+                    hhsTable.close();
+                } else {
+                    hhsTable.close();
+                    HybridHashSortRunMerger hhsMerger = new HybridHashSortRunMerger(ctx, hhsRuns, storedKeyFields,
+                            comparators, recordDescriptors[0], tpcfMerge.createPartitioner(runLevel + 1),
+                            mergerFactory.createAggregator(ctx, recordDescriptors[0], recordDescriptors[0],
+                                    storedKeyFields, storedKeyFields), framesLimit, tableSize, writer, false);
+                    hhsMerger.process();
+                }
+            }
+
+        };
+    }
+}
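Editor's note on the recursion bound used in close() above: with a memory budget of framesLimit frames and an
input of F frames (scaled by the fudge factor to account for hash-table overhead), each hybrid-hash pass cuts the
data down by roughly a factor of framesLimit, so the depth is about ceil(log_framesLimit(F * fudgeFactor)) + 1.
A minimal, self-contained sketch of that arithmetic (class and variable names are illustrative, not part of the patch):

public final class RecursionDepthEstimate {
    // Mirrors the maxRecursiveLevels computation in close() above.
    static int maxRecursiveLevels(int inputSizeInFrames, int framesLimit, double fudgeFactor) {
        return (int) Math.max(
                Math.ceil(Math.log(inputSizeInFrames * fudgeFactor) / Math.log(framesLimit)) + 1, 1);
    }

    public static void main(String[] args) {
        // e.g. 10000 input frames, 256 frames of memory, fudge factor 1.2 -> 3 levels
        System.out.println(maxRecursiveLevels(10000, 256, 1.2));
    }
}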
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashUtil.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashUtil.java
new file mode 100644
index 0000000..5323887
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashUtil.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hybridhash;
+
+public class HybridHashUtil {
+
+    /**
+     * Compute the expected number of spilling partitions (the in-memory partition is not included), using the
+     * hybrid-hash formula from [Shapiro86]. A return value of 0 means no spilling partition is needed.
+     * 
+     * @param inputSizeOfUniqueKeysInFrames
+     *            size of the unique keys of the input, in frames
+     * @param memorySizeInFrames
+     *            memory budget, in frames
+     * @param fudgeFactor
+     *            fudge factor accounting for hash-table overhead
+     * @return the number of spilling partitions
+     */
+    public static int hybridHashPartitionComputer(int inputSizeOfUniqueKeysInFrames, int memorySizeInFrames,
+            double fudgeFactor) {
+        return Math.max(
+                (int) Math.ceil((inputSizeOfUniqueKeysInFrames * fudgeFactor - memorySizeInFrames)
+                        / (memorySizeInFrames - 1)), 0);
+    }
+
+    /**
+     * Compute the estimated number of unique keys in a single partition of a dataset, using Yao's formula.
+     * 
+     * @param inputSizeInRawRecords
+     *            total number of raw records in the input
+     * @param inputSizeInUniqueKeys
+     *            estimated number of unique keys in the input
+     * @param numOfPartitions
+     *            number of partitions the input is divided into
+     * @return the estimated number of unique keys per partition
+     */
+    public static long getEstimatedPartitionSizeOfUniqueKeys(long inputSizeInRawRecords, long inputSizeInUniqueKeys,
+            int numOfPartitions) {
+        if (numOfPartitions == 1) {
+            return inputSizeInUniqueKeys;
+        }
+        return (long) Math.ceil(inputSizeInUniqueKeys
+                * (1 - Math.pow(1 - ((double) inputSizeInRawRecords / (double) numOfPartitions)
+                        / (double) inputSizeInRawRecords, (double) inputSizeInRawRecords
+                        / (double) inputSizeInUniqueKeys)));
+    }
+}
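As a quick illustration of how the two estimators above are meant to be combined when sizing a hybrid-hash pass
(the numbers are assumed for the example, not taken from the patch):

import edu.uci.ics.hyracks.dataflow.std.group.hybridhash.HybridHashUtil;

public final class HybridHashSizingExample {
    public static void main(String[] args) {
        long rawRecords = 1000000L;   // total input records
        long uniqueKeys = 200000L;    // estimated distinct grouping keys
        int memoryInFrames = 256;     // memory budget for the pass
        double fudgeFactor = 1.2;     // hash-table overhead factor

        // Suppose the grouped (unique-key) data would occupy roughly 2000 frames.
        int spillingPartitions = HybridHashUtil.hybridHashPartitionComputer(2000, memoryInFrames, fudgeFactor);

        // Yao-style estimate of how many distinct keys land in each spilling partition.
        long keysPerPartition = HybridHashUtil.getEstimatedPartitionSizeOfUniqueKeys(rawRecords, uniqueKeys,
                Math.max(spillingPartitions, 1));

        System.out.println(spillingPartitions + " spilling partitions, ~" + keysPerPartition + " unique keys each");
    }
}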
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/preclustered/PreclusteredGroupWriter.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/preclustered/PreclusteredGroupWriter.java
index d1fec29..fd4f8da 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/preclustered/PreclusteredGroupWriter.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/preclustered/PreclusteredGroupWriter.java
@@ -29,20 +29,26 @@
 import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
 
 public class PreclusteredGroupWriter implements IFrameWriter {
+
+    private final static int INT_SIZE = 4;
+
     private final int[] groupFields;
     private final IBinaryComparator[] comparators;
     private final IAggregatorDescriptor aggregator;
     private final AggregateState aggregateState;
     private final IFrameWriter writer;
-    private final ByteBuffer copyFrame;
     private final FrameTupleAccessor inFrameAccessor;
-    private final FrameTupleAccessor copyFrameAccessor;
 
     private final ByteBuffer outFrame;
     private final FrameTupleAppender appender;
     private final ArrayTupleBuilder tupleBuilder;
 
-    private boolean first;
+    private final RecordDescriptor outRecordDesc;
+
+    private byte[] groupResultCache;
+    private ByteBuffer groupResultCacheBuffer;
+    private FrameTupleAccessor groupResultCacheAccessor;
+    private FrameTupleAppender groupResultCacheAppender;
 
     public PreclusteredGroupWriter(IHyracksTaskContext ctx, int[] groupFields, IBinaryComparator[] comparators,
             IAggregatorDescriptor aggregator, RecordDescriptor inRecordDesc, RecordDescriptor outRecordDesc,
@@ -52,10 +58,9 @@
         this.aggregator = aggregator;
         this.aggregateState = aggregator.createAggregateStates();
         this.writer = writer;
-        copyFrame = ctx.allocateFrame();
+        this.outRecordDesc = outRecordDesc;
+
         inFrameAccessor = new FrameTupleAccessor(ctx.getFrameSize(), inRecordDesc);
-        copyFrameAccessor = new FrameTupleAccessor(ctx.getFrameSize(), inRecordDesc);
-        copyFrameAccessor.reset(copyFrame);
 
         outFrame = ctx.allocateFrame();
         appender = new FrameTupleAppender(ctx.getFrameSize());
@@ -67,7 +72,6 @@
     @Override
     public void open() throws HyracksDataException {
         writer.open();
-        first = true;
     }
 
     @Override
@@ -75,40 +79,45 @@
         inFrameAccessor.reset(buffer);
         int nTuples = inFrameAccessor.getTupleCount();
         for (int i = 0; i < nTuples; ++i) {
-            if (first) {
 
-                tupleBuilder.reset();
-                for (int j = 0; j < groupFields.length; j++) {
-                    tupleBuilder.addField(inFrameAccessor, i, groupFields[j]);
-                }
-                aggregator.init(tupleBuilder, inFrameAccessor, i, aggregateState);
-
-                first = false;
-
-            } else {
-                if (i == 0) {
-                    switchGroupIfRequired(copyFrameAccessor, copyFrameAccessor.getTupleCount() - 1, inFrameAccessor, i);
+            if (groupResultCache != null && groupResultCacheAccessor.getTupleCount() > 0) {
+                groupResultCacheAccessor.reset(ByteBuffer.wrap(groupResultCache));
+                if (sameGroup(inFrameAccessor, i, groupResultCacheAccessor, 0)) {
+                    // find match: do aggregation
+                    aggregator.aggregate(inFrameAccessor, i, groupResultCacheAccessor, 0, aggregateState);
+                    continue;
                 } else {
-                    switchGroupIfRequired(inFrameAccessor, i - 1, inFrameAccessor, i);
+                    // write the cached group into the final output
+                    writeOutput(groupResultCacheAccessor, 0);
                 }
-
             }
-        }
-        FrameUtils.copy(buffer, copyFrame);
-    }
-
-    private void switchGroupIfRequired(FrameTupleAccessor prevTupleAccessor, int prevTupleIndex,
-            FrameTupleAccessor currTupleAccessor, int currTupleIndex) throws HyracksDataException {
-        if (!sameGroup(prevTupleAccessor, prevTupleIndex, currTupleAccessor, currTupleIndex)) {
-            writeOutput(prevTupleAccessor, prevTupleIndex);
 
             tupleBuilder.reset();
+
             for (int j = 0; j < groupFields.length; j++) {
-                tupleBuilder.addField(currTupleAccessor, currTupleIndex, groupFields[j]);
+                tupleBuilder.addField(inFrameAccessor, i, groupFields[j]);
             }
-            aggregator.init(tupleBuilder, currTupleAccessor, currTupleIndex, aggregateState);
-        } else {
-            aggregator.aggregate(currTupleAccessor, currTupleIndex, null, 0, aggregateState);
+
+            aggregator.init(tupleBuilder, inFrameAccessor, i, aggregateState);
+
+            // enlarge the cache buffer if necessary
+            int requiredSize = tupleBuilder.getSize() + tupleBuilder.getFieldEndOffsets().length * INT_SIZE + 2
+                    * INT_SIZE;
+
+            if (groupResultCache == null || groupResultCache.length < requiredSize) {
+                groupResultCache = new byte[requiredSize];
+                groupResultCacheAppender = new FrameTupleAppender(groupResultCache.length);
+                groupResultCacheBuffer = ByteBuffer.wrap(groupResultCache);
+                groupResultCacheAccessor = new FrameTupleAccessor(groupResultCache.length, outRecordDesc);
+            }
+
+            groupResultCacheAppender.reset(groupResultCacheBuffer, true);
+            if (!groupResultCacheAppender.append(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0,
+                    tupleBuilder.getSize())) {
+                throw new HyracksDataException("The partial aggregation result is too large to fit in a frame.");
+            }
+            
+            groupResultCacheAccessor.reset(groupResultCacheBuffer);
         }
     }
 
@@ -117,7 +126,7 @@
 
         tupleBuilder.reset();
         for (int j = 0; j < groupFields.length; j++) {
-            tupleBuilder.addField(lastTupleAccessor, lastTupleIndex, groupFields[j]);
+            tupleBuilder.addField(lastTupleAccessor, lastTupleIndex, j);
         }
         aggregator.outputFinalResult(tupleBuilder, lastTupleAccessor, lastTupleIndex, aggregateState);
 
@@ -138,8 +147,8 @@
             int fIdx = groupFields[i];
             int s1 = a1.getTupleStartOffset(t1Idx) + a1.getFieldSlotsLength() + a1.getFieldStartOffset(t1Idx, fIdx);
             int l1 = a1.getFieldLength(t1Idx, fIdx);
-            int s2 = a2.getTupleStartOffset(t2Idx) + a2.getFieldSlotsLength() + a2.getFieldStartOffset(t2Idx, fIdx);
-            int l2 = a2.getFieldLength(t2Idx, fIdx);
+            int s2 = a2.getTupleStartOffset(t2Idx) + a2.getFieldSlotsLength() + a2.getFieldStartOffset(t2Idx, i);
+            int l2 = a2.getFieldLength(t2Idx, i);
             if (comparators[i].compare(a1.getBuffer().array(), s1, l1, a2.getBuffer().array(), s2, l2) != 0) {
                 return false;
             }
@@ -154,8 +163,8 @@
 
     @Override
     public void close() throws HyracksDataException {
-        if (!first) {
-            writeOutput(copyFrameAccessor, copyFrameAccessor.getTupleCount() - 1);
+        if (groupResultCache != null && groupResultCacheAccessor.getTupleCount() > 0) {
+            writeOutput(groupResultCacheAccessor, 0);
             if (appender.getTupleCount() > 0) {
                 FrameUtils.flushFrame(outFrame, writer);
             }
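Editor's note on the rewritten grouping logic above (a reading of the patch, not text from it): instead of keeping a
copy of the whole previous input frame to detect group boundaries, the writer now caches only the current group's
partial result in a small byte[]-backed single-tuple frame (groupResultCache), grown on demand. Each incoming tuple
is compared against fields 0..groupFields.length-1 of that cached tuple, which is why sameGroup() and writeOutput()
index the cached side by position rather than through groupFields. The cache sizing presumably follows the standard
Hyracks frame layout (per-field end-offset slots before the data, plus a tuple count and one tuple end offset at the
frame tail); a hypothetical helper mirroring the requiredSize computation:

// Sketch only: one tuple's data, its field slots, and the frame's two trailing ints.
static int singleTupleFrameSize(int tupleDataSize, int fieldCount) {
    final int INT_SIZE = 4;
    return tupleDataSize            // field data produced by the tuple builder
            + fieldCount * INT_SIZE // one field-end-offset slot per field
            + 2 * INT_SIZE;         // tuple count + tuple end offset at the frame tail
}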
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/misc/PrinterOperatorDescriptor.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/misc/PrinterOperatorDescriptor.java
index 47a8616..0af2dd8 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/misc/PrinterOperatorDescriptor.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/misc/PrinterOperatorDescriptor.java
@@ -48,10 +48,10 @@
         @Override
         public void writeData(Object[] data) throws HyracksDataException {
             for (int i = 0; i < data.length; ++i) {
-                System.err.print(StringSerializationUtils.toString(data[i]));
-                System.err.print(", ");
+            //    System.err.print(StringSerializationUtils.toString(data[i]));
+            //    System.err.print(", ");
             }
-            System.err.println();
+            //System.err.println();
         }
 
         @Override
diff --git a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatserver/pom.xml b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatserver/pom.xml
index 36d612b..2eb35e1 100644
--- a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatserver/pom.xml
+++ b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatserver/pom.xml
@@ -111,8 +111,9 @@
       	<artifactId>maven-compiler-plugin</artifactId>
       	<version>2.0.2</version>
         <configuration>
-          <source>1.6</source>
-          <target>1.6</target>
+          <source>1.7</source>
+          <target>1.7</target>
+          <fork>true</fork>
         </configuration>
       </plugin>
       <plugin>
diff --git a/hyracks/hyracks-examples/text-example/textserver/pom.xml b/hyracks/hyracks-examples/text-example/textserver/pom.xml
index bfbb0f4..d3eaaca 100644
--- a/hyracks/hyracks-examples/text-example/textserver/pom.xml
+++ b/hyracks/hyracks-examples/text-example/textserver/pom.xml
@@ -111,8 +111,9 @@
       	<artifactId>maven-compiler-plugin</artifactId>
       	<version>2.0.2</version>
         <configuration>
-          <source>1.6</source>
-          <target>1.6</target>
+          <source>1.7</source>
+          <target>1.7</target>
+          <fork>true</fork>
         </configuration>
       </plugin>
       <plugin>
diff --git a/pom.xml b/pom.xml
index 367101e..2840cb1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,4 +1,4 @@
-
+<?xml version="1.0" encoding="UTF-8"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
   <modelVersion>4.0.0</modelVersion>
   <groupId>edu.uci.ics.hyracks</groupId>
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/driver/Driver.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/driver/Driver.java
index 1f071bf..3c9281a 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/driver/Driver.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/driver/Driver.java
@@ -147,6 +147,16 @@
             hcc.unDeployBinary(deploymentId);
             LOG.info("job finished");
         } catch (Exception e) {
+            try {
+                /**
+                 * destroy application if there is any exception
+                 */
+                if (hcc != null) {
+                    destroyApplication(applicationName);
+                }
+            } catch (Exception e2) {
+                throw new HyracksException(e2);
+            }
             throw new HyracksException(e);
         }
     }
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java
index d099645..2173e10 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java
@@ -14,6 +14,7 @@
  */
 package edu.uci.ics.pregelix.core.util;
 
+import java.io.File;
 import java.util.EnumSet;
 
 import edu.uci.ics.hyracks.api.client.HyracksConnection;
@@ -45,7 +46,7 @@
     private static NodeControllerService nc2;
     private static IHyracksClientConnection hcc;
 
-    public static void init() throws Exception {
+    public static void init(String topologyFilePath) throws Exception {
         CCConfig ccConfig = new CCConfig();
         ccConfig.clientNetIpAddress = CC_HOST;
         ccConfig.clusterNetIpAddress = CC_HOST;
diff --git a/pregelix/pregelix-core/src/main/resources/scripts/stopnc.sh b/pregelix/pregelix-core/src/main/resources/scripts/stopnc.sh
index 31de984..7a5352d 100644
--- a/pregelix/pregelix-core/src/main/resources/scripts/stopnc.sh
+++ b/pregelix/pregelix-core/src/main/resources/scripts/stopnc.sh
@@ -9,6 +9,10 @@
 fi
 
 if [ "$PID" == "" ]; then
+  PID=`ps -ef|grep ${USER}|grep java|grep 'hyracks'|awk '{print $2}'`
+fi
+
+if [ "$PID" == "" ]; then
   USERID=`id | sed 's/^uid=//;s/(.*$//'`
   PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=pregelixnc'|awk '{print $2}'`
 fi
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileScanOperatorDescriptor.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileScanOperatorDescriptor.java
index 343ba7e..cec5b55 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileScanOperatorDescriptor.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileScanOperatorDescriptor.java
@@ -85,6 +85,7 @@
 
             @Override
             public void initialize() throws HyracksDataException {
+                ctxCL = Thread.currentThread().getContextClassLoader();
                 try {
                     Configuration conf = confFactory.createConfiguration(ctx);
                     writer.open();
@@ -106,6 +107,8 @@
                     writer.close();
                 } catch (Exception e) {
                     throw new HyracksDataException(e);
+                } finally {
+                    Thread.currentThread().setContextClassLoader(ctxCL);
                 }
             }
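The change above follows a common save-and-restore pattern for the thread's context ClassLoader: capture it at the
start of initialize(), before work that may replace it (here, presumably the Hadoop Configuration setup), and restore
it in a finally block so later tasks on the same thread are unaffected. A generic sketch of the pattern (illustrative,
not part of the patch):

ClassLoader saved = Thread.currentThread().getContextClassLoader();
try {
    // ... work that may swap the context ClassLoader ...
} finally {
    Thread.currentThread().setContextClassLoader(saved);
}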
 
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java
index 0a0a14f..b6df213 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java
@@ -183,6 +183,7 @@
                 }
 
                 try {
+                    vertex.activate();
                     vertex.compute(msgIterator);
                     vertex.finishCompute();
                 } catch (IOException e) {