Resolve the conflict
diff --git a/genomix/HyracksCodeFormatProfile.xml b/genomix/HyracksCodeFormatProfile.xml
new file mode 100644
index 0000000..2cde66d
--- /dev/null
+++ b/genomix/HyracksCodeFormatProfile.xml
@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<profiles version="11">
+<profile kind="CodeFormatterProfile" name="HyracksCodeFormatProfile" version="11">
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.disabling_tag" value="@formatter:off"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.use_on_off_tags" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression" value="80"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.source" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_line_comments" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.align_type_members_on_columns" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.lineSplit" value="120"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.enabling_tag" value="@formatter:on"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.assertIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.char" value="space"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_method_declaration" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.enumIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.compact_else_if" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.size" value="4"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_empty_lines" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.compiler.compliance" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_binary_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode" value="enabled"/>
+<setting id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_label" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant" value="48"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="9999"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_import_groups" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.wrap_before_binary_operator" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.join_lines_in_comments" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_html" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_source_code" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.targetPlatform" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_header" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_block_comments" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants" value="49"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line" value="false"/>
+</profile>
+</profiles>
diff --git a/genomix/genomix-data/.settings/org.eclipse.core.resources.prefs b/genomix/genomix-data/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 0000000..609d3ca
--- /dev/null
+++ b/genomix/genomix-data/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,4 @@
+eclipse.preferences.version=1
+encoding//src/main/resources=UTF-8
+encoding//src/test/resources=UTF-8
+encoding/<project>=UTF-8
diff --git a/genomix/genomix-data/.settings/org.eclipse.jdt.core.prefs b/genomix/genomix-data/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..ec4300d
--- /dev/null
+++ b/genomix/genomix-data/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,5 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
+org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.7
diff --git a/genomix/genomix-data/.settings/org.eclipse.m2e.core.prefs b/genomix/genomix-data/.settings/org.eclipse.m2e.core.prefs
new file mode 100644
index 0000000..f897a7f
--- /dev/null
+++ b/genomix/genomix-data/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
diff --git a/genomix/genomix-data/HyracksCodeFormatProfile.xml b/genomix/genomix-data/HyracksCodeFormatProfile.xml
new file mode 100644
index 0000000..2cde66d
--- /dev/null
+++ b/genomix/genomix-data/HyracksCodeFormatProfile.xml
@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<profiles version="11">
+<profile kind="CodeFormatterProfile" name="HyracksCodeFormatProfile" version="11">
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.disabling_tag" value="@formatter:off"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.use_on_off_tags" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression" value="80"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.source" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_line_comments" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.align_type_members_on_columns" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.lineSplit" value="120"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.enabling_tag" value="@formatter:on"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.assertIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.char" value="space"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_method_declaration" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.enumIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.compact_else_if" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.size" value="4"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_empty_lines" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.compiler.compliance" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_binary_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode" value="enabled"/>
+<setting id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_label" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant" value="48"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="9999"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_import_groups" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.wrap_before_binary_operator" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.join_lines_in_comments" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_html" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_source_code" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.targetPlatform" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_header" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_block_comments" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants" value="49"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line" value="false"/>
+</profile>
+</profiles>
diff --git a/genomix/genomix-data/pom.xml b/genomix/genomix-data/pom.xml
new file mode 100644
index 0000000..2dd44bb
--- /dev/null
+++ b/genomix/genomix-data/pom.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>genomix-data</artifactId>
+ <name>genomix-data</name>
+
+ <parent>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>genomix</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ </parent>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.8.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>0.20.2</version>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/genomix/genomix-data/src/main/assembly/binary-assembly.xml b/genomix/genomix-data/src/main/assembly/binary-assembly.xml
new file mode 100644
index 0000000..68d424a
--- /dev/null
+++ b/genomix/genomix-data/src/main/assembly/binary-assembly.xml
@@ -0,0 +1,19 @@
+<assembly>
+ <id>binary-assembly</id>
+ <formats>
+ <format>zip</format>
+ <format>dir</format>
+ </formats>
+ <includeBaseDirectory>false</includeBaseDirectory>
+ <fileSets>
+ <fileSet>
+ <directory>target/appassembler/bin</directory>
+ <outputDirectory>bin</outputDirectory>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>target/appassembler/lib</directory>
+ <outputDirectory>lib</outputDirectory>
+ </fileSet>
+ </fileSets>
+</assembly>
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/KmerUtil.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/KmerUtil.java
new file mode 100644
index 0000000..866d6c5
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/KmerUtil.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.data;
+
+import edu.uci.ics.genomix.type.GeneCode;
+
+/**
+ * Static helpers for kmers stored as packed 2-bit gene codes (four letters per byte).
+ */
+public class KmerUtil {
+    public static final String empty = "";
+
+    /**
+     * Computes how many bytes are needed to hold {@code k} letters at four letters per byte.
+     *
+     * @param k
+     *            number of letters in the kmer
+     * @return {@code k / 4}, plus one when {@code k} is not a multiple of four
+     */
+    public static int getByteNumFromK(int k) {
+        final int wholeBytes = k / 4;
+        return (k % 4 == 0) ? wholeBytes : wholeBytes + 1;
+    }
+
+    /**
+     * Reverses the order of the four 2-bit groups inside one byte.
+     *
+     * @param k
+     *            packed byte holding four 2-bit groups
+     * @return the byte with its 2-bit groups in the opposite order
+     */
+    public static byte reverseKmerByte(byte k) {
+        // first swap neighbouring 2-bit groups, then swap the two nibbles
+        final int swappedPairs = ((k >> 2) & 0x33) | ((k << 2) & 0xcc);
+        final int lowNibble = (swappedPairs >> 4) & 0x0f;
+        final int highNibble = (swappedPairs << 4) & 0xf0;
+        return (byte) (lowNibble | highNibble);
+    }
+
+    /**
+     * Decodes a packed kmer back into its letter string.
+     * Decoding starts at the low two bits of the last byte of the range and moves toward
+     * higher bits, stepping to the previous byte after every four letters.
+     *
+     * @param k
+     *            number of letters to decode
+     * @param keyData
+     *            array holding the packed kmer
+     * @param keyStart
+     *            index of the first byte of the kmer in {@code keyData}
+     * @param keyLength
+     *            number of bytes the kmer occupies
+     * @return the decoded letters, or {@link #empty} when {@code k < 1} or the last byte index is negative
+     */
+    public static String recoverKmerFrom(int k, byte[] keyData, int keyStart, int keyLength) {
+        final StringBuilder letters = new StringBuilder();
+        int cursor = keyStart + keyLength - 1;
+        if (cursor < 0 || k < 1) {
+            return empty;
+        }
+        byte packed = keyData[cursor];
+        int slot = 0; // index of the 2-bit group inside the current byte
+        for (int decoded = 0; decoded < k; decoded++) {
+            if (slot == 4) {
+                // current byte exhausted: continue with the preceding one
+                packed = keyData[--cursor];
+                slot = 0;
+            }
+            final int code = (packed >> (slot * 2)) & 0x03;
+            letters.append((char) GeneCode.GENE_SYMBOL[code]);
+            slot++;
+        }
+        return letters.toString();
+    }
+
+}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/Marshal.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/Marshal.java
new file mode 100644
index 0000000..90def56
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/Marshal.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.data;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Static helpers that read and write big-endian primitives at an offset in a byte array.
+ */
+public class Marshal {
+    /**
+     * Reads a big-endian {@code int} from four bytes starting at {@code offset}.
+     *
+     * @param bytes
+     *            source array
+     * @param offset
+     *            index of the most significant byte
+     * @return the decoded value
+     */
+    public static int getInt(byte[] bytes, int offset) {
+        return ((bytes[offset] & 0xff) << 24) + ((bytes[offset + 1] & 0xff) << 16) + ((bytes[offset + 2] & 0xff) << 8)
+                + ((bytes[offset + 3] & 0xff) << 0);
+    }
+
+    /**
+     * Reads a big-endian {@code long} starting at {@code offset}.
+     * At most eight bytes are consumed; if the array ends sooner, only the available bytes are
+     * folded into the result and no exception is raised.
+     *
+     * @param bytes
+     *            source array
+     * @param offset
+     *            index of the most significant byte
+     * @return the decoded value
+     */
+    public static long getLong(byte[] bytes, int offset) {
+        long value = 0;
+        for (int i = offset; i < bytes.length && i < offset + 8; i++) {
+            value = (value << 8) + (bytes[i] & 0xff);
+        }
+        return value;
+    }
+
+    /**
+     * Reads a big-endian IEEE 754 {@code float} from four bytes starting at {@code offset}.
+     *
+     * @param bytes
+     *            source array
+     * @param offset
+     *            index of the first byte
+     * @return the decoded value
+     */
+    public static float getFloat(byte[] bytes, int offset) {
+        return ByteBuffer.wrap(bytes, offset, 4).getFloat();
+    }
+
+    /**
+     * Writes {@code val} as four big-endian bytes starting at {@code offset}.
+     *
+     * @param val
+     *            value to encode
+     * @param bytes
+     *            destination array
+     * @param offset
+     *            index that receives the most significant byte
+     */
+    public static void putInt(int val, byte[] bytes, int offset) {
+        bytes[offset] = (byte) ((val >>> 24) & 0xFF);
+        bytes[offset + 1] = (byte) ((val >>> 16) & 0xFF);
+        bytes[offset + 2] = (byte) ((val >>> 8) & 0xFF);
+        bytes[offset + 3] = (byte) ((val >>> 0) & 0xFF);
+    }
+
+    /**
+     * Writes {@code val} as eight big-endian bytes starting at {@code offset}.
+     *
+     * @param val
+     *            value to encode
+     * @param bytes
+     *            destination array
+     * @param offset
+     *            index that receives the most significant byte
+     * @throws IndexOutOfBoundsException
+     *             if fewer than eight bytes are available at {@code offset}; nothing is written in that case
+     */
+    public static void putLong(long val, byte[] bytes, int offset) {
+        // write straight into the destination (ByteBuffer defaults to big-endian); no temporary buffer
+        ByteBuffer.wrap(bytes, offset, 8).putLong(val);
+    }
+
+    /**
+     * Writes {@code val} as four big-endian bytes starting at {@code offset}.
+     *
+     * @param val
+     *            value to encode
+     * @param bytes
+     *            destination array
+     * @param offset
+     *            index that receives the first byte
+     * @throws IndexOutOfBoundsException
+     *             if fewer than four bytes are available at {@code offset}; nothing is written in that case
+     */
+    public static void putFloat(float val, byte[] bytes, int offset) {
+        // mirrors getFloat: operate on the caller's array directly
+        ByteBuffer.wrap(bytes, offset, 4).putFloat(val);
+    }
+
+    /**
+     * Computes a 31-based polynomial hash (seed 1) over {@code length} bytes starting at {@code offset}.
+     * Bytes are added as signed values.
+     *
+     * @param bytes
+     *            source array
+     * @param offset
+     *            index of the first byte to hash
+     * @param length
+     *            number of bytes to hash
+     * @return the hash; {@code 1} when {@code length} is zero or negative
+     */
+    public static int hashBytes(byte[] bytes, int offset, int length) {
+        int hash = 1;
+        for (int i = offset; i < offset + length; i++) {
+            hash = (31 * hash) + (int) bytes[i];
+        }
+        return hash;
+    }
+}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
new file mode 100644
index 0000000..6a5ad7b
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.type;
+
+
+/**
+ * Mapping between nucleotide letters (A, C, G, T) and their 2-bit gene codes, plus
+ * complement helpers.
+ */
+public class GeneCode {
+    /** Letters indexed by gene code. */
+    public final static byte[] GENE_SYMBOL = { 'A', 'C', 'G', 'T' };
+    /**
+     * These four ids must equal the index of the corresponding letter in
+     * {@link #GENE_SYMBOL}.
+     */
+    public static final byte A = 0;
+    public static final byte C = 1;
+    public static final byte G = 2;
+    public static final byte T = 3;
+
+    /**
+     * Converts a letter (either case) into its gene code.
+     *
+     * @param ch
+     *            letter as a byte
+     * @return the gene code; any byte that is not one of A/C/G/T (either case) yields {@code 0}, the code of A
+     */
+    public static byte getCodeFromSymbol(byte ch) {
+        byte r = 0;
+        switch (ch) {
+            case 'A':
+            case 'a':
+                r = A;
+                break;
+            case 'C':
+            case 'c':
+                r = C;
+                break;
+            case 'G':
+            case 'g':
+                r = G;
+                break;
+            case 'T':
+            case 't':
+                r = T;
+                break;
+        }
+        return r;
+    }
+
+    /**
+     * Returns the code of the complementary base (A with T, C with G).
+     *
+     * @param genecode
+     *            gene code in {@code 0..3}
+     * @return {@code 3 - genecode}
+     * @throws IllegalArgumentException
+     *             if {@code genecode} is outside {@code 0..3}
+     */
+    public static byte getPairedGeneCode(byte genecode) {
+        if (genecode < 0 || genecode > 3) {
+            throw new IllegalArgumentException("Invalid genecode: " + genecode);
+        }
+        return (byte) (3 - genecode);
+    }
+
+    /**
+     * Converts a letter into the gene code of its complementary base.
+     * Unrecognised letters are treated as A by {@link #getCodeFromSymbol(byte)}, so they yield the code of T.
+     *
+     * @param ch
+     *            letter as a byte
+     * @return gene code of the complement
+     */
+    public static byte getPairedCodeFromSymbol(byte ch) {
+        return getPairedGeneCode(getCodeFromSymbol(ch));
+    }
+
+    /**
+     * Converts a gene code into its upper-case letter.
+     *
+     * @param code
+     *            gene code in {@code 0..3}
+     * @return the letter as a byte
+     * @throws IllegalArgumentException
+     *             if {@code code} is outside {@code 0..3}
+     */
+    public static byte getSymbolFromCode(byte code) {
+        if (code > 3 || code < 0) {
+            // include the offending value, as getPairedGeneCode does
+            throw new IllegalArgumentException("Invalid genecode: " + code);
+        }
+        return GENE_SYMBOL[code];
+    }
+
+    /**
+     * Builds the reverse complement of a kmer string.
+     *
+     * @param kmer
+     *            letters A/C/G/T in either case
+     * @return the complemented letters in reverse order, always upper case
+     * @throws IllegalArgumentException
+     *             if {@code kmer} contains any other character
+     */
+    public static String reverseComplement(String kmer) {
+        StringBuilder sb = new StringBuilder(kmer.length());
+        for (char letter : kmer.toCharArray()) {
+            sb.append(complement(letter));
+        }
+        return sb.reverse().toString();
+    }
+
+    /**
+     * Returns the complementary base of a letter.
+     *
+     * @param ch
+     *            one of A/C/G/T in either case
+     * @return the upper-case complement
+     * @throws IllegalArgumentException
+     *             if {@code ch} is not a recognised letter (still a {@link RuntimeException}, as before)
+     */
+    public static char complement(char ch) {
+        switch (ch) {
+            case 'A':
+            case 'a':
+                return 'T';
+            case 'C':
+            case 'c':
+                return 'G';
+            case 'G':
+            case 'g':
+                return 'C';
+            case 'T':
+            case 't':
+                return 'A';
+        }
+        throw new IllegalArgumentException("Invalid character given in complement: " + ch);
+    }
+}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
new file mode 100644
index 0000000..bbba2fb
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
@@ -0,0 +1,414 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.type;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.hadoop.io.BinaryComparable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+
+import edu.uci.ics.genomix.data.KmerUtil;
+import edu.uci.ics.genomix.data.Marshal;
+
+/**
+ * Fixed, static-length Kmer used as the key and edge values of each
+ * NodeWritable. Kmer length should be set once during configuration and should
+ * never change.
+ * <p>
+ * Storage layout: every letter is a 2-bit gene code, four letters per byte. Letter 0 lives in
+ * the low two bits of the LAST byte of the kmer's byte range; later letters take higher bits and
+ * then lower byte indexes, so the final letter ends up in {@code bytes[offset]}.
+ * <p>
+ * The kmer length and the derived byte count are static fields shared by every instance.
+ */
+public class KmerBytesWritable extends BinaryComparable implements Serializable, WritableComparable<BinaryComparable> {
+
+    private static final long serialVersionUID = 1L;
+    protected static final byte[] EMPTY_BYTES = {};
+
+    /** Number of letters in every kmer; set by {@link #setGlobalKmerLength(int)}. */
+    protected static int lettersInKmer;
+    /** Number of bytes needed to store {@link #lettersInKmer} letters. */
+    protected static int bytesUsed;
+    /** Backing storage; owned by this object or shared with a caller after a setAsReference call. */
+    protected byte[] bytes;
+    /** Index in {@link #bytes} of the first byte of this kmer. */
+    protected int offset;
+
+    /**
+     * set the *GLOBAL* kmer length to the given k value.
+     * NOTE: this will invalidate ALL previously created kmers. This function
+     * should be called before any kmers are created
+     *
+     * @param k
+     *            number of letters per kmer
+     */
+    public static void setGlobalKmerLength(int k) {
+        bytesUsed = KmerUtil.getByteNumFromK(k);
+        lettersInKmer = k;
+    }
+
+    /**
+     * Initialize as empty kmer backed by a freshly allocated array of the current global byte length.
+     */
+    public KmerBytesWritable() {
+        bytes = new byte[bytesUsed];
+        offset = 0;
+    }
+
+    /**
+     * Copy contents of kmer string
+     *
+     * @param kmer
+     *            letters to encode; see {@link #setByRead(byte[], int)} for how they are packed
+     */
+    public KmerBytesWritable(String kmer) {
+        this();
+        setByRead(kmer.getBytes(), 0);
+    }
+
+    /**
+     * Set as reference to existing data; no bytes are copied.
+     *
+     * @param newStorage
+     *            array that already contains a packed kmer
+     * @param newOffset
+     *            index of the kmer's first byte in {@code newStorage}
+     * @throws IllegalArgumentException
+     *             if fewer bytes than one kmer remain after {@code newOffset}
+     */
+    public KmerBytesWritable(byte[] newStorage, int newOffset) {
+        setAsReference(newStorage, newOffset);
+    }
+
+    /**
+     * copy kmer in other
+     *
+     * @param other
+     *            kmer whose bytes are copied into new storage
+     */
+    public KmerBytesWritable(KmerBytesWritable other) {
+        this();
+        setAsCopy(other);
+    }
+
+    /**
+     * copy kmer in other
+     *
+     * @param other
+     *            variable-length kmer; must have exactly the global letter length
+     * @throws IllegalArgumentException
+     *             if {@code other} has a different letter length
+     */
+    public KmerBytesWritable(VKmerBytesWritable other) {
+        this();
+        setAsCopy(other);
+    }
+
+    /**
+     * Deep copy of the given kmer
+     *
+     * @param other
+     *            source kmer; nothing is copied while the global letter length is not positive
+     */
+    public void setAsCopy(KmerBytesWritable other) {
+        if (lettersInKmer > 0) {
+            System.arraycopy(other.bytes, other.offset, bytes, offset, bytesUsed);
+        }
+    }
+
+    /**
+     * Deep copy of the given kmer
+     *
+     * @param other
+     *            variable-length source kmer
+     * @throws IllegalArgumentException
+     *             if {@code other} does not have the global letter length
+     */
+    public void setAsCopy(VKmerBytesWritable other) {
+        if (other.lettersInKmer != lettersInKmer) {
+            throw new IllegalArgumentException("Provided VKmer (" + other + ") is of an incompatible length (was " + other.getKmerLetterLength() + ", should be " + lettersInKmer + ")!");
+        }
+        if (lettersInKmer > 0) {
+            System.arraycopy(other.bytes, other.kmerStartOffset, bytes, offset, bytesUsed);
+        }
+    }
+
+    /**
+     * Deep copy of the given bytes data
+     *
+     * @param newData
+     *            array containing a packed kmer
+     * @param newOffset
+     *            index of the kmer's first byte in {@code newData}
+     * @throws IllegalArgumentException
+     *             if fewer bytes than one kmer remain after {@code newOffset}
+     */
+    public void setAsCopy(byte[] newData, int newOffset) {
+        if (newData.length - newOffset < bytesUsed) {
+            throw new IllegalArgumentException("Requested " + bytesUsed + " bytes (k=" + lettersInKmer
+                    + ") but buffer has only " + (newData.length - newOffset) + " bytes");
+        }
+        System.arraycopy(newData, newOffset, bytes, offset, bytesUsed);
+    }
+
+    /**
+     * Point this datablock to the given bytes array It works like the pointer
+     * to new datablock. Later writes through this object modify {@code newData}.
+     *
+     * @param newData
+     *            array containing a packed kmer
+     * @param newOffset
+     *            index of the kmer's first byte in {@code newData}
+     * @throws IllegalArgumentException
+     *             if fewer bytes than one kmer remain after {@code newOffset}
+     */
+    public void setAsReference(byte[] newData, int newOffset) {
+        if (newData.length - newOffset < bytesUsed) {
+            throw new IllegalArgumentException("Requested " + bytesUsed + " bytes (k=" + lettersInKmer
+                    + ") but buffer has only " + (newData.length - newOffset) + " bytes");
+        }
+        bytes = newData;
+        offset = newOffset;
+    }
+
+    /**
+     * Point this datablock to the given kmer's byte array It works like the pointer
+     * to new datablock.
+     *
+     * @param other
+     *            variable-length kmer whose storage is shared from its kmer start offset
+     * @throws IllegalArgumentException
+     *             if {@code other} does not have the global letter length
+     */
+    public void setAsReference(VKmerBytesWritable other) {
+        if (other.lettersInKmer != lettersInKmer) {
+            throw new IllegalArgumentException("Provided VKmer (" + other + ") is of an incompatible length (was " + other.getKmerLetterLength() + ", should be " + lettersInKmer + ")!");
+        }
+        bytes = other.bytes;
+        offset = other.kmerStartOffset;
+    }
+
+    /**
+     * Get one genecode (A|G|C|T) from the given kmer index, e.g. index 3 of
+     * the kmer ACGTA is T.
+     *
+     * @param pos
+     *            zero-based letter index
+     * @return the 2-bit gene code stored at {@code pos}
+     * @throws ArrayIndexOutOfBoundsException
+     *             if {@code pos} is negative or not smaller than the kmer length
+     */
+    public byte getGeneCodeAtPosition(int pos) {
+        if (pos >= lettersInKmer || pos < 0) {
+            throw new ArrayIndexOutOfBoundsException("Gene position (" + pos + ") out of bounds for k=" + lettersInKmer);
+        }
+        return geneCodeAtPosition(pos);
+    }
+
+    /**
+     * unchecked version of getGeneCodeAtPosition. Used when kmerlength is
+     * inaccurate (mid-merge)
+     */
+    private byte geneCodeAtPosition(int pos) {
+        int posByte = pos / 4;
+        int shift = (pos % 4) << 1;
+        // letters are counted from the last byte of the range backwards
+        return (byte) ((bytes[offset + bytesUsed - 1 - posByte] >> shift) & 0x3);
+    }
+
+    /** @return the global number of letters per kmer */
+    public static int getKmerLength() {
+        return lettersInKmer;
+    }
+
+    /** @return the global number of bytes per kmer */
+    public static int getBytesPerKmer() {
+        return bytesUsed;
+    }
+
+    /**
+     * @return the backing array itself (not a copy); the kmer occupies {@link #getLength()} bytes
+     *         starting at {@link #getOffset()}
+     */
+    @Override
+    public byte[] getBytes() {
+        return bytes;
+    }
+
+    /** @return index of the kmer's first byte inside {@link #getBytes()} */
+    public int getOffset() {
+        return offset;
+    }
+
+    /** @return the global number of bytes per kmer */
+    @Override
+    public int getLength() {
+        return bytesUsed;
+    }
+
+    /**
+     * Orders this kmer against {@code other} byte-wise over the bytes the kmer actually occupies.
+     * The inherited implementation always compares from index 0 of the backing array, which is
+     * wrong for a kmer that references shared storage at a non-zero offset and disagrees with
+     * {@link #equals(Object)}.
+     *
+     * @param other
+     *            value to compare with; for a non-{@code KmerBytesWritable} its bytes are read from
+     *            index 0, as the inherited implementation did
+     * @return negative, zero or positive per {@link WritableComparator#compareBytes}
+     */
+    @Override
+    public int compareTo(BinaryComparable other) {
+        if (this == other) {
+            return 0;
+        }
+        int otherOffset = (other instanceof KmerBytesWritable) ? ((KmerBytesWritable) other).offset : 0;
+        return WritableComparator.compareBytes(bytes, offset, bytesUsed, other.getBytes(), otherOffset,
+                other.getLength());
+    }
+
+    /**
+     * Read Kmer from read text into bytes array e.g. AATAG will compress as
+     * [0x000G, 0xATAA]
+     * <p>
+     * NOTE(review): if fewer than k letters are available from {@code start}, the loop simply stops
+     * at the end of {@code stringBytes} and the partially filled byte is stored at {@code bytes[offset]};
+     * callers presumably always supply at least k letters - confirm.
+     *
+     * @param stringBytes
+     *            letters of the read
+     * @param start
+     *            index of the first letter to encode
+     */
+    public void setByRead(byte[] stringBytes, int start) {
+        byte l = 0;
+        int bytecount = 0;
+        int bcount = bytesUsed - 1;
+        for (int i = start; i < start + lettersInKmer && i < stringBytes.length; i++) {
+            byte code = GeneCode.getCodeFromSymbol(stringBytes[i]);
+            l |= (byte) (code << bytecount);
+            bytecount += 2;
+            if (bytecount == 8) {
+                // byte is full: store it and move to the preceding byte
+                bytes[offset + bcount--] = l;
+                l = 0;
+                bytecount = 0;
+            }
+        }
+        if (bcount >= 0) {
+            bytes[offset] = l;
+        }
+    }
+
+    /**
+     * Compress Reversed read into bytes array e.g. AATAG will paired to CTATT,
+     * and then compress as [0x000T,0xTATC]
+     *
+     * @param array
+     *            letters of the read
+     * @param start
+     *            index of the first letter of the kmer inside {@code array}; letters are consumed
+     *            from {@code start + k - 1} down to {@code start}
+     */
+    public void setByReadReverse(byte[] array, int start) {
+        byte l = 0;
+        int bytecount = 0;
+        int bcount = bytesUsed - 1;
+        for (int i = start + lettersInKmer - 1; i >= start && i < array.length; i--) {
+            byte code = GeneCode.getPairedCodeFromSymbol(array[i]);
+            l |= (byte) (code << bytecount);
+            bytecount += 2;
+            if (bytecount == 8) {
+                bytes[offset + bcount--] = l;
+                l = 0;
+                bytecount = 0;
+            }
+        }
+        if (bcount >= 0) {
+            bytes[offset] = l;
+        }
+    }
+
+    /**
+     * Shift Kmer to accept new char input
+     *
+     * @param c
+     *            Input new gene character
+     * @return the shift out gene, in gene code format
+     */
+    public byte shiftKmerWithNextChar(byte c) {
+        return shiftKmerWithNextCode(GeneCode.getCodeFromSymbol(c));
+    }
+
+    /**
+     * Shift Kmer to accept new gene code: the first letter is dropped and {@code c} becomes the
+     * last letter.
+     *
+     * @param c
+     *            Input new gene code; it is not masked here, so it must be in {@code 0..3}
+     * @return the shift out gene, in gene code format
+     */
+    public byte shiftKmerWithNextCode(byte c) {
+        byte output = (byte) (bytes[offset + bytesUsed - 1] & 0x03);
+        for (int i = bytesUsed - 1; i > 0; i--) {
+            // the low letter of the preceding byte moves into the top of this byte
+            byte in = (byte) (bytes[offset + i - 1] & 0x03);
+            bytes[offset + i] = (byte) (((bytes[offset + i] >>> 2) & 0x3f) | (in << 6));
+        }
+        int pos = ((lettersInKmer - 1) % 4) << 1;
+        byte code = (byte) (c << pos);
+        bytes[offset] = (byte) (((bytes[offset] >>> 2) & 0x3f) | code);
+        clearLeadBit();
+        return output;
+    }
+
+    /**
+     * Shift Kmer to accept new input char
+     *
+     * @param c
+     *            Input new gene character
+     * @return the shiftout gene, in gene code format
+     */
+    public byte shiftKmerWithPreChar(byte c) {
+        return shiftKmerWithPreCode(GeneCode.getCodeFromSymbol(c));
+    }
+
+    /**
+     * Shift Kmer to accept new gene code: the last letter is dropped and {@code c} becomes the
+     * first letter.
+     *
+     * @param c
+     *            Input new gene code; it is not masked here, so it must be in {@code 0..3}
+     * @return the shiftout gene, in gene code format
+     */
+    public byte shiftKmerWithPreCode(byte c) {
+        int pos = ((lettersInKmer - 1) % 4) << 1;
+        byte output = (byte) ((bytes[offset] >> pos) & 0x03);
+        for (int i = 0; i < bytesUsed - 1; i++) {
+            // the top letter of the following byte moves into the bottom of this byte
+            byte in = (byte) ((bytes[offset + i + 1] >> 6) & 0x03);
+            bytes[offset + i] = (byte) ((bytes[offset + i] << 2) | in);
+        }
+        bytes[offset + bytesUsed - 1] = (byte) ((bytes[offset + bytesUsed - 1] << 2) | c);
+        clearLeadBit();
+        return output;
+    }
+
+    /**
+     * Writes the eight bits of {@code onebyte} into a packed kmer buffer, starting at letter
+     * position {@code k}. When {@code k} is not a multiple of four the bits straddle two bytes.
+     *
+     * @param k
+     *            letter position at which the low bits of {@code onebyte} are placed
+     * @param onebyte
+     *            four packed letters to store
+     * @param buffer
+     *            destination array
+     * @param start
+     *            index of the kmer's first byte in {@code buffer}
+     * @param length
+     *            number of bytes the kmer occupies
+     * @throws IllegalArgumentException
+     *             if position {@code k} falls before {@code start}
+     */
+    public static void appendOneByteAtPosition(int k, byte onebyte, byte[] buffer, int start, int length) {
+        int position = start + length - 1 - k / 4;
+        if (position < start) {
+            throw new IllegalArgumentException("Buffer for kmer storage is invalid");
+        }
+        int shift = ((k) % 4) << 1;
+        int mask = shift == 0 ? 0 : ((1 << shift) - 1);
+
+        // keep the bits below the insertion point, overwrite the rest
+        buffer[position] = (byte) ((buffer[position] & mask) | ((0xff & onebyte) << shift));
+        if (position > start && shift != 0) {
+            // the bits that did not fit spill into the low end of the preceding byte
+            buffer[position - 1] = (byte) ((buffer[position - 1] & (0xff - mask)) | ((byte) ((0xff & onebyte) >>> (8 - shift))));
+        }
+    }
+
+    /**
+     * Reads eight bits (four letters) from a packed kmer buffer, starting at letter position {@code k}.
+     *
+     * @param k
+     *            letter position of the lowest two bits of the result
+     * @param buffer
+     *            source array
+     * @param start
+     *            index of the kmer's first byte in {@code buffer}
+     * @param length
+     *            number of bytes the kmer occupies
+     * @return the extracted bits; bits that would come from before {@code start} are zero
+     * @throws IllegalArgumentException
+     *             if position {@code k} falls before {@code start}
+     */
+    public static byte getOneByteFromKmerAtPosition(int k, byte[] buffer, int start, int length) {
+        int position = start + length - 1 - k / 4;
+        if (position < start) {
+            throw new IllegalArgumentException("Buffer of kmer storage is invalid");
+        }
+        int shift = (k % 4) << 1;
+        byte data = (byte) (((0xff) & buffer[position]) >>> shift);
+        if (shift != 0 && position > start) {
+            data |= 0xff & (buffer[position - 1] << (8 - shift));
+        }
+        return data;
+    }
+
+    /**
+     * Zeroes the bits of the first byte that lie above the last letter; does nothing when the kmer
+     * length is a multiple of four.
+     */
+    protected void clearLeadBit() {
+        if (lettersInKmer % 4 != 0) {
+            bytes[offset] &= (1 << ((lettersInKmer % 4) << 1)) - 1;
+        }
+    }
+
+    /**
+     * Reads exactly one kmer's worth of bytes from {@code in} into the current storage at the
+     * current offset.
+     */
+    @Override
+    public void readFields(DataInput in) throws IOException {
+        in.readFully(bytes, offset, bytesUsed);
+    }
+
+    /** Writes the kmer's bytes, without any length prefix, to {@code out}. */
+    @Override
+    public void write(DataOutput out) throws IOException {
+        out.write(bytes, offset, bytesUsed);
+    }
+
+    /** Hashes only the bytes occupied by this kmer. */
+    @Override
+    public int hashCode() {
+        return Marshal.hashBytes(bytes, offset, bytesUsed);
+    }
+
+    /**
+     * @return {@code true} only when {@code right_obj} is a {@code KmerBytesWritable} whose kmer bytes
+     *         match this one's byte for byte
+     */
+    @Override
+    public boolean equals(Object right_obj) {
+        if (right_obj instanceof KmerBytesWritable) {
+            // since these may be backed by storage of different sizes, we have to manually check each byte
+            KmerBytesWritable right = (KmerBytesWritable) right_obj;
+            for (int i = 0; i < bytesUsed; i++) {
+                if (bytes[offset + i] != right.bytes[right.offset + i]) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        return false;
+    }
+
+    /** @return the kmer decoded back into its letters */
+    @Override
+    public String toString() {
+        return KmerUtil.recoverKmerFrom(lettersInKmer, bytes, offset, bytesUsed);
+    }
+
+    /** Raw comparator that orders serialized kmers byte-wise. */
+    public static class Comparator extends WritableComparator {
+        public Comparator() {
+            super(KmerBytesWritable.class);
+        }
+
+        @Override
+        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+            return compareBytes(b1, s1, l1, b2, s2, l2);
+        }
+    }
+
+    static { // register this comparator
+        WritableComparator.define(KmerBytesWritable.class, new Comparator());
+    }
+
+}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java
new file mode 100644
index 0000000..c287c1b
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java
@@ -0,0 +1,311 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.type;
+
+/**
+ * Builds derived kmers (sub-kmers, merges, shifts, reversal) from {@link VKmerBytesWritable} inputs.
+ * <p>
+ * Every method writes its result into one internal kmer and returns that same object, so a
+ * result is overwritten by the next call on this factory. Copy the result if it must outlive
+ * the next call.
+ * NOTE(review): passing a kmer previously returned by this factory back in as an argument
+ * looks unsafe for the methods that reset the internal kmer first - confirm against
+ * VKmerBytesWritable.reset.
+ */
+public class KmerBytesWritableFactory {
+    /** Reusable result object shared by all methods. */
+    private VKmerBytesWritable kmer;
+
+    /**
+     * @param k
+     *            letter length passed to the internal result kmer's constructor
+     */
+    public KmerBytesWritableFactory(int k) {
+        kmer = new VKmerBytesWritable(k);
+    }
+
+    /**
+     * Read Kmer from read text into bytes array e.g. AATAG will compress as
+     * [0x000G, 0xATAA]. Delegates to {@code VKmerBytesWritable.setByRead}.
+     *
+     * @param k
+     *            number of letters to read
+     * @param array
+     *            letters of the read
+     * @param start
+     *            index of the first letter
+     * @return the shared result kmer
+     */
+    public VKmerBytesWritable getKmerByRead(int k, byte[] array, int start) {
+        kmer.setByRead(k, array, start);
+        return kmer;
+    }
+
+    /**
+     * Compress Reversed Kmer into bytes array AATAG will compress as
+     * [0x000A,0xATAG]. Delegates to {@code VKmerBytesWritable.setByReadReverse}.
+     *
+     * @param k
+     *            number of letters to read
+     * @param array
+     *            letters of the read
+     * @param start
+     *            index of the first letter
+     * @return the shared result kmer
+     */
+    public VKmerBytesWritable getKmerByReadReverse(int k, byte[] array, int start) {
+        kmer.setByReadReverse(k, array, start);
+        return kmer;
+    }
+
+    /**
+     * Get last kmer from kmer-chain.
+     * e.g. kmerChain is AAGCTA, if k =5, it will
+     * return AGCTA
+     *
+     * @param lastK
+     *            number of trailing letters to extract
+     * @param kmerChain
+     *            source kmer
+     * @return the shared result kmer, or {@code null} when {@code lastK} exceeds the chain length
+     */
+    public VKmerBytesWritable getLastKmerFromChain(int lastK, final VKmerBytesWritable kmerChain) {
+        if (lastK > kmerChain.getKmerLetterLength()) {
+            return null;
+        }
+        if (lastK == kmerChain.getKmerLetterLength()) {
+            kmer.setAsCopy(kmerChain);
+            return kmer;
+        }
+        kmer.reset(lastK);
+
+        /** from end to start */
+        int byteInChain = kmerChain.getKmerByteLength() - 1 - (kmerChain.getKmerLetterLength() - lastK) / 4;
+        int posInByteOfChain = ((kmerChain.getKmerLetterLength() - lastK) % 4) << 1; // *2
+        int byteInKmer = kmer.getKmerByteLength() - 1;
+        for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
+            // each result byte combines the high part of one chain byte with the low part of the preceding one
+            kmer.getBytes()[byteInKmer + kmer.getKmerOffset()] = (byte) ((0xff & kmerChain.getBytes()[byteInChain + kmerChain.getKmerOffset()]) >> posInByteOfChain);
+            kmer.getBytes()[byteInKmer + kmer.getKmerOffset()] |= ((kmerChain.getBytes()[byteInChain + kmerChain.getKmerOffset() - 1] << (8 - posInByteOfChain)));
+        }
+
+        /** last kmer byte */
+        if (byteInKmer == 0) {
+            kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) ((kmerChain.getBytes()[0 + kmerChain.getKmerOffset()] & 0xff) >> posInByteOfChain);
+        }
+        kmer.clearLeadBit();
+        return kmer;
+    }
+
+    /**
+     * Get first kmer from kmer-chain e.g. kmerChain is AAGCTA, if k=5, it will
+     * return AAGCT
+     *
+     * @param firstK
+     *            number of leading letters to extract
+     * @param kmerChain
+     *            source kmer
+     * @return the shared result kmer, or {@code null} when {@code firstK} exceeds the chain length
+     */
+    public VKmerBytesWritable getFirstKmerFromChain(int firstK, final VKmerBytesWritable kmerChain) {
+        if (firstK > kmerChain.getKmerLetterLength()) {
+            return null;
+        }
+        if (firstK == kmerChain.getKmerLetterLength()) {
+            kmer.setAsCopy(kmerChain);
+            return kmer;
+        }
+        kmer.reset(firstK);
+
+        // whole bytes are copied aligned at the END of both byte ranges
+        int i = 1;
+        for (; i < kmer.getKmerByteLength(); i++) {
+            kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i] = kmerChain.getBytes()[kmerChain.getKmerOffset() + kmerChain.getKmerByteLength() - i];
+        }
+        int posInByteOfChain = (firstK % 4) << 1; // *2
+        if (posInByteOfChain == 0) {
+            kmer.getBytes()[0 + kmer.getKmerOffset()] = kmerChain.getBytes()[kmerChain.getKmerOffset() + kmerChain.getKmerByteLength() - i];
+        } else {
+            // partially used first byte: keep only the bits that belong to the first firstK letters
+            kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) (kmerChain.getBytes()[kmerChain.getKmerOffset() + kmerChain.getKmerByteLength() - i] & ((1 << posInByteOfChain) - 1));
+        }
+        kmer.clearLeadBit();
+        return kmer;
+    }
+
+    /**
+     * Extracts {@code kSize} letters of {@code kmerChain} starting at letter {@code startK}.
+     *
+     * @param startK
+     *            zero-based index of the first letter to extract
+     * @param kSize
+     *            number of letters to extract
+     * @param kmerChain
+     *            source kmer
+     * @return the shared result kmer, or {@code null} when the requested range runs past the end of the chain
+     */
+    public VKmerBytesWritable getSubKmerFromChain(int startK, int kSize, final VKmerBytesWritable kmerChain) {
+        if (startK + kSize > kmerChain.getKmerLetterLength()) {
+            return null;
+        }
+        if (startK == 0 && kSize == kmerChain.getKmerLetterLength()) {
+            kmer.setAsCopy(kmerChain);
+            return kmer;
+        }
+        kmer.reset(kSize);
+
+        /** from end to start */
+        int byteInChain = kmerChain.getKmerByteLength() - 1 - startK / 4;
+        int posInByteOfChain = startK % 4 << 1; // *2
+        int byteInKmer = kmer.getKmerByteLength() - 1;
+        for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
+            kmer.getBytes()[byteInKmer + kmer.getKmerOffset()] = (byte) ((0xff & kmerChain.getBytes()[byteInChain + kmerChain.getKmerOffset()]) >> posInByteOfChain);
+            kmer.getBytes()[byteInKmer + kmer.getKmerOffset()] |= ((kmerChain.getBytes()[byteInChain + kmerChain.getKmerOffset() - 1] << (8 - posInByteOfChain)));
+        }
+
+        /** last kmer byte */
+        if (byteInKmer == 0) {
+            kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) ((kmerChain.getBytes()[0 + kmerChain.getKmerOffset()] & 0xff) >> posInByteOfChain);
+        }
+        kmer.clearLeadBit();
+        return kmer;
+    }
+
+    /**
+     * Merge kmer with next neighbor in gene-code format.
+     * The k of new kmer will increase by 1
+     * e.g. AAGCT merge with A => AAGCTA
+     *
+     * @param kmer
+     *            : input kmer (not modified)
+     * @param nextCode
+     *            : next neighbor in gene-code format; only its low two bits are used
+     * @return the merged Kmer (the shared result kmer), this K of this Kmer is k+1
+     */
+    public VKmerBytesWritable mergeKmerWithNextCode(final VKmerBytesWritable kmer, byte nextCode) {
+        this.kmer.reset(kmer.getKmerLetterLength() + 1);
+        for (int i = 1; i <= kmer.getKmerByteLength(); i++) {
+            this.kmer.getBytes()[this.kmer.getKmerOffset() + this.kmer.getKmerByteLength() - i] = kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i];
+        }
+        // The destination index must use the merged kmer's own offset; the parameter shadows the
+        // field, so a bare "kmer" here would pick up the input's offset instead.
+        if (this.kmer.getKmerByteLength() > kmer.getKmerByteLength()) {
+            // the extra letter opens a new first byte
+            this.kmer.getBytes()[0 + this.kmer.getKmerOffset()] = (byte) (nextCode & 0x3);
+        } else {
+            // the extra letter fits into the unused high bits of the existing first byte
+            this.kmer.getBytes()[0 + this.kmer.getKmerOffset()] = (byte) (kmer.getBytes()[0 + kmer.getKmerOffset()] | ((nextCode & 0x3) << ((kmer.getKmerLetterLength() % 4) << 1)));
+        }
+        this.kmer.clearLeadBit();
+        return this.kmer;
+    }
+
+    /**
+     * Merge kmer with previous neighbor in gene-code format.
+     * The k of new kmer will increase by 1
+     * e.g. AAGCT merge with A => AAAGCT
+     *
+     * @param kmer
+     *            : input kmer (not modified)
+     * @param preCode
+     *            : previous neighbor in gene-code format; only its low two bits are used
+     * @return the merged Kmer (the shared result kmer),this K of this Kmer is k+1
+     */
+    public VKmerBytesWritable mergeKmerWithPreCode(final VKmerBytesWritable kmer, byte preCode) {
+        this.kmer.reset(kmer.getKmerLetterLength() + 1);
+        // Destination indexes use the merged kmer's own offset (the parameter shadows the field).
+        final int mergedOffset = this.kmer.getKmerOffset();
+        int byteInMergedKmer = 0;
+        if (kmer.getKmerLetterLength() % 4 == 0) {
+            // input fills its first byte completely: its top letter moves into a new first byte
+            this.kmer.getBytes()[0 + mergedOffset] = (byte) ((kmer.getBytes()[0 + kmer.getKmerOffset()] >> 6) & 0x3);
+            byteInMergedKmer++;
+        }
+        for (int i = 0; i < kmer.getKmerByteLength() - 1; i++, byteInMergedKmer++) {
+            this.kmer.getBytes()[byteInMergedKmer + mergedOffset] = (byte) ((kmer.getBytes()[i + kmer.getKmerOffset()] << 2) | ((kmer.getBytes()[i + kmer.getKmerOffset() + 1] >> 6) & 0x3));
+        }
+        // the new letter takes the lowest two bits of the last byte, i.e. letter position 0
+        this.kmer.getBytes()[byteInMergedKmer + mergedOffset] = (byte) ((kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - 1] << 2) | (preCode & 0x3));
+        this.kmer.clearLeadBit();
+        return this.kmer;
+    }
+
+    /**
+     * Merge two kmer to one kmer
+     * e.g. ACTA + ACCGT => ACTAACCGT
+     *
+     * @param preKmer
+     *            : kmer that supplies the leading letters
+     * @param nextKmer
+     *            : kmer that supplies the trailing letters
+     * @return merged kmer (the shared result kmer), whose length is the sum of both input lengths
+     */
+    public VKmerBytesWritable mergeTwoKmer(final VKmerBytesWritable preKmer, final VKmerBytesWritable nextKmer) {
+        kmer.reset(preKmer.getKmerLetterLength() + nextKmer.getKmerLetterLength());
+        int i = 1;
+        for (; i <= preKmer.getKmerByteLength(); i++) {
+            kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i] = preKmer.getBytes()[preKmer.getKmerOffset() + preKmer.getKmerByteLength() - i];
+        }
+        if (i > 1) {
+            i--;
+        }
+        if (preKmer.getKmerLetterLength() % 4 == 0) {
+            // byte-aligned: nextKmer's bytes are copied unchanged in front of preKmer's
+            for (int j = 1; j <= nextKmer.getKmerByteLength(); j++) {
+                kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i - j] = nextKmer.getBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - j];
+            }
+        } else {
+            // unaligned: every byte of nextKmer is split across two result bytes
+            int posNeedToMove = ((preKmer.getKmerLetterLength() % 4) << 1);
+            kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i] |= nextKmer.getBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - 1] << posNeedToMove;
+            for (int j = 1; j < nextKmer.getKmerByteLength(); j++) {
+                kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i - j] = (byte) (((nextKmer.getBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - j] & 0xff) >> (8 - posNeedToMove)) | (nextKmer
+                        .getBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - j - 1] << posNeedToMove));
+            }
+            if (nextKmer.getKmerLetterLength() % 4 == 0 || (nextKmer.getKmerLetterLength() % 4) * 2 + posNeedToMove > 8) {
+                kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) ((0xff & nextKmer.getBytes()[0 + nextKmer.getKmerOffset()]) >> (8 - posNeedToMove));
+            }
+        }
+        kmer.clearLeadBit();
+        return kmer;
+    }
+
+    /**
+     * Safely shifted the kmer forward without change the input kmer
+     * e.g. AGCGC shift with T => GCGCT
+     *
+     * @param kmer
+     *            : input kmer
+     * @param afterCode
+     *            : input genecode
+     * @return the shared result kmer holding the shifted copy, the K will not change
+     */
+    public VKmerBytesWritable shiftKmerWithNextCode(final VKmerBytesWritable kmer, byte afterCode) {
+        this.kmer.setAsCopy(kmer);
+        this.kmer.shiftKmerWithNextCode(afterCode);
+        return this.kmer;
+    }
+
+    /**
+     * Safely shifted the kmer backward without change the input kmer
+     * e.g. AGCGC shift with T => TAGCG
+     *
+     * @param kmer
+     *            : input kmer
+     * @param preCode
+     *            : input genecode
+     * @return the shared result kmer holding the shifted copy, the K will not change
+     */
+    public VKmerBytesWritable shiftKmerWithPreCode(final VKmerBytesWritable kmer, byte preCode) {
+        this.kmer.setAsCopy(kmer);
+        this.kmer.shiftKmerWithPreCode(preCode);
+        return this.kmer;
+    }
+
+    /**
+     * get the reverse sequence of given kmer (letter order reversed; letters are not complemented)
+     *
+     * @param kmer
+     *            input kmer (not modified)
+     * @return the shared result kmer holding the reversed letters
+     */
+    public VKmerBytesWritable reverse(final VKmerBytesWritable kmer) {
+        this.kmer.reset(kmer.getKmerLetterLength());
+
+        // read cursor starts at the input's last letter (highest used bits of its first byte)
+        int curPosAtKmer = ((kmer.getKmerLetterLength() - 1) % 4) << 1;
+        int curByteAtKmer = 0;
+
+        // write cursor starts at letter position 0 (low bits of the result's last byte)
+        int curPosAtReverse = 0;
+        int curByteAtReverse = this.kmer.getKmerByteLength() - 1;
+        this.kmer.getBytes()[curByteAtReverse + this.kmer.getKmerOffset()] = 0;
+        for (int i = 0; i < kmer.getKmerLetterLength(); i++) {
+            byte gene = (byte) ((kmer.getBytes()[curByteAtKmer + kmer.getKmerOffset()] >> curPosAtKmer) & 0x03);
+            this.kmer.getBytes()[curByteAtReverse + this.kmer.getKmerOffset()] |= gene << curPosAtReverse;
+            curPosAtReverse += 2;
+            if (curPosAtReverse >= 8) {
+                curPosAtReverse = 0;
+                this.kmer.getBytes()[--curByteAtReverse + this.kmer.getKmerOffset()] = 0;
+            }
+            curPosAtKmer -= 2;
+            if (curPosAtKmer < 0) {
+                curPosAtKmer = 6;
+                curByteAtKmer++;
+            }
+        }
+        this.kmer.clearLeadBit();
+        return this.kmer;
+    }
+}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
new file mode 100644
index 0000000..807dc7f
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -0,0 +1,333 @@
+package edu.uci.ics.genomix.type;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Comparator;
+
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.data.Marshal;
+
+
+public class NodeWritable implements WritableComparable<NodeWritable>, Serializable{
+
+ private static final long serialVersionUID = 1L;
+ public static final NodeWritable EMPTY_NODE = new NodeWritable();
+
+ private static final int SIZE_FLOAT = 4;
+
+ private PositionListWritable nodeIdList;
+ private VKmerListWritable forwardForwardList;
+ private VKmerListWritable forwardReverseList;
+ private VKmerListWritable reverseForwardList;
+ private VKmerListWritable reverseReverseList;
+ private VKmerBytesWritable kmer;
+ private float averageCoverage;
+
+ // merge/update directions
+ public static class DirectionFlag {
+ public static final byte DIR_FF = 0b00 << 0;
+ public static final byte DIR_FR = 0b01 << 0;
+ public static final byte DIR_RF = 0b10 << 0;
+ public static final byte DIR_RR = 0b11 << 0;
+ public static final byte DIR_MASK = 0b11 << 0;
+ }
+
+ public NodeWritable() {
+ nodeIdList = new PositionListWritable();
+ forwardForwardList = new VKmerListWritable();
+ forwardReverseList = new VKmerListWritable();
+ reverseForwardList = new VKmerListWritable();
+ reverseReverseList = new VKmerListWritable();
+ kmer = new VKmerBytesWritable(); // in graph construction - not set kmerlength Optimization: VKmer
+ averageCoverage = 0;
+ }
+
+ public NodeWritable(PositionListWritable nodeIdList, VKmerListWritable FFList, VKmerListWritable FRList,
+ VKmerListWritable RFList, VKmerListWritable RRList, VKmerBytesWritable kmer, float coverage) {
+ this();
+ set(nodeIdList, FFList, FRList, RFList, RRList, kmer, coverage);
+ }
+
+ public void set(NodeWritable node){
+ set(node.nodeIdList, node.forwardForwardList, node.forwardReverseList, node.reverseForwardList,
+ node.reverseReverseList, node.kmer, node.averageCoverage);
+ }
+
+ public void set(PositionListWritable nodeIdList, VKmerListWritable FFList, VKmerListWritable FRList,
+ VKmerListWritable RFList, VKmerListWritable RRList, VKmerBytesWritable kmer2, float coverage) {
+ this.nodeIdList.set(nodeIdList);
+ this.forwardForwardList.setCopy(FFList);
+ this.forwardReverseList.setCopy(FRList);
+ this.reverseForwardList.setCopy(RFList);
+ this.reverseReverseList.setCopy(RRList);
+ this.kmer.setAsCopy(kmer2);
+ this.averageCoverage = coverage;
+ }
+
+ public void reset() {
+ this.nodeIdList.reset();
+ this.forwardForwardList.reset();
+ this.forwardReverseList.reset();
+ this.reverseForwardList.reset();
+ this.reverseReverseList.reset();
+ this.kmer.reset(0);
+ averageCoverage = 0;
+ }
+
+ public PositionListWritable getNodeIdList() {
+ return nodeIdList;
+ }
+
+ public void setNodeIdList(PositionListWritable nodeIdList) {
+ this.nodeIdList.set(nodeIdList);
+ }
+
+ public VKmerBytesWritable getKmer() {
+ return kmer;
+ }
+
+ public void setKmer(VKmerBytesWritable kmer) {
+ this.kmer.setAsCopy(kmer);
+ }
+
+ public int getKmerLength() {
+ return kmer.getKmerLetterLength();
+ }
+
+ public VKmerListWritable getFFList() {
+ return forwardForwardList;
+ }
+
+ public VKmerListWritable getFRList() {
+ return forwardReverseList;
+ }
+
+ public VKmerListWritable getRFList() {
+ return reverseForwardList;
+ }
+
+ public VKmerListWritable getRRList() {
+ return reverseReverseList;
+ }
+
+ public void setFFList(VKmerListWritable forwardForwardList) {
+ this.forwardForwardList.setCopy(forwardForwardList);
+ }
+
+ public void setFRList(VKmerListWritable forwardReverseList) {
+ this.forwardReverseList.setCopy(forwardReverseList);
+ }
+
+ public void setRFList(VKmerListWritable reverseForwardList) {
+ this.reverseForwardList.setCopy(reverseForwardList);
+ }
+
+ public void setRRList(VKmerListWritable reverseReverseList) {
+ this.reverseReverseList.setCopy(reverseReverseList);
+ }
+
+ public VKmerListWritable getListFromDir(byte dir) {
+ switch (dir & DirectionFlag.DIR_MASK) {
+ case DirectionFlag.DIR_FF:
+ return getFFList();
+ case DirectionFlag.DIR_FR:
+ return getFRList();
+ case DirectionFlag.DIR_RF:
+ return getRFList();
+ case DirectionFlag.DIR_RR:
+ return getRRList();
+ default:
+ throw new RuntimeException("Unrecognized direction in getListFromDir: " + dir);
+ }
+ }
+
+ /**
+ * Update my coverage to be the average of this and other, each weighted by its own kmer count. Used when merging paths.
+ */
+ public void mergeCoverage(NodeWritable other) {
+ // sequence considered in the average doesn't include anything overlapping with other kmers
+ float adjustedLength = kmer.getKmerLetterLength() + other.kmer.getKmerLetterLength() - (KmerBytesWritable.getKmerLength() - 1) * 2;
+ // a sequence of L letters contributes L - (K - 1) kmers; the parentheses keep this consistent with adjustedLength
+ float myCount = (kmer.getKmerLetterLength() - (KmerBytesWritable.getKmerLength() - 1)) * averageCoverage;
+ float otherCount = (other.kmer.getKmerLetterLength() - (KmerBytesWritable.getKmerLength() - 1)) * other.averageCoverage;
+ averageCoverage = (myCount + otherCount) / adjustedLength;
+ }
+
+ /**
+ * Update my coverage as if all the reads in other became my own; lengths are counted in kmers, i.e. L - (K - 1)
+ */
+ public void addCoverage(NodeWritable other) {
+ float myAdjustedLength = kmer.getKmerLetterLength() - (KmerBytesWritable.getKmerLength() - 1);
+ float otherAdjustedLength = other.kmer.getKmerLetterLength() - (KmerBytesWritable.getKmerLength() - 1);
+ averageCoverage += other.averageCoverage * (otherAdjustedLength / myAdjustedLength);
+ }
+
+ public void setAvgCoverage(float coverage) {
+ averageCoverage = coverage;
+ }
+
+ public float getAvgCoverage() {
+ return averageCoverage;
+ }
+
+ /**
+ * Returns the length of the byte-array version of this node
+ */
+ public int getSerializedLength() {
+ return nodeIdList.getLength() + forwardForwardList.getLength() + forwardReverseList.getLength() +
+ reverseForwardList.getLength() + reverseReverseList.getLength() + kmer.getLength() + SIZE_FLOAT;
+ }
+
+ /**
+ * Return this Node's representation as a new byte array
+ */
+ public byte[] marshalToByteArray() throws IOException {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream(getSerializedLength());
+ DataOutputStream out = new DataOutputStream(baos);
+ write(out);
+ return baos.toByteArray();
+ }
+
+ public void setAsCopy(byte[] data, int offset) {
+ int curOffset = offset;
+ nodeIdList.set(data, curOffset);
+
+ curOffset += nodeIdList.getLength();
+ forwardForwardList.setCopy(data, curOffset);
+ curOffset += forwardForwardList.getLength();
+ forwardReverseList.setCopy(data, curOffset);
+ curOffset += forwardReverseList.getLength();
+ reverseForwardList.setCopy(data, curOffset);
+ curOffset += reverseForwardList.getLength();
+ reverseReverseList.setCopy(data, curOffset);
+
+ curOffset += reverseReverseList.getLength();
+ kmer.setAsCopy(data, curOffset);
+
+ curOffset += kmer.getLength();
+ averageCoverage = Marshal.getFloat(data, curOffset);
+ }
+
+ public void setAsReference(byte[] data, int offset) {
+ int curOffset = offset;
+ nodeIdList.setNewReference(data, curOffset);
+
+ curOffset += nodeIdList.getLength();
+ forwardForwardList.setNewReference(data, curOffset);
+ curOffset += forwardForwardList.getLength();
+ forwardReverseList.setNewReference(data, curOffset);
+ curOffset += forwardReverseList.getLength();
+ reverseForwardList.setNewReference(data, curOffset);
+ curOffset += reverseForwardList.getLength();
+ reverseReverseList.setNewReference(data, curOffset);
+
+ curOffset += reverseReverseList.getLength();
+ kmer.setAsReference(data, curOffset);
+
+ curOffset += kmer.getLength();
+ averageCoverage = Marshal.getFloat(data, curOffset);
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ this.nodeIdList.write(out);
+ this.forwardForwardList.write(out);
+ this.forwardReverseList.write(out);
+ this.reverseForwardList.write(out);
+ this.reverseReverseList.write(out);
+ this.kmer.write(out);
+ out.writeFloat(averageCoverage);
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ reset();
+ this.nodeIdList.readFields(in);
+ this.forwardForwardList.readFields(in);
+ this.forwardReverseList.readFields(in);
+ this.reverseForwardList.readFields(in);
+ this.reverseReverseList.readFields(in);
+ this.kmer.readFields(in);
+ averageCoverage = in.readFloat();
+ }
+
+ @Override
+ public int compareTo(NodeWritable other) {
+ return this.kmer.compareTo(other.kmer);
+ }
+
+ public class SortByCoverage implements Comparator<NodeWritable> {
+ @Override
+ public int compare(NodeWritable left, NodeWritable right) {
+ return Float.compare(left.averageCoverage, right.averageCoverage);
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ return this.kmer.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o instanceof NodeWritable) {
+ NodeWritable nw = (NodeWritable) o;
+ return (this.nodeIdList.equals(nw.nodeIdList)
+ && this.forwardForwardList.equals(nw.forwardForwardList)
+ && this.forwardReverseList.equals(nw.forwardReverseList)
+ && this.reverseForwardList.equals(nw.reverseForwardList)
+ && this.reverseReverseList.equals(nw.reverseReverseList) && this.kmer.equals(nw.kmer));
+ }
+ return false;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sbuilder = new StringBuilder();
+ sbuilder.append('{');
+ sbuilder.append(nodeIdList.toString()).append('\t');
+ sbuilder.append(forwardForwardList.toString()).append('\t');
+ sbuilder.append(forwardReverseList.toString()).append('\t');
+ sbuilder.append(reverseForwardList.toString()).append('\t');
+ sbuilder.append(reverseReverseList.toString()).append('\t');
+ sbuilder.append(kmer.toString()).append('\t');
+ sbuilder.append(averageCoverage).append('x').append('}');
+ return sbuilder.toString();
+ }
+
+ public void mergeForwardNext(final NodeWritable nextNode, int initialKmerSize) {
+ this.forwardForwardList.setCopy(nextNode.forwardForwardList);
+ this.forwardReverseList.setCopy(nextNode.forwardReverseList);
+ kmer.mergeWithFFKmer(initialKmerSize, nextNode.getKmer());
+ }
+
+ public void mergeForwardPre(final NodeWritable preNode, int initialKmerSize) {
+ this.reverseForwardList.setCopy(preNode.reverseForwardList);
+ this.reverseReverseList.setCopy(preNode.reverseReverseList);
+ kmer.mergeWithRRKmer(initialKmerSize, preNode.getKmer());
+ }
+
+ public int inDegree() {
+ return reverseReverseList.getCountOfPosition() + reverseForwardList.getCountOfPosition();
+ }
+
+ public int outDegree() {
+ return forwardForwardList.getCountOfPosition() + forwardReverseList.getCountOfPosition();
+ }
+
+ /*
+ * Return if this node is a "path" compressible node, that is, it has an in-degree and out-degree of 1
+ */
+ public boolean isPathNode() {
+ return inDegree() == 1 && outDegree() == 1;
+ }
+
+ public boolean isSimpleOrTerminalPath() {
+ return isPathNode() || (inDegree() == 0 && outDegree() == 1) || (inDegree() == 1 && outDegree() == 0);
+ }
+}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
new file mode 100644
index 0000000..d9a409d
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
@@ -0,0 +1,295 @@
+package edu.uci.ics.genomix.type;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.io.Writable;
+
+import edu.uci.ics.genomix.data.Marshal;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+public class PositionListWritable implements Writable, Iterable<PositionWritable>, Serializable {
+ private static final long serialVersionUID = 1L;
+ protected static final byte[] EMPTY_BYTES = {0,0,0,0};
+ protected static final int HEADER_SIZE = 4;
+
+ protected byte[] storage;
+ protected int offset;
+ protected int valueCount;
+ protected int maxStorageSize;
+
+
+ protected PositionWritable posIter = new PositionWritable();
+
+ public PositionListWritable() {
+ storage = EMPTY_BYTES;
+ valueCount = 0;
+ offset = 0;
+ maxStorageSize = storage.length;
+ }
+
+ public PositionListWritable(byte[] data, int offset) {
+ setNewReference(data, offset);
+ }
+
+ public PositionListWritable(List<PositionWritable> posns) {
+ this();
+ setSize(posns.size() * PositionWritable.LENGTH + HEADER_SIZE); // reserve space for all elements
+ for (PositionWritable p : posns) {
+ append(p);
+ }
+ }
+
+ public void setNewReference(byte[] data, int offset) {
+ this.valueCount = Marshal.getInt(data, offset);
+ this.storage = data;
+ this.offset = offset;
+ maxStorageSize = valueCount * PositionWritable.LENGTH + HEADER_SIZE;
+ }
+
+ public void append(long uuid) {
+ setSize((1 + valueCount) * PositionWritable.LENGTH + HEADER_SIZE);
+ Marshal.putLong(uuid, storage, offset + valueCount * PositionWritable.LENGTH + HEADER_SIZE);
+ valueCount += 1;
+ Marshal.putInt(valueCount, storage, offset);
+ }
+
+ public void append(byte mateId, long readId, int posId) {
+ append(PositionWritable.makeUUID(mateId, readId, posId));
+ }
+
+ public void append(PositionWritable pos) {
+ if (pos != null)
+ append(pos.getUUID());
+ else
+ throw new RuntimeException("This position is null pointer!");
+ }
+
+ /*
+ * Append the otherList to the end of myList
+ */
+ public void appendList(PositionListWritable otherList) {
+ if (otherList.valueCount > 0) {
+ setSize((valueCount + otherList.valueCount) * PositionWritable.LENGTH + HEADER_SIZE);
+ // copy contents of otherList into the end of my storage
+ System.arraycopy(otherList.storage, otherList.offset + HEADER_SIZE, storage, offset + valueCount
+ * PositionWritable.LENGTH + HEADER_SIZE, otherList.valueCount * PositionWritable.LENGTH);
+ valueCount += otherList.valueCount;
+ Marshal.putInt(valueCount, storage, offset);
+ }
+ }
+
+ /**
+ * Save the union of my list and otherList. Uses a temporary HashSet for
+ * uniquefication; every element is copied because the list iterator reuses one PositionWritable
+ */
+ public void unionUpdate(PositionListWritable otherList) {
+ int newSize = valueCount + otherList.valueCount;
+ HashSet<PositionWritable> uniqueElements = new HashSet<PositionWritable>(newSize);
+ for (PositionWritable pos : this) {
+ uniqueElements.add(new PositionWritable(pos));
+ }
+ for (PositionWritable pos : otherList) {
+ uniqueElements.add(new PositionWritable(pos)); // copy: pos is otherList's shared posIter and changes on the next step
+ }
+ valueCount = 0;
+ setSize(newSize * PositionWritable.LENGTH + HEADER_SIZE);
+ for (PositionWritable pos : uniqueElements) {
+ append(pos);
+ }
+ }
+
+ public static int getCountByDataLength(int length) {
+ if (length % PositionWritable.LENGTH != 0) {
+ throw new IllegalArgumentException("Length of positionlist is invalid");
+ }
+ return length / PositionWritable.LENGTH;
+ }
+
+ public void set(PositionListWritable otherList) {
+ set(otherList.storage, otherList.offset);
+ }
+
+ public void set(byte[] newData, int newOffset) {
+ int newValueCount = Marshal.getInt(newData, newOffset);
+ setSize(newValueCount * PositionWritable.LENGTH + HEADER_SIZE);
+ if (newValueCount > 0) {
+ System.arraycopy(newData, newOffset + HEADER_SIZE, storage, this.offset + HEADER_SIZE, newValueCount * PositionWritable.LENGTH);
+ }
+ valueCount = newValueCount;
+ Marshal.putInt(valueCount, storage, this.offset);
+ }
+
+ public void reset() {
+ valueCount = 0;
+ Marshal.putInt(valueCount, storage, offset);
+ }
+
+ protected void setSize(int size) {
+ if (size > getCapacity()) {
+ setCapacity((size * 3 / 2));
+ }
+ }
+
+ protected int getCapacity() {
+ return maxStorageSize - offset;
+ }
+
+ public String printReadIdSet(){
+ StringBuilder output = new StringBuilder(); // comma-separated read ids; an empty list yields "" instead of indexing position -1
+ for (int i = 0; i < valueCount; i++) {
+ output.append(i == 0 ? "" : ",").append(getPosition(i).getReadId());
+ }
+ return output.toString();
+ }
+
+ protected void setCapacity(int new_cap) {
+ if (new_cap > getCapacity()) {
+ byte[] new_data = new byte[new_cap];
+ if (valueCount > 0) {
+ System.arraycopy(storage, offset, new_data, 0, valueCount * PositionWritable.LENGTH + HEADER_SIZE);
+ }
+ storage = new_data;
+ offset = 0;
+ maxStorageSize = storage.length;
+ }
+ }
+
+ public PositionWritable getPosition(int i) {
+ if (i < 0 || i >= valueCount) { // a negative index would otherwise address the header or bytes before this list
+ throw new ArrayIndexOutOfBoundsException("No such positions");
+ }
+ posIter.setNewReference(storage, offset + i * PositionWritable.LENGTH + HEADER_SIZE);
+ return posIter; // shared view object: it is re-pointed by the next getPosition call
+ }
+
+ public void resetPosition(int i, long uuid) {
+ if (i < 0 || i >= valueCount) { // a negative index would otherwise overwrite the header or bytes before this list
+ throw new ArrayIndexOutOfBoundsException("No such positions");
+ }
+ Marshal.putLong(uuid, storage, offset + i * PositionWritable.LENGTH + HEADER_SIZE); // overwrite element i in place
+ }
+
+ public int getCountOfPosition() {
+ return valueCount;
+ }
+
+ public byte[] getByteArray() {
+ return storage;
+ }
+
+ public int getStartOffset() {
+ return offset;
+ }
+
+ public int getLength() {
+ return valueCount * PositionWritable.LENGTH + HEADER_SIZE;
+ }
+
+ @Override
+ public Iterator<PositionWritable> iterator() {
+ Iterator<PositionWritable> it = new Iterator<PositionWritable>() {
+
+ private int currentIndex = 0;
+
+ @Override
+ public boolean hasNext() {
+ return currentIndex < valueCount;
+ }
+
+ @Override
+ public PositionWritable next() {
+ return getPosition(currentIndex++);
+ }
+
+ @Override
+ public void remove() {
+ if (currentIndex < valueCount)
+ System.arraycopy(storage, offset + currentIndex * PositionWritable.LENGTH + HEADER_SIZE, storage, offset
+ + (currentIndex - 1) * PositionWritable.LENGTH + HEADER_SIZE, (valueCount - currentIndex)
+ * PositionWritable.LENGTH);
+ valueCount--;
+ currentIndex--;
+ Marshal.putInt(valueCount, storage, offset);
+ }
+ };
+ return it;
+ }
+
+ /*
+ * remove the first instance of @toRemove (linear scan). If it is absent: no-op when ignoreMissing is true, otherwise throws ArrayIndexOutOfBoundsException.
+ */
+ public void remove(PositionWritable toRemove, boolean ignoreMissing) {
+ Iterator<PositionWritable> posIterator = this.iterator();
+ while (posIterator.hasNext()) {
+ if (toRemove.equals(posIterator.next())) {
+ posIterator.remove();
+ return; // found it. return early.
+ }
+ }
+ // element not found.
+ if (!ignoreMissing) {
+ throw new ArrayIndexOutOfBoundsException("the PositionWritable `" + toRemove.toString()
+ + "` was not found in this list.");
+ }
+ }
+
+ public void remove(PositionWritable toRemove) {
+ remove(toRemove, false);
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(valueCount);
+ out.write(storage, offset + HEADER_SIZE, valueCount * PositionWritable.LENGTH);
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ int newValueCount = in.readInt();
+ setSize(newValueCount * PositionWritable.LENGTH + HEADER_SIZE);
+ in.readFully(storage, offset + HEADER_SIZE, newValueCount * PositionWritable.LENGTH);
+ valueCount = newValueCount;
+ Marshal.putInt(valueCount, storage, offset);
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sbuilder = new StringBuilder();
+ sbuilder.append('[');
+ for (PositionWritable pos : this) {
+ sbuilder.append(pos.toString());
+ sbuilder.append(',');
+ }
+ if (valueCount > 0) {
+ sbuilder.setCharAt(sbuilder.length() - 1, ']');
+ } else {
+ sbuilder.append(']');
+ }
+ return sbuilder.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ return Marshal.hashBytes(getByteArray(), getStartOffset(), getLength());
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof PositionListWritable))
+ return false;
+ PositionListWritable other = (PositionListWritable) o;
+ if (this.valueCount != other.valueCount)
+ return false;
+ for (int i = 0; i < this.valueCount; i++) {
+ if (!this.getPosition(i).equals(other.getPosition(i)))
+ return false;
+ }
+ return true;
+ }
+}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
new file mode 100644
index 0000000..bcdd423
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
@@ -0,0 +1,132 @@
+package edu.uci.ics.genomix.type;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.data.Marshal;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+public class PositionWritable implements WritableComparable<PositionWritable>, Serializable{
+ private static final long serialVersionUID = 1L;
+ protected byte[] storage;
+ protected int offset;
+ public static final int LENGTH = 8;
+
+ public static final int totalBits = 64;
+ private static final int bitsForMate = 1;
+ private static final int bitsForPosition = 16;
+ private static final int readIdShift = bitsForPosition + bitsForMate;
+ private static final int positionIdShift = bitsForMate;
+
+ public PositionWritable() {
+ storage = new byte[LENGTH];
+ offset = 0;
+ }
+
+ public PositionWritable(byte mateId, long readId, int posId){
+ this();
+ set(mateId, readId, posId);
+ }
+
+ public PositionWritable(PositionWritable other) {
+ this();
+ set(other);
+ }
+ public PositionWritable(byte[] storage, int offset) {
+ setNewReference(storage, offset);
+ }
+
+ public void set(long uuid){
+ Marshal.putLong(uuid, storage, offset);
+ }
+
+ public static long makeUUID(byte mateId, long readId, int posId) {
+ return (readId << readIdShift) + ((long) (posId & 0xFFFF) << positionIdShift) + (mateId & 0b1); // packed with the same shift constants the getters decode with
+ }
+
+ public void set(byte mateId, long readId, int posId){
+ Marshal.putLong(makeUUID(mateId, readId, posId), storage, offset);
+ }
+
+ public void set(PositionWritable pos) {
+ set(pos.getMateId(),pos.getReadId(),pos.getPosId());
+ }
+
+ public void setNewReference(byte[] storage, int offset) {
+ this.storage = storage;
+ this.offset = offset;
+ }
+
+ public void reset(){
+ storage = new byte[LENGTH];
+ offset = 0;
+ }
+
+ public long getUUID(){
+ return Marshal.getLong(storage, offset);
+ }
+
+ public byte getMateId(){
+ return (byte) (Marshal.getLong(storage, offset) & 0b1);
+ }
+
+ public long getReadId(){
+ return Marshal.getLong(storage, offset) >>> readIdShift;
+ }
+
+ public int getPosId(){
+ return (int) ((Marshal.getLong(storage, offset) >>> positionIdShift) & 0xffff);
+ }
+
+ public byte[] getByteArray() {
+ return storage;
+ }
+
+ public int getStartOffset() {
+ return offset;
+ }
+
+ public int getLength() {
+ return LENGTH;
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ in.readFully(storage, offset, LENGTH);
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.write(storage, offset, LENGTH);
+ }
+
+ @Override
+ public int hashCode() {
+ return Marshal.hashBytes(getByteArray(), getStartOffset(), getLength());
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof PositionWritable))
+ return false;
+ PositionWritable other = (PositionWritable) o;
+ return this.getUUID() == other.getUUID();
+ }
+
+ @Override
+ public int compareTo(PositionWritable other) {
+ return Long.compare(this.getUUID(), other.getUUID()); // orders by the signed 64-bit uuid: -1, 0 or 1
+ }
+
+ /*
+ * String of form "(readId-posID_mate)" where mate is _0 or _1
+ */
+ @Override
+ public String toString() {
+ return "(" + this.getReadId() + "-" + this.getPosId() + "_" + (this.getMateId()) + ")";
+ }
+}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
new file mode 100644
index 0000000..f9ebd15
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
@@ -0,0 +1,733 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.type;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.hadoop.io.BinaryComparable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+
+import edu.uci.ics.genomix.data.KmerUtil;
+import edu.uci.ics.genomix.data.Marshal;
+import edu.uci.ics.genomix.type.NodeWritable.DirectionFlag;
+
+
+/**
+ * Variable-length kmer which stores its length internally.
+ * Note: `offset` as used in this class is the offset at which the *kmer*
+ * begins. There is a {@value HEADER_SIZE}-byte header preceding the kmer
+ */
+public class VKmerBytesWritable extends BinaryComparable implements Serializable, WritableComparable<BinaryComparable> {
+ private static final long serialVersionUID = 1L;
+ protected static final byte[] EMPTY_BYTES = { 0, 0, 0, 0 }; // int indicating 0 length
+ protected static final int HEADER_SIZE = 4; // number of bytes for header info
+
+ protected int lettersInKmer;
+ protected int bytesUsed;
+ protected byte[] bytes;
+ protected int kmerStartOffset;
+ protected int storageMaxSize; // since we may be a reference inside a larger datablock, we must track our maximum size
+
+ /**
+ * Initialize as empty kmer
+ */
+ public VKmerBytesWritable() {
+ this(EMPTY_BYTES, 0);
+ }
+
+ /**
+ * Copy contents of kmer string
+ */
+ public VKmerBytesWritable(String kmer) {
+ bytes = new byte[HEADER_SIZE + KmerUtil.getByteNumFromK(kmer.length())];
+ kmerStartOffset = HEADER_SIZE;
+ storageMaxSize = bytes.length;
+ setAsCopy(kmer);
+ }
+
+ /**
+ * Set as reference to given data
+ *
+ * @param storage
+ * : byte array with header
+ * @param offset
+ */
+ public VKmerBytesWritable(byte[] storage, int offset) {
+ setAsReference(storage, offset);
+ }
+
+ /**
+ * Reserve space for k letters
+ */
+ public VKmerBytesWritable(int k) {
+ if (k > 0) {
+ bytes = new byte[HEADER_SIZE + KmerUtil.getByteNumFromK(k)];
+ } else if (k == 0) {
+ bytes = EMPTY_BYTES;
+ } else {
+ throw new IllegalArgumentException("Invalid K (" + k + ").");
+ }
+ kmerStartOffset = HEADER_SIZE;
+ storageMaxSize = bytes.length;
+ setKmerLength(k);
+ }
+
+ /**
+ * deep copy of kmer in other
+ *
+ * @param other
+ */
+ public VKmerBytesWritable(VKmerBytesWritable other) {
+ this(other.lettersInKmer);
+ setAsCopy(other);
+ }
+
+ /**
+ * deep copy of kmer in other
+ *
+ * @param other
+ */
+ public VKmerBytesWritable(KmerBytesWritable other) {
+ this(other.lettersInKmer);
+ setAsCopy(other);
+ }
+
+ /**
+ * Deep copy of the given kmer
+ *
+ * @param other
+ */
+ public void setAsCopy(VKmerBytesWritable other) {
+ reset(other.lettersInKmer);
+ if (lettersInKmer > 0) {
+ System.arraycopy(other.bytes, other.kmerStartOffset, bytes, this.kmerStartOffset, bytesUsed);
+ }
+ }
+
+ /**
+ * Deep copy of the given kmer
+ *
+ * @param other
+ */
+ public void setAsCopy(KmerBytesWritable other) {
+ reset(other.lettersInKmer);
+ if (lettersInKmer > 0) {
+ System.arraycopy(other.bytes, other.offset, bytes, this.kmerStartOffset, bytesUsed);
+ }
+ }
+
+ /** Set from a String kmer: each symbol is packed as a 2-bit gene code, letter 0 in the low bits of the last kmer byte. */
+ public void setAsCopy(String kmer) {
+ reset(kmer.length());
+ for (int pos = 0; pos < lettersInKmer; pos++) { // same layout that geneCodeAtPosition() reads back
+ int target = kmerStartOffset + bytesUsed - 1 - pos / 4;
+ bytes[target] = (byte) ((pos % 4 == 0 ? 0 : bytes[target]) | (GeneCode.getCodeFromSymbol((byte) kmer.charAt(pos)) << ((pos % 4) << 1)));
+ }
+ }
+
+ /**
+ * Deep copy of the given bytes data
+ *
+ * @param newData
+ * : byte array to copy (should have a header)
+ * @param offset
+ */
+ public void setAsCopy(byte[] newData, int offset) {
+ int k = Marshal.getInt(newData, offset);
+ reset(k);
+ System.arraycopy(newData, offset + HEADER_SIZE, bytes, this.kmerStartOffset, bytesUsed);
+ }
+
+
+ /**
+ * Point this kmer at the given byte array without copying it. The array is
+ * shared: this call itself rewrites the length header inside newData.
+ *
+ * @param newData
+ * : byte array to reference (an int length header at blockOffset, then the kmer bytes)
+ * @param blockOffset : index of the header; IllegalArgumentException if the buffer is too short for the k it declares
+ */
+ public void setAsReference(byte[] newData, int blockOffset) {
+ bytes = newData;
+ kmerStartOffset = blockOffset + HEADER_SIZE;
+ int kRequested = Marshal.getInt(newData, blockOffset);
+ int bytesRequested = KmerUtil.getByteNumFromK(kRequested) + HEADER_SIZE;
+ if (newData.length - blockOffset < bytesRequested) {
+ throw new IllegalArgumentException("Requested " + bytesRequested + " bytes (k=" + kRequested
+ + ") but buffer has only " + (newData.length - blockOffset) + " bytes");
+ }
+ storageMaxSize = bytesRequested; // since we are a reference, store our max capacity
+ setKmerLength(kRequested);
+ }
+
+ /**
+ * Resize to hold k letters. A fresh array is allocated only when k needs more bytes than are
+ * currently in use (bytesUsed, not the capacity); otherwise the array is kept and its bytes are not cleared.
+ * @param k : new kmer length in letters; it is also written to the header
+ */
+ public void reset(int k) {
+ int newByteLength = KmerUtil.getByteNumFromK(k);
+ if (bytesUsed < newByteLength) {
+ bytes = new byte[newByteLength + HEADER_SIZE];
+ kmerStartOffset = HEADER_SIZE;
+ storageMaxSize = bytes.length;
+ }
+ setKmerLength(k);
+ }
+
+ protected void clearLeadBit() {
+ if (lettersInKmer % 4 != 0) {
+ bytes[kmerStartOffset] &= (1 << ((lettersInKmer % 4) << 1)) - 1;
+ }
+ }
+
+ /**
+ * Get one genecode (A|G|C|T) from the given kmer index e.g. Get the 4th
+ * gene of the kmer ACGTA will return T
+ *
+ * @param pos
+ * @return
+ */
+ public byte getGeneCodeAtPosition(int pos) {
+ if (pos >= lettersInKmer || pos < 0) {
+ throw new ArrayIndexOutOfBoundsException("Gene position (" + pos + ") out of bounds for k=" + lettersInKmer);
+ }
+ return geneCodeAtPosition(pos);
+ }
+
+ /**
+ * unchecked version of getGeneCodeAtPosition. Used when kmerlength is
+ * inaccurate (mid-merge)
+ */
+ private byte geneCodeAtPosition(int pos) {
+ int posByte = pos / 4;
+ int shift = (pos % 4) << 1;
+ return (byte) ((bytes[kmerStartOffset + bytesUsed - 1 - posByte] >> shift) & 0x3);
+ }
+
+ /**
+ * Shift Kmer to accept new char input
+ *
+ * @param c
+ * Input new gene character
+ * @return the shift out gene, in gene code format
+ */
+ public byte shiftKmerWithNextChar(byte c) {
+ return shiftKmerWithNextCode(GeneCode.getCodeFromSymbol(c));
+ }
+
+ /**
+ * Shift Kmer to accept new gene code
+ *
+ * @param c
+ * Input new gene code
+ * @return the shift out gene, in gene code format
+ */
+ public byte shiftKmerWithNextCode(byte c) {
+ byte output = (byte) (bytes[kmerStartOffset + bytesUsed - 1] & 0x03);
+ for (int i = bytesUsed - 1; i > 0; i--) {
+ byte in = (byte) (bytes[kmerStartOffset + i - 1] & 0x03);
+ bytes[kmerStartOffset + i] = (byte) (((bytes[kmerStartOffset + i] >>> 2) & 0x3f) | (in << 6));
+ }
+ int pos = ((lettersInKmer - 1) % 4) << 1;
+ byte code = (byte) (c << pos);
+ bytes[kmerStartOffset] = (byte) (((bytes[kmerStartOffset] >>> 2) & 0x3f) | code);
+ clearLeadBit();
+ return output;
+ }
+
+ /**
+ * Shift Kmer to accept new input char
+ *
+ * @param c
+ * Input new gene character
+ * @return the shiftout gene, in gene code format
+ */
+ public byte shiftKmerWithPreChar(byte c) {
+ return shiftKmerWithPreCode(GeneCode.getCodeFromSymbol(c));
+ }
+
+ /**
+ * Shift Kmer to accept new gene code
+ *
+ * @param c
+ * Input new gene code
+ * @return the shiftout gene, in gene code format
+ */
+ public byte shiftKmerWithPreCode(byte c) {
+ int pos = ((lettersInKmer - 1) % 4) << 1;
+ byte output = (byte) ((bytes[kmerStartOffset] >> pos) & 0x03);
+ for (int i = 0; i < bytesUsed - 1; i++) {
+ byte in = (byte) ((bytes[kmerStartOffset + i + 1] >> 6) & 0x03);
+ bytes[kmerStartOffset + i] = (byte) ((bytes[kmerStartOffset + i] << 2) | in);
+ }
+ bytes[kmerStartOffset + bytesUsed - 1] = (byte) ((bytes[kmerStartOffset + bytesUsed - 1] << 2) | c);
+ clearLeadBit();
+ return output;
+ }
+
+ public int getKmerLetterLength() {
+ return lettersInKmer;
+ }
+
+ @Override
+ public byte[] getBytes() {
+ return bytes;
+ }
+
+    /**
+     * Return the offset in the backing array where this kmer's data block
+     * starts, i.e. the position of the header that precedes the kmer bytes.
+     */
+    public int getBlockOffset() {
+        final int headerStart = kmerStartOffset - HEADER_SIZE;
+        return headerStart;
+    }
+
+    /**
+     * Return the offset in the backing array of the first kmer byte, i.e.
+     * the position right after the header.
+     */
+    public int getKmerOffset() {
+        return this.kmerStartOffset;
+    }
+
+    /**
+     * Return the total number of bytes occupied by this kmer: the header
+     * plus the kmer bytes in use.
+     */
+    @Override
+    public int getLength() {
+        return HEADER_SIZE + bytesUsed;
+    }
+
+    /**
+     * Return the number of bytes in use by the kmer itself, header excluded
+     */
+    public int getKmerByteLength() {
+        return this.bytesUsed;
+    }
+
+
+    /**
+     * Record a new letter count for this kmer: updates the letter and byte
+     * counters and rewrites the header. The backing array is not resized here.
+     *
+     * @param k
+     *            the new number of letters
+     */
+    public void setKmerLength(int k) {
+        lettersInKmer = k;
+        bytesUsed = KmerUtil.getByteNumFromK(k);
+        saveHeader(k);
+    }
+
+ protected int getKmerByteCapacity() {
+ return storageMaxSize - HEADER_SIZE;
+ }
+
+ protected void setKmerByteCapacity(int new_cap) {
+ if (new_cap != getKmerByteCapacity()) {
+ byte[] new_data = new byte[new_cap + HEADER_SIZE];
+ if (new_cap < bytesUsed) {
+ bytesUsed = new_cap;
+ }
+ if (bytesUsed != 0) {
+ System.arraycopy(bytes, kmerStartOffset, new_data, HEADER_SIZE, bytesUsed);
+ }
+ bytes = new_data;
+ kmerStartOffset = HEADER_SIZE;
+ storageMaxSize = bytes.length;
+ }
+ }
+
+ private void saveHeader(int length) {
+ Marshal.putInt(length, bytes, kmerStartOffset - HEADER_SIZE);
+ }
+
+    /**
+     * Read a kmer from {@code in}: first the letter count as an int, then
+     * (when the count is positive) the packed kmer bytes. Storage is
+     * replaced by a new array when the current one is too small, and the
+     * header is rewritten at the end.
+     */
+    @Override
+    public void readFields(DataInput in) throws IOException {
+        final int letters = in.readInt();
+        lettersInKmer = letters;
+        bytesUsed = KmerUtil.getByteNumFromK(letters);
+        if (letters > 0) {
+            final boolean storageTooSmall = getKmerByteCapacity() < bytesUsed;
+            if (storageTooSmall) {
+                bytes = new byte[bytesUsed + HEADER_SIZE];
+                kmerStartOffset = HEADER_SIZE;
+                storageMaxSize = bytes.length;
+            }
+            in.readFully(bytes, kmerStartOffset, bytesUsed);
+        }
+        saveHeader(letters);
+    }
+
+    /**
+     * Write this kmer's data block to {@code out}: the header followed by
+     * the kmer bytes in use
+     */
+    @Override
+    public void write(DataOutput out) throws IOException {
+        final int blockStart = kmerStartOffset - HEADER_SIZE;
+        final int blockLength = bytesUsed + HEADER_SIZE;
+        out.write(bytes, blockStart, blockLength);
+    }
+
+    /**
+     * Hash over the header and the kmer bytes in use
+     */
+    @Override
+    public int hashCode() {
+        final int blockStart = kmerStartOffset - HEADER_SIZE;
+        final int blockLength = bytesUsed + HEADER_SIZE;
+        return Marshal.hashBytes(bytes, blockStart, blockLength);
+    }
+
+ @Override
+ public boolean equals(Object right_obj) {
+ if (right_obj instanceof VKmerBytesWritable) {
+ // since these may be backed by storage of different sizes, we have to manually check each byte, including the header
+ VKmerBytesWritable right = (VKmerBytesWritable) right_obj;
+ for (int i = -HEADER_SIZE; i < bytesUsed; i++) {
+ if (bytes[kmerStartOffset + i] != right.bytes[right.kmerStartOffset + i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return false;
+ }
+
+    /**
+     * Return the kmer as text, as recovered by {@code KmerUtil.recoverKmerFrom}
+     */
+    @Override
+    public String toString() {
+        return KmerUtil.recoverKmerFrom(lettersInKmer, this.bytes, this.kmerStartOffset, this.bytesUsed);
+    }
+
+    /**
+     * Raw-bytes comparator for serialized VKmers: orders by the letter count
+     * stored in the header first, and by the kmer bytes when the counts tie.
+     */
+    public static class Comparator extends WritableComparator {
+
+        public Comparator() {
+            super(VKmerBytesWritable.class);
+        }
+
+        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+            final int lettersLeft = Marshal.getInt(b1, s1);
+            final int lettersRight = Marshal.getInt(b2, s2);
+            if (lettersLeft != lettersRight) {
+                return lettersLeft - lettersRight;
+            }
+            // same letter count: compare the payloads that follow each header
+            return compareBytes(b1, s1 + HEADER_SIZE, l1 - HEADER_SIZE, b2, s2 + HEADER_SIZE, l2 - HEADER_SIZE);
+        }
+    }
+
+ // Runs once when the class is loaded: makes the raw-bytes Comparator above
+ // the one WritableComparator hands out for VKmerBytesWritable.
+ static { // register this comparator
+ WritableComparator.define(VKmerBytesWritable.class, new Comparator());
+ }
+
+ /**
+ * Ensures that there is space for at least `size` bytes of kmer (not
+ * including any header)
+ */
+ protected void setSize(int size) {
+ if (size > getKmerByteCapacity()) {
+ setKmerByteCapacity((size * 3 / 2));
+ }
+ this.bytesUsed = size;
+ }
+
+    /**
+     * Reset this kmer to {@code k} letters and fill it from the read text
+     *
+     * @param k
+     *            number of letters to take
+     * @param stringBytes
+     *            the read text
+     * @param start
+     *            index in {@code stringBytes} of the first letter
+     */
+    public void setByRead(int k, byte[] stringBytes, int start) {
+        this.reset(k);
+        this.setByRead(stringBytes, start);
+    }
+
+ /**
+ * Read Kmer from read text into bytes array e.g. AATAG will compress as
+ * [0x000G, 0xATAA]
+ *
+ * Letters are packed four per byte, two bits each, lowest bits first; full
+ * bytes are stored starting at the last used byte and moving towards the
+ * first.
+ *
+ * @param stringBytes
+ * the read text
+ * @param start
+ * index in stringBytes of the first letter to take
+ */
+ private void setByRead(byte[] stringBytes, int start) {
+ // l accumulates the byte being built; bytecount is the bit offset of the next letter in it
+ byte l = 0;
+ int bytecount = 0;
+ // bcount is the index (relative to kmerStartOffset) of the next byte to store
+ int bcount = this.bytesUsed - 1;
+ // stops early if the read text ends before lettersInKmer letters were taken
+ for (int i = start; i < start + lettersInKmer && i < stringBytes.length; i++) {
+ byte code = GeneCode.getCodeFromSymbol(stringBytes[i]);
+ l |= (byte) (code << bytecount);
+ bytecount += 2;
+ if (bytecount == 8) {
+ // four letters collected: store the byte and start a new one
+ bytes[kmerStartOffset + bcount--] = l;
+ l = 0;
+ bytecount = 0;
+ }
+ }
+ // a byte slot is still unfilled: the accumulator (possibly partial) goes into the first byte
+ if (bcount >= 0) {
+ bytes[kmerStartOffset] = l;
+ }
+ }
+
+    /**
+     * Reset this kmer to {@code k} letters and fill it with the reversed,
+     * paired form of the read text
+     *
+     * @param k
+     *            number of letters to take
+     * @param stringBytes
+     *            the read text
+     * @param start
+     *            index in {@code stringBytes} of the first letter
+     */
+    public void setByReadReverse(int k, byte[] stringBytes, int start) {
+        this.reset(k);
+        this.setByReadReverse(stringBytes, start);
+    }
+
+ /**
+ * Compress Reversed read into bytes array e.g. AATAG will paired to CTATT,
+ * and then compress as [0x000T,0xTATC]
+ *
+ * The text is walked from its last letter back to start; each letter is
+ * replaced by its paired code and packed four per byte, two bits each.
+ *
+ * @param array
+ * the read text
+ * @param start
+ * index in array of the first letter of the kmer
+ */
+ private void setByReadReverse(byte[] array, int start) {
+ // l accumulates the byte being built; bytecount is the bit offset of the next letter in it
+ byte l = 0;
+ int bytecount = 0;
+ // bcount is the index (relative to kmerStartOffset) of the next byte to store
+ int bcount = bytesUsed - 1;
+ // walk backwards and stop at start (not at index 0) so letters before the kmer are never read
+ for (int i = start + lettersInKmer - 1; i >= start && i < array.length; i--) {
+ byte code = GeneCode.getPairedCodeFromSymbol(array[i]);
+ l |= (byte) (code << bytecount);
+ bytecount += 2;
+ if (bytecount == 8) {
+ // four letters collected: store the byte and start a new one
+ bytes[kmerStartOffset + bcount--] = l;
+ l = 0;
+ bytecount = 0;
+ }
+ }
+ // a byte slot is still unfilled: the accumulator (possibly partial) goes into the first byte
+ if (bcount >= 0) {
+ bytes[kmerStartOffset] = l;
+ }
+ }
+
+ /**
+ * Merge Kmer with the next connected Kmer e.g. AAGCTAA merge with AACAACC,
+ * if the initial kmerSize = 3 then it will return AAGCTAACAACC
+ *
+ * The two kmers overlap by initialKmerSize - 1 letters, so only the letters
+ * of kmer from position initialKmerSize - 1 onwards are appended.
+ *
+ * @param initialKmerSize
+ * : the initial kmerSize
+ * @param kmer
+ * : the next kmer
+ * @throws IllegalArgumentException
+ * if either kmer has fewer than initialKmerSize - 1 letters
+ */
+ public void mergeWithFFKmer(int initialKmerSize, VKmerBytesWritable kmer) {
+ if (lettersInKmer < initialKmerSize - 1 || kmer.lettersInKmer < initialKmerSize - 1) {
+ throw new IllegalArgumentException("Not enough letters in the kmers to perform a merge! Tried K=" + initialKmerSize + ", merge '" + this + "' with '" + kmer + "'.");
+ }
+ // remember the pre-merge sizes; setSize() below changes bytesUsed
+ int preKmerLength = lettersInKmer;
+ int preSize = bytesUsed;
+ lettersInKmer += kmer.lettersInKmer - initialKmerSize + 1;
+ setSize(KmerUtil.getByteNumFromK(lettersInKmer));
+ // move the existing bytes to the end of the enlarged region
+ for (int i = 1; i <= preSize; i++) {
+ bytes[kmerStartOffset + bytesUsed - i] = bytes[kmerStartOffset + preSize - i];
+ }
+ // append the non-overlapping letters of kmer, one byte (four letters) per step
+ for (int k = initialKmerSize - 1; k < kmer.getKmerLetterLength(); k += 4) {
+ byte onebyte = KmerBytesWritable.getOneByteFromKmerAtPosition(k, kmer.bytes, kmer.kmerStartOffset,
+ kmer.bytesUsed);
+ KmerBytesWritable.appendOneByteAtPosition(preKmerLength + k - initialKmerSize + 1, onebyte, bytes,
+ kmerStartOffset, bytesUsed);
+ }
+ clearLeadBit();
+ saveHeader(lettersInKmer);
+ }
+
+    /**
+     * Fixed-length overload: converts {@code kmer} to a variable-length kmer
+     * through its text form and delegates to the VKmer variant.
+     */
+    public void mergeWithFFKmer(int kmerSize, KmerBytesWritable kmer) {
+        // TODO make this more efficient
+        final VKmerBytesWritable variableLengthCopy = new VKmerBytesWritable(kmer.toString());
+        mergeWithFFKmer(kmerSize, variableLengthCopy);
+    }
+
+ /**
+ * Merge Kmer with the next connected Kmer, when that Kmer needs to be
+ * reverse-complemented e.g. AAGCTAA merge with GGTTGTT, if the initial
+ * kmerSize = 3 then it will return AAGCTAACAACC A merge B => A B~
+ *
+ * @param initialKmerSize
+ * : the initial kmerSize
+ * @param kmer
+ * : the next kmer
+ * @throws IllegalArgumentException
+ * if either kmer has fewer than initialKmerSize - 1 letters
+ */
+ public void mergeWithFRKmer(int initialKmerSize, VKmerBytesWritable kmer) {
+ if (lettersInKmer < initialKmerSize - 1 || kmer.lettersInKmer < initialKmerSize - 1) {
+ throw new IllegalArgumentException("Not enough letters in the kmers to perform a merge! Tried K=" + initialKmerSize + ", merge '" + this + "' with '" + kmer + "'.");
+ }
+ // remember the pre-merge sizes; setSize() below changes bytesUsed
+ int preSize = bytesUsed;
+ int preKmerLength = lettersInKmer;
+ lettersInKmer += kmer.lettersInKmer - initialKmerSize + 1;
+ setSize(KmerUtil.getByteNumFromK(lettersInKmer));
+ // copy prefix into right-side of buffer
+ for (int i = 1; i <= preSize; i++) {
+ bytes[kmerStartOffset + bytesUsed - i] = bytes[kmerStartOffset + preSize - i];
+ }
+
+ // bit offset at which the next letter goes, derived from how many letters the prefix holds
+ int bytecount = (preKmerLength % 4) * 2;
+ int bcount = bytesUsed - preSize - bytecount / 8; // may overlap
+ // previous kmer
+ // continue filling the prefix's first byte when bcount points at it; otherwise start from an empty byte
+ byte l = bcount == bytesUsed - preSize ? bytes[kmerStartOffset + bcount] : 0x00;
+ bytecount %= 8;
+ // walk kmer's letters from position (length - initialKmerSize) down to 0, appending each one's paired code
+ for (int i = kmer.lettersInKmer - initialKmerSize; i >= 0; i--) {
+ byte code = GeneCode.getPairedGeneCode(kmer.getGeneCodeAtPosition(i));
+ l |= (byte) (code << bytecount);
+ bytecount += 2;
+ if (bytecount == 8) {
+ // byte is full: store it and start a new one
+ bytes[kmerStartOffset + bcount--] = l;
+ l = 0;
+ bytecount = 0;
+ }
+ }
+ // a byte slot is still unfilled: the accumulator (possibly partial) goes into the first byte
+ if (bcount >= 0) {
+ bytes[kmerStartOffset] = l;
+ }
+ saveHeader(lettersInKmer);
+ }
+
+    /**
+     * Fixed-length overload: converts {@code kmer} to a variable-length kmer
+     * through its text form and delegates to the VKmer variant.
+     */
+    public void mergeWithFRKmer(int kmerSize, KmerBytesWritable kmer) {
+        // TODO make this more efficient
+        final VKmerBytesWritable variableLengthCopy = new VKmerBytesWritable(kmer.toString());
+        mergeWithFRKmer(kmerSize, variableLengthCopy);
+    }
+
+    /**
+     * Merge this kmer with the previous connected kmer when that kmer has to
+     * be reverse-complemented first, e.g. AACAACC merged with TTCTGCC and an
+     * initial kmerSize of 3 gives GGCAGAACAACC.
+     *
+     * Implemented by building the reverse complement of {@code preKmer} from
+     * its text form and handing it to
+     * {@link #mergeWithRRKmer(int, VKmerBytesWritable)}.
+     *
+     * @param initialKmerSize
+     *            the initial kmerSize
+     * @param preKmer
+     *            the previous kmer
+     */
+    public void mergeWithRFKmer(int initialKmerSize, VKmerBytesWritable preKmer) {
+        // TODO make this more efficient
+        final byte[] preKmerText = preKmer.toString().getBytes();
+        final VKmerBytesWritable reverseComplement = new VKmerBytesWritable(preKmer.lettersInKmer);
+        reverseComplement.setByReadReverse(preKmerText, 0);
+        mergeWithRRKmer(initialKmerSize, reverseComplement);
+    }
+
+    /**
+     * Fixed-length overload: converts {@code kmer} to a variable-length kmer
+     * through its text form and delegates to the VKmer variant.
+     */
+    public void mergeWithRFKmer(int kmerSize, KmerBytesWritable kmer) {
+        // TODO make this more efficient
+        final VKmerBytesWritable variableLengthCopy = new VKmerBytesWritable(kmer.toString());
+        mergeWithRFKmer(kmerSize, variableLengthCopy);
+    }
+
+ /**
+ * Merge Kmer with the previous connected Kmer e.g. AACAACC merge with
+ * AAGCTAA, if the initial kmerSize = 3 then it will return AAGCTAACAACC
+ *
+ * The non-overlapping leading letters of preKmer are written first, then
+ * this kmer's own letters are re-appended after them.
+ *
+ * @param initialKmerSize
+ * : the initial kmerSize
+ * @param preKmer
+ * : the previous kmer
+ * @throws IllegalArgumentException
+ * if either kmer has fewer than initialKmerSize - 1 letters
+ */
+ public void mergeWithRRKmer(int initialKmerSize, VKmerBytesWritable preKmer) {
+ if (lettersInKmer < initialKmerSize - 1 || preKmer.lettersInKmer < initialKmerSize - 1) {
+ throw new IllegalArgumentException("Not enough letters in the kmers to perform a merge! Tried K=" + initialKmerSize + ", merge '" + this + "' with '" + preKmer + "'.");
+ }
+ // remember the pre-merge sizes; setSize() below changes bytesUsed
+ int preKmerLength = lettersInKmer;
+ int preSize = bytesUsed;
+ lettersInKmer += preKmer.lettersInKmer - initialKmerSize + 1;
+ setSize(KmerUtil.getByteNumFromK(lettersInKmer));
+ // read this kmer's first four letters (using the old size) before the region is overwritten below
+ byte cacheByte = KmerBytesWritable.getOneByteFromKmerAtPosition(0, bytes, kmerStartOffset, preSize);
+
+ // copy prekmer
+ for (int k = 0; k < preKmer.lettersInKmer - initialKmerSize + 1; k += 4) {
+ byte onebyte = KmerBytesWritable.getOneByteFromKmerAtPosition(k, preKmer.bytes, preKmer.kmerStartOffset,
+ preKmer.bytesUsed);
+ KmerBytesWritable.appendOneByteAtPosition(k, onebyte, bytes, kmerStartOffset, bytesUsed);
+ }
+
+ // copy current kmer
+ // each step reads the next four old letters before writing the cached previous four,
+ // so the write always lags one step behind the read
+ int k = 4;
+ for (; k < preKmerLength; k += 4) {
+ byte onebyte = KmerBytesWritable.getOneByteFromKmerAtPosition(k, bytes, kmerStartOffset, preSize);
+ KmerBytesWritable.appendOneByteAtPosition(preKmer.lettersInKmer - initialKmerSize + k - 4 + 1, cacheByte,
+ bytes, kmerStartOffset, bytesUsed);
+ cacheByte = onebyte;
+ }
+ // write the last cached group
+ KmerBytesWritable.appendOneByteAtPosition(preKmer.lettersInKmer - initialKmerSize + k - 4 + 1, cacheByte,
+ bytes, kmerStartOffset, bytesUsed);
+ clearLeadBit();
+ saveHeader(lettersInKmer);
+ }
+
+    /**
+     * Fixed-length overload: converts {@code kmer} to a variable-length kmer
+     * through its text form and delegates to the VKmer variant.
+     */
+    public void mergeWithRRKmer(int kmerSize, KmerBytesWritable kmer) {
+        // TODO make this more efficient
+        final VKmerBytesWritable variableLengthCopy = new VKmerBytesWritable(kmer.toString());
+        mergeWithRRKmer(kmerSize, variableLengthCopy);
+    }
+
+    /**
+     * Merge with {@code kmer} using the merge routine selected by the
+     * direction bits of {@code dir} (after masking with
+     * {@code DirectionFlag.DIR_MASK}).
+     *
+     * @param dir
+     *            direction flags
+     * @param initialKmerSize
+     *            the initial kmerSize
+     * @param kmer
+     *            the kmer to merge with
+     * @throws RuntimeException
+     *             if the masked direction is none of FF, FR, RF, RR
+     */
+    public void mergeWithKmerInDir(byte dir, int initialKmerSize, VKmerBytesWritable kmer) {
+        final int direction = dir & DirectionFlag.DIR_MASK;
+        if (direction == DirectionFlag.DIR_FF) {
+            mergeWithFFKmer(initialKmerSize, kmer);
+        } else if (direction == DirectionFlag.DIR_FR) {
+            mergeWithFRKmer(initialKmerSize, kmer);
+        } else if (direction == DirectionFlag.DIR_RF) {
+            mergeWithRFKmer(initialKmerSize, kmer);
+        } else if (direction == DirectionFlag.DIR_RR) {
+            mergeWithRRKmer(initialKmerSize, kmer);
+        } else {
+            throw new RuntimeException("Direction not recognized: " + dir);
+        }
+    }
+    /**
+     * Fixed-length overload: converts {@code kmer} to a variable-length kmer
+     * through its text form and delegates to the VKmer variant.
+     */
+    public void mergeWithKmerInDir(byte dir, int initialKmerSize, KmerBytesWritable kmer) {
+        // TODO make this more efficient
+        final VKmerBytesWritable variableLengthCopy = new VKmerBytesWritable(kmer.toString());
+        mergeWithKmerInDir(dir, initialKmerSize, variableLengthCopy);
+    }
+
+    /**
+     * Return a fixed-length kmer built over this kmer's backing array at the
+     * kmer offset.
+     *
+     * @throws IllegalArgumentException
+     *             if this kmer's letter count differs from the global
+     *             fixed kmer length
+     */
+    public KmerBytesWritable asFixedLengthKmer() {
+        final boolean sameLength = lettersInKmer == KmerBytesWritable.getKmerLength();
+        if (!sameLength) {
+            throw new IllegalArgumentException("VKmer " + this.toString() + " is not of the same length as the fixed length Kmer (" + KmerBytesWritable.getKmerLength() + " )!");
+        }
+        return new KmerBytesWritable(bytes, kmerStartOffset);
+    }
+
+    /**
+     * Return the edit distance needed to turn kmer1 into kmer2 using
+     * substitutions, insertions, and deletions.
+     *
+     * Uses the classic dynamic programming table, so it takes
+     * O(length_1 * length_2) time and space.
+     */
+    public static int editDistance(VKmerBytesWritable kmer1, VKmerBytesWritable kmer2) {
+        final int length1 = kmer1.getKmerLetterLength();
+        final int length2 = kmer2.getKmerLetterLength();
+        final int[][] table = new int[length1 + 1][length2 + 1];
+
+        // borders: turning a prefix into the empty kmer (or back) costs its length
+        for (int i = 0; i <= length1; i++) {
+            table[i][0] = i;
+        }
+        for (int j = 0; j <= length2; j++) {
+            table[0][j] = j;
+        }
+
+        // every inner cell is the cheapest of: up + 1, left + 1, diagonal + mismatch cost
+        for (int i = 1; i <= length1; i++) {
+            for (int j = 1; j <= length2; j++) {
+                final int mismatch = kmer1.getGeneCodeAtPosition(i - 1) == kmer2.getGeneCodeAtPosition(j - 1) ? 0 : 1;
+                final int fromAbove = table[i - 1][j] + 1;
+                final int fromLeft = table[i][j - 1] + 1;
+                final int fromDiagonal = table[i - 1][j - 1] + mismatch;
+                table[i][j] = min(fromAbove, fromLeft, fromDiagonal);
+            }
+        }
+        return table[length1][length2];
+    }
+
+    /** Return the smallest of the three values */
+    private static int min(int a, int b, int c) {
+        return Math.min(a, Math.min(b, c));
+    }
+    /** Return the smaller of the two values */
+    private static int min(int a, int b) {
+        return Math.min(a, b);
+    }
+
+    /**
+     * Return the edit distance between this kmer and {@code other}; see
+     * {@link #editDistance(VKmerBytesWritable, VKmerBytesWritable)}
+     */
+    public int editDistance(VKmerBytesWritable other) {
+        final int distance = editDistance(this, other);
+        return distance;
+    }
+
+    /**
+     * Return the fractional difference between the two kmers: their edit
+     * distance divided by the letter count of the shorter one.
+     *
+     * Note: the result can exceed 1 (when the edit distance is larger than
+     * the shorter kmer).
+     */
+    public static float fracDissimilar(VKmerBytesWritable kmer1, VKmerBytesWritable kmer2) {
+        final int distance = editDistance(kmer1, kmer2);
+        final int shorterLength = min(kmer1.getKmerLetterLength(), kmer2.getKmerLetterLength());
+        return distance / (float) shorterLength;
+    }
+
+    /**
+     * Return the fractional difference between this kmer and {@code other};
+     * see {@link #fracDissimilar(VKmerBytesWritable, VKmerBytesWritable)}
+     */
+    public float fracDissimilar(VKmerBytesWritable other) {
+        final float fraction = fracDissimilar(this, other);
+        return fraction;
+    }
+
+}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerListWritable.java
new file mode 100644
index 0000000..80353c1
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerListWritable.java
@@ -0,0 +1,335 @@
+package edu.uci.ics.genomix.type;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.io.Writable;
+
+import edu.uci.ics.genomix.data.KmerUtil;
+import edu.uci.ics.genomix.data.Marshal;
+
+/**
+ * A list of variable-length kmers, stored back to back in one byte array.
+ *
+ * Layout, starting at {@code offset}: a 4-byte element count, followed by
+ * each element as a 4-byte letter count plus its packed kmer bytes. Since
+ * the elements differ in size, locating the i'th element or computing the
+ * total length requires walking the list from the front.
+ */
+public class VKmerListWritable implements Writable, Iterable<VKmerBytesWritable>, Serializable {
+    private static final long serialVersionUID = 1L;
+    protected static final byte[] EMPTY_BYTES = { 0, 0, 0, 0 };
+    protected static final int HEADER_SIZE = 4;
+
+    protected byte[] storage;
+    protected int offset;
+    protected int valueCount;
+    protected int storageMaxSize; // since we may be a reference inside a larger datablock, we must track our maximum size
+
+    // single reusable cursor handed out by getPosition() and the iterator;
+    // it is re-pointed on every call, so callers must copy it to keep a value
+    private VKmerBytesWritable posIter = new VKmerBytesWritable();
+
+    /**
+     * Create an empty list
+     */
+    public VKmerListWritable() {
+        storage = EMPTY_BYTES;
+        valueCount = 0;
+        offset = 0;
+        storageMaxSize = storage.length;
+    }
+
+    /**
+     * Create a list that references (does not copy) the serialized list
+     * found in {@code data} at {@code offset}
+     */
+    public VKmerListWritable(byte[] data, int offset) {
+        setNewReference(data, offset);
+    }
+
+    /**
+     * Create a list holding copies of the given kmers, in order
+     */
+    public VKmerListWritable(List<VKmerBytesWritable> kmers) {
+        this();
+        for (VKmerBytesWritable kmer : kmers) {
+            append(kmer);
+        }
+    }
+
+    /**
+     * Point this list at the serialized list found in {@code data} at
+     * {@code offset}, without copying
+     */
+    public void setNewReference(byte[] data, int offset) {
+        valueCount = Marshal.getInt(data, offset);
+        this.storage = data;
+        this.offset = offset;
+        this.storageMaxSize = getLength();
+    }
+
+    /**
+     * Append a copy of {@code kmer} (header and bytes) to the end of the list
+     */
+    public void append(VKmerBytesWritable kmer) {
+        setSize(getLength() + kmer.getLength());
+        System.arraycopy(kmer.getBytes(), kmer.kmerStartOffset - VKmerBytesWritable.HEADER_SIZE, storage, offset
+                + getLength(), kmer.getLength());
+        valueCount += 1;
+        Marshal.putInt(valueCount, storage, offset);
+    }
+
+    /**
+     * Append a fixed-length kmer, writing {@code k} as its letter-count header
+     */
+    public void append(int k, KmerBytesWritable kmer) {
+        setSize(getLength() + HEADER_SIZE + kmer.getLength());
+        Marshal.putInt(k, storage, offset + getLength());
+        System.arraycopy(kmer.getBytes(), kmer.getOffset(), storage, offset + getLength() + HEADER_SIZE,
+                kmer.getLength());
+        valueCount += 1;
+        Marshal.putInt(valueCount, storage, offset);
+    }
+
+    /**
+     * Append a fixed-length kmer, using the global fixed kmer length as its
+     * letter-count header
+     */
+    public void append(KmerBytesWritable kmer) { // TODO optimize this into two separate containers...
+        setSize(getLength() + kmer.getLength() + VKmerBytesWritable.HEADER_SIZE);
+        int myLength = getLength();
+        Marshal.putInt(KmerBytesWritable.getKmerLength(), storage, offset + myLength); // write a new VKmer header
+        System.arraycopy(kmer.getBytes(), kmer.offset,
+                storage, offset + myLength + VKmerBytesWritable.HEADER_SIZE,
+                kmer.getLength());
+        valueCount += 1;
+        Marshal.putInt(valueCount, storage, offset);
+    }
+
+    /**
+     * Append the otherList to the end of myList
+     */
+    public void appendList(VKmerListWritable otherList) {
+        if (otherList.valueCount > 0) {
+            setSize(getLength() + otherList.getLength() - HEADER_SIZE); // one of the headers is redundant
+
+            // copy contents of otherList into the end of my storage
+            System.arraycopy(otherList.storage, otherList.offset + HEADER_SIZE, // skip other header
+                    storage, offset + getLength(), // add to end
+                    otherList.getLength() - HEADER_SIZE);
+            valueCount += otherList.valueCount;
+            Marshal.putInt(valueCount, storage, offset);
+        }
+    }
+
+    /**
+     * Save the union of my list and otherList. Uses a temporary HashSet for
+     * uniquefication
+     */
+    public void unionUpdate(VKmerListWritable otherList) {
+        int newSize = valueCount + otherList.valueCount;
+        HashSet<VKmerBytesWritable> uniqueElements = new HashSet<VKmerBytesWritable>(newSize);
+        for (VKmerBytesWritable kmer : this) {
+            // have to make copies of my own kmers since I may overwrite them
+            uniqueElements.add(new VKmerBytesWritable(kmer));
+        }
+        for (VKmerBytesWritable kmer : otherList) {
+            // the iterator hands out one shared cursor object that is re-pointed on
+            // every next(); storing it directly would leave the set full of aliases
+            // of the last element, so each kmer must be copied
+            uniqueElements.add(new VKmerBytesWritable(kmer));
+        }
+        setSize(getLength() + otherList.getLength()); // upper bound on memory usage
+        valueCount = 0;
+        for (VKmerBytesWritable kmer : uniqueElements) {
+            append(kmer);
+        }
+        Marshal.putInt(valueCount, storage, offset);
+    }
+
+    /**
+     * Ensure there is room for at least {@code size} bytes (header
+     * included), growing to one and a half times that when needed
+     */
+    protected void setSize(int size) {
+        if (size > getCapacity()) {
+            setCapacity((size * 3 / 2));
+        }
+    }
+
+    protected int getCapacity() {
+        return storageMaxSize - offset;
+    }
+
+    /**
+     * Grow the storage to {@code new_cap} bytes. The current contents are
+     * copied to the start of a new array and {@code offset} becomes 0, so
+     * any position computed from the old {@code offset} is invalid afterwards.
+     */
+    protected void setCapacity(int new_cap) {
+        if (new_cap > getCapacity()) {
+            byte[] new_data = new byte[new_cap];
+            if (valueCount > 0) {
+                System.arraycopy(storage, offset, new_data, 0, getLength());
+            }
+            storage = new_data;
+            offset = 0;
+            storageMaxSize = storage.length;
+        }
+    }
+
+    /**
+     * Empty the list (the storage is kept)
+     */
+    public void reset() {
+        valueCount = 0;
+        Marshal.putInt(valueCount, storage, offset);
+    }
+
+    /**
+     * Return the kmer at position i. The returned object is a shared cursor
+     * that is re-pointed by later calls; copy it to keep the value.
+     */
+    public VKmerBytesWritable getPosition(int i) {
+        posIter.setAsReference(storage, getOffsetOfKmer(i));
+        return posIter;
+    }
+
+    /**
+     * Return the offset of the kmer at the i'th position
+     */
+    public int getOffsetOfKmer(int i) {
+        if (i >= valueCount) {
+            throw new ArrayIndexOutOfBoundsException("No such position " + i + " in list " + toString());
+        }
+        // seek to the given position
+        int posOffset = offset + HEADER_SIZE;
+        for (int curIndex = 0; curIndex < i; curIndex++) {
+            posOffset += KmerUtil.getByteNumFromK(Marshal.getInt(storage, posOffset)) + VKmerBytesWritable.HEADER_SIZE;
+        }
+        return posOffset;
+    }
+
+    /**
+     * Replace my contents with a copy of otherList
+     */
+    public void setCopy(VKmerListWritable otherList) {
+        setCopy(otherList.storage, otherList.offset);
+    }
+
+    /**
+     * read a KmerListWritable from newData, which should include the header
+     */
+    public void setCopy(byte[] newData, int newOffset) {
+        int newValueCount = Marshal.getInt(newData, newOffset);
+        int newLength = getLength(newData, newOffset);
+        setSize(newLength);
+        if (newValueCount > 0) {
+            System.arraycopy(newData, newOffset + HEADER_SIZE, storage, this.offset + HEADER_SIZE, newLength
+                    - HEADER_SIZE);
+        }
+        valueCount = newValueCount;
+        Marshal.putInt(valueCount, storage, this.offset);
+    }
+
+    /**
+     * Iterate over the kmers in order. Every call to next() returns the same
+     * shared cursor object, re-pointed at the next element; copy it to keep
+     * the value.
+     */
+    @Override
+    public Iterator<VKmerBytesWritable> iterator() {
+        Iterator<VKmerBytesWritable> it = new Iterator<VKmerBytesWritable>() {
+
+            private int currentIndex = 0;
+            private int currentOffset = offset + HEADER_SIZE; // init as offset of first kmer
+
+            @Override
+            public boolean hasNext() {
+                return currentIndex < valueCount;
+            }
+
+            @Override
+            public VKmerBytesWritable next() {
+                posIter.setAsReference(storage, currentOffset);
+                currentOffset += KmerUtil.getByteNumFromK(Marshal.getInt(storage, currentOffset))
+                        + VKmerBytesWritable.HEADER_SIZE;
+                currentIndex++;
+                return posIter;
+            }
+
+            @Override
+            public void remove() {
+                // currentOffset starts past the list header and is never <= 0, so the
+                // "has next() been called" check has to look at the index instead
+                if (currentIndex <= 0) {
+                    throw new IllegalStateException(
+                            "You must advance the iterator using .next() before calling remove()!");
+                }
+                // we're removing the element at prevIndex
+                int prevIndex = currentIndex - 1;
+                int prevOffset = getOffsetOfKmer(prevIndex);
+
+                if (currentIndex < valueCount) { // if it's the last element, don't have to do any copying
+                    System.arraycopy(storage, currentOffset, // from the "next" element
+                            storage, prevOffset, // to the one just returned (overwriting it)
+                            getLength() - currentOffset + offset); // remaining bytes except current element
+                }
+                valueCount--;
+                currentIndex--;
+                Marshal.putInt(valueCount, storage, offset);
+                currentOffset = prevOffset;
+            }
+        };
+        return it;
+    }
+
+    /**
+     * remove the first instance of `toRemove`. Uses a linear scan. Throws an
+     * exception if not in this list, unless `ignoreMissing` is set.
+     */
+    public void remove(VKmerBytesWritable toRemove, boolean ignoreMissing) {
+        Iterator<VKmerBytesWritable> posIterator = this.iterator();
+        while (posIterator.hasNext()) {
+            if (toRemove.equals(posIterator.next())) {
+                posIterator.remove();
+                return; // break as soon as the element is found
+            }
+        }
+        // element was not found
+        if (!ignoreMissing) {
+            throw new ArrayIndexOutOfBoundsException("the KmerBytesWritable `" + toRemove.toString()
+                    + "` was not found in this list.");
+        }
+    }
+
+    /**
+     * remove the first instance of `toRemove`; throws if it is not in the list
+     */
+    public void remove(VKmerBytesWritable toRemove) {
+        remove(toRemove, false);
+    }
+
+    /**
+     * Replace my contents with the list read from {@code in}: an element
+     * count followed by each element's letter count and packed bytes
+     */
+    @Override
+    public void readFields(DataInput in) throws IOException {
+        reset();
+        int newValueCount = in.readInt();
+        int curLength = getLength();
+        for (int i = 0; i < newValueCount; i++) {
+            int elemLetters = in.readInt();
+            int elemBytes = KmerUtil.getByteNumFromK(elemLetters) + VKmerBytesWritable.HEADER_SIZE;
+            setSize(curLength + elemBytes); // make sure we have room for the new element
+            // compute the write position only now: setSize() may have moved the data
+            // into a new array and reset `offset` to 0
+            int curOffset = offset + curLength;
+            Marshal.putInt(elemLetters, storage, curOffset); // write header
+            in.readFully(storage, curOffset + VKmerBytesWritable.HEADER_SIZE, elemBytes
+                    - VKmerBytesWritable.HEADER_SIZE); // write kmer
+            curLength += elemBytes;
+            valueCount++; // keeps getLength() accurate should a later setSize() need to copy
+        }
+        valueCount = newValueCount;
+        Marshal.putInt(valueCount, storage, offset);
+    }
+
+    /**
+     * Write the list header and all elements to {@code out}
+     */
+    @Override
+    public void write(DataOutput out) throws IOException {
+        out.write(storage, offset, getLength());
+    }
+
+    public int getCountOfPosition() {
+        return valueCount;
+    }
+
+    public byte[] getByteArray() {
+        return storage;
+    }
+
+    public int getStartOffset() {
+        return offset;
+    }
+
+    /**
+     * Return the number of bytes used by the list, header included. This
+     * walks every element, so it is linear in the number of elements.
+     */
+    public int getLength() {
+        int totalSize = HEADER_SIZE;
+        for (int curCount = 0; curCount < valueCount; curCount++) {
+            totalSize += KmerUtil.getByteNumFromK(Marshal.getInt(storage, offset + totalSize))
+                    + VKmerBytesWritable.HEADER_SIZE;
+        }
+        return totalSize;
+    }
+
+    /**
+     * Return the number of bytes used by the serialized list found in
+     * {@code listStorage} at {@code listOffset}, header included
+     */
+    public static int getLength(byte[] listStorage, int listOffset) {
+        int totalSize = HEADER_SIZE;
+        int listValueCount = Marshal.getInt(listStorage, listOffset);
+        for (int curCount = 0; curCount < listValueCount; curCount++) {
+            totalSize += KmerUtil.getByteNumFromK(Marshal.getInt(listStorage, listOffset + totalSize))
+                    + VKmerBytesWritable.HEADER_SIZE;
+        }
+        return totalSize;
+    }
+
+    /**
+     * Return the kmers as a comma-separated list inside square brackets
+     */
+    @Override
+    public String toString() {
+        StringBuilder sbuilder = new StringBuilder();
+        sbuilder.append('[');
+        for (int i = 0; i < valueCount; i++) {
+            sbuilder.append(getPosition(i).toString());
+            sbuilder.append(',');
+        }
+        if (valueCount > 0) {
+            // turn the trailing comma into the closing bracket
+            sbuilder.setCharAt(sbuilder.length() - 1, ']');
+        } else {
+            sbuilder.append(']');
+        }
+        return sbuilder.toString();
+    }
+
+    @Override
+    public int hashCode() {
+        return Marshal.hashBytes(getByteArray(), getStartOffset(), getLength());
+    }
+}
diff --git a/genomix/genomix-data/src/main/resources/conf/cluster.properties b/genomix/genomix-data/src/main/resources/conf/cluster.properties
new file mode 100644
index 0000000..eabd81b
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/conf/cluster.properties
@@ -0,0 +1,40 @@
+#The CC port for Hyracks clients
+CC_CLIENTPORT=3099
+
+#The CC port for Hyracks cluster management
+CC_CLUSTERPORT=1099
+
+#The directory of hyracks binaries
+HYRACKS_HOME="../../../../hyracks"
+
+#The tmp directory for cc to install jars
+CCTMP_DIR=/tmp/t1
+
+#The tmp directory for nc to install jars
+NCTMP_DIR=/tmp/t2
+
+#The directory to put cc logs
+CCLOGS_DIR=$CCTMP_DIR/logs
+
+#The directory to put nc logs
+NCLOGS_DIR=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS="/tmp/t3,/tmp/t4"
+
+#The JAVA_HOME
+JAVA_HOME=$JAVA_HOME
+
+#The CLASSPATH, with HADOOP_HOME prepended
+CLASSPATH="${HADOOP_HOME}:${CLASSPATH}:."
+
+#The frame size of the internal dataflow engine
+FRAME_SIZE=65536
+
+#CC JAVA_OPTS
+CCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx10g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/genomix/genomix-data/src/main/resources/conf/debugnc.properties b/genomix/genomix-data/src/main/resources/conf/debugnc.properties
new file mode 100644
index 0000000..27afa26
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/conf/debugnc.properties
@@ -0,0 +1,12 @@
+#The tmp directory for nc to install jars
+NCTMP_DIR2=/tmp/t-1
+
+#The directory to put nc logs
+NCLOGS_DIR2=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS2="/tmp/t-2,/tmp/t-3"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS2="-Xdebug -Xrunjdwp:transport=dt_socket,address=7003,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/genomix/genomix-data/src/main/resources/conf/master b/genomix/genomix-data/src/main/resources/conf/master
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/conf/master
@@ -0,0 +1 @@
+localhost
diff --git a/genomix/genomix-data/src/main/resources/conf/slaves b/genomix/genomix-data/src/main/resources/conf/slaves
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/conf/slaves
@@ -0,0 +1 @@
+localhost
diff --git a/genomix/genomix-data/src/main/resources/scripts/genomix b/genomix/genomix-data/src/main/resources/scripts/genomix
new file mode 100644
index 0000000..bdd7f20
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/genomix
@@ -0,0 +1,113 @@
+#!/bin/sh
+# ----------------------------------------------------------------------------
+# Copyright 2001-2006 The Apache Software Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+#
+# Copyright (c) 2001-2006 The Apache Software Foundation. All rights
+# reserved.
+
+
+# resolve links - $0 may be a softlink
+PRG="$0"
+
+# follow the symlink chain until PRG is no longer a link
+while [ -h "$PRG" ]; do
+ ls=`ls -ld "$PRG"`
+ # extract the link target from the "name -> target" part of the ls output
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ # absolute target: use it as is
+ PRG="$link"
+ else
+ # relative target: resolve it against the directory holding the link
+ PRG=`dirname "$PRG"`/"$link"
+ fi
+done
+
+# BASEDIR is the parent of the directory that holds this script
+PRGDIR=`dirname "$PRG"`
+BASEDIR=`cd "$PRGDIR/.." >/dev/null; pwd`
+
+
+
+# OS specific support. $var _must_ be set to either true or false.
+cygwin=false;
+darwin=false;
+case "`uname`" in
+ CYGWIN*) cygwin=true ;;
+ Darwin*) darwin=true
+ # on Darwin, default JAVA_VERSION and JAVA_HOME when the caller did not set them
+ if [ -z "$JAVA_VERSION" ] ; then
+ JAVA_VERSION="CurrentJDK"
+ else
+ echo "Using Java version: $JAVA_VERSION"
+ fi
+ if [ -z "$JAVA_HOME" ] ; then
+ JAVA_HOME=/System/Library/Frameworks/JavaVM.framework/Versions/${JAVA_VERSION}/Home
+ fi
+ ;;
+esac
+
+# on Gentoo, ask java-config for the JRE home when JAVA_HOME is unset
+if [ -z "$JAVA_HOME" ] ; then
+ if [ -r /etc/gentoo-release ] ; then
+ JAVA_HOME=`java-config --jre-home`
+ fi
+fi
+
+# For Cygwin, ensure paths are in UNIX format before anything is touched
+if $cygwin ; then
+ [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
+ [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
+fi
+
+# If a specific java binary isn't specified search for the standard 'java' binary
+if [ -z "$JAVACMD" ] ; then
+ if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ else
+ # no JAVA_HOME: fall back to whatever 'java' is on the PATH
+ JAVACMD=`which java`
+ fi
+fi
+
+# give up if the chosen java binary is missing or not executable
+if [ ! -x "$JAVACMD" ] ; then
+ echo "Error: JAVA_HOME is not defined correctly." 1>&2
+ echo " We cannot execute $JAVACMD" 1>&2
+ exit 1
+fi
+
+# REPO is the directory holding the jars; defaults to lib under BASEDIR
+if [ -z "$REPO" ]
+then
+ REPO="$BASEDIR"/lib
+fi
+
+# the classpath is an explicit list: CLASSPATH_PREFIX, the etc directory, then each entry under REPO
+CLASSPATH=$CLASSPATH_PREFIX:"$BASEDIR"/etc:"$REPO"/hyracks-dataflow-std-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-api-0.2.3-SNAPSHOT.jar:"$REPO"/json-20090211.jar:"$REPO"/httpclient-4.1-alpha2.jar:"$REPO"/httpcore-4.1-beta1.jar:"$REPO"/commons-logging-1.1.1.jar:"$REPO"/commons-codec-1.4.jar:"$REPO"/args4j-2.0.12.jar:"$REPO"/commons-lang3-3.1.jar:"$REPO"/hyracks-dataflow-common-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-data-std-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-control-cc-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-control-common-0.2.3-SNAPSHOT.jar:"$REPO"/jetty-server-8.0.0.RC0.jar:"$REPO"/servlet-api-3.0.20100224.jar:"$REPO"/jetty-continuation-8.0.0.RC0.jar:"$REPO"/jetty-http-8.0.0.RC0.jar:"$REPO"/jetty-io-8.0.0.RC0.jar:"$REPO"/jetty-webapp-8.0.0.RC0.jar:"$REPO"/jetty-xml-8.0.0.RC0.jar:"$REPO"/jetty-util-8.0.0.RC0.jar:"$REPO"/jetty-servlet-8.0.0.RC0.jar:"$REPO"/jetty-security-8.0.0.RC0.jar:"$REPO"/wicket-core-1.5.2.jar:"$REPO"/wicket-util-1.5.2.jar:"$REPO"/wicket-request-1.5.2.jar:"$REPO"/slf4j-api-1.6.1.jar:"$REPO"/slf4j-jcl-1.6.3.jar:"$REPO"/hyracks-control-nc-0.2.3-SNAPSHOT.jar:"$REPO"/dcache-client-0.0.1.jar:"$REPO"/jetty-client-8.0.0.M0.jar:"$REPO"/hyracks-net-0.2.3-SNAPSHOT.jar:"$REPO"/commons-io-1.3.1.jar:"$REPO"/hyracks-ipc-0.2.3-SNAPSHOT.jar:"$REPO"/genomix-0.2.3-SNAPSHOT.pom
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin; then
+ [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
+ [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
+ [ -n "$HOME" ] && HOME=`cygpath --path --windows "$HOME"`
+ [ -n "$BASEDIR" ] && BASEDIR=`cygpath --path --windows "$BASEDIR"`
+ [ -n "$REPO" ] && REPO=`cygpath --path --windows "$REPO"`
+fi
+
+# replace this shell with the JVM running the genomix Driver; all script arguments are passed through
+exec "$JAVACMD" $JAVA_OPTS \
+ -classpath "$CLASSPATH" \
+ -Dapp.name="genomix" \
+ -Dapp.pid="$$" \
+ -Dapp.repo="$REPO" \
+ -Dapp.home="$BASEDIR" \
+ -Dbasedir="$BASEDIR" \
+ edu.uci.ics.genomix.driver.Driver \
+ "$@"
diff --git a/genomix/genomix-data/src/main/resources/scripts/genomix.bat b/genomix/genomix-data/src/main/resources/scripts/genomix.bat
new file mode 100644
index 0000000..1bd2098
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/genomix.bat
@@ -0,0 +1,108 @@
+@REM ----------------------------------------------------------------------------
+@REM Copyright 2001-2006 The Apache Software Foundation.
+@REM
+@REM Licensed under the Apache License, Version 2.0 (the "License");
+@REM you may not use this file except in compliance with the License.
+@REM You may obtain a copy of the License at
+@REM
+@REM http://www.apache.org/licenses/LICENSE-2.0
+@REM
+@REM Unless required by applicable law or agreed to in writing, software
+@REM distributed under the License is distributed on an "AS IS" BASIS,
+@REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@REM See the License for the specific language governing permissions and
+@REM limitations under the License.
+@REM ----------------------------------------------------------------------------
+@REM
+@REM Copyright (c) 2001-2006 The Apache Software Foundation. All rights
+@REM reserved.
+
+@echo off
+
+set ERROR_CODE=0
+
+:init
+@REM Decide how to startup depending on the version of windows
+
+@REM -- Win98ME
+if NOT "%OS%"=="Windows_NT" goto Win9xArg
+
+@REM set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" @setlocal
+
+@REM -- 4NT shell
+if "%eval[2+2]" == "4" goto 4NTArgs
+
+@REM -- Regular WinNT shell
+set CMD_LINE_ARGS=%*
+goto WinNTGetScriptDir
+
+@REM The 4NT Shell from jp software
+:4NTArgs
+set CMD_LINE_ARGS=%$
+goto WinNTGetScriptDir
+
+:Win9xArg
+@REM Slurp the command line arguments. This loop allows for an unlimited number
+@REM of arguments (up to the command line limit, anyway).
+set CMD_LINE_ARGS=
+:Win9xApp
+if %1a==a goto Win9xGetScriptDir
+set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1
+shift
+goto Win9xApp
+@REM Derive BASEDIR from the script location, restore the caller's directory, clear SAVEDIR.
+:Win9xGetScriptDir
+set SAVEDIR=%CD%
+%0\
+cd %0\..\..
+set BASEDIR=%CD%
+cd %SAVEDIR%
+set SAVEDIR=
+goto repoSetup
+
+:WinNTGetScriptDir
+set BASEDIR=%~dp0\..
+
+:repoSetup
+
+
+if "%JAVACMD%"=="" set JAVACMD=java
+
+if "%REPO%"=="" set REPO=%BASEDIR%\lib
+
+set CLASSPATH="%BASEDIR%"\etc;"%REPO%"\hyracks-dataflow-std-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-api-0.2.3-SNAPSHOT.jar;"%REPO%"\json-20090211.jar;"%REPO%"\httpclient-4.1-alpha2.jar;"%REPO%"\httpcore-4.1-beta1.jar;"%REPO%"\commons-logging-1.1.1.jar;"%REPO%"\commons-codec-1.4.jar;"%REPO%"\args4j-2.0.12.jar;"%REPO%"\commons-lang3-3.1.jar;"%REPO%"\hyracks-dataflow-common-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-data-std-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-control-cc-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-control-common-0.2.3-SNAPSHOT.jar;"%REPO%"\jetty-server-8.0.0.RC0.jar;"%REPO%"\servlet-api-3.0.20100224.jar;"%REPO%"\jetty-continuation-8.0.0.RC0.jar;"%REPO%"\jetty-http-8.0.0.RC0.jar;"%REPO%"\jetty-io-8.0.0.RC0.jar;"%REPO%"\jetty-webapp-8.0.0.RC0.jar;"%REPO%"\jetty-xml-8.0.0.RC0.jar;"%REPO%"\jetty-util-8.0.0.RC0.jar;"%REPO%"\jetty-servlet-8.0.0.RC0.jar;"%REPO%"\jetty-security-8.0.0.RC0.jar;"%REPO%"\wicket-core-1.5.2.jar;"%REPO%"\wicket-util-1.5.2.jar;"%REPO%"\wicket-request-1.5.2.jar;"%REPO%"\slf4j-api-1.6.1.jar;"%REPO%"\slf4j-jcl-1.6.3.jar;"%REPO%"\hyracks-control-nc-0.2.3-SNAPSHOT.jar;"%REPO%"\dcache-client-0.0.1.jar;"%REPO%"\jetty-client-8.0.0.M0.jar;"%REPO%"\hyracks-net-0.2.3-SNAPSHOT.jar;"%REPO%"\commons-io-1.3.1.jar;"%REPO%"\hyracks-ipc-0.2.3-SNAPSHOT.jar;"%REPO%"\genomix-0.2.3-SNAPSHOT.pom
+goto endInit
+
+@REM Reaching here means variables are defined and arguments have been captured
+:endInit
+
+%JAVACMD% %JAVA_OPTS% -classpath %CLASSPATH_PREFIX%;%CLASSPATH% -Dapp.name="genomix" -Dapp.repo="%REPO%" -Dapp.home="%BASEDIR%" -Dbasedir="%BASEDIR%" edu.uci.ics.genomix.driver.Driver %CMD_LINE_ARGS%
+if ERRORLEVEL 1 goto error
+goto end
+
+:error
+if "%OS%"=="Windows_NT" @endlocal
+set ERROR_CODE=%ERRORLEVEL%
+
+:end
+@REM On Windows NT the local variable scope is closed in :endNT
+if "%OS%"=="Windows_NT" goto endNT
+
+@REM For old DOS remove the set variables from ENV - we assume they were not set
+@REM before we started - at least we don't leave any baggage around
+set CMD_LINE_ARGS=
+goto postExec
+
+:endNT
+@REM If the error code is non-zero then the endlocal was done already in :error.
+if %ERROR_CODE% EQU 0 @endlocal
+
+
+:postExec
+@REM With FORCE_EXIT_ON_ERROR=on a failure uses plain "exit"; otherwise "exit /B" is used.
+if "%FORCE_EXIT_ON_ERROR%" == "on" (
+ if %ERROR_CODE% NEQ 0 exit %ERROR_CODE%
+)
+
+exit /B %ERROR_CODE%
diff --git a/genomix/genomix-data/src/main/resources/scripts/getip.sh b/genomix/genomix-data/src/main/resources/scripts/getip.sh
new file mode 100644
index 0000000..e0cdf73
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/getip.sh
@@ -0,0 +1,21 @@
+# Print this host's IP address on stdout; the OS name selects which interfaces to query.
+OS_NAME=`uname -a|awk '{print $1}'`
+LINUX_OS='Linux'
+
+if [ "$OS_NAME" = "$LINUX_OS" ];
+then
+    # Linux: try eth0 first; cut keeps what follows a ':' (e.g. after "addr:"), if present
+    IPADDR=`/sbin/ifconfig eth0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+    if [ "$IPADDR" = "" ]
+    then
+        IPADDR=`/sbin/ifconfig lo | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+    fi
+else
+    IPADDR=`/sbin/ifconfig en1 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+    if [ "$IPADDR" = "" ]
+    then
+        IPADDR=`/sbin/ifconfig lo0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+    fi
+
+fi
+echo $IPADDR
diff --git a/genomix/genomix-data/src/main/resources/scripts/startAllNCs.sh b/genomix/genomix-data/src/main/resources/scripts/startAllNCs.sh
new file mode 100644
index 0000000..5e38c40
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/startAllNCs.sh
@@ -0,0 +1,6 @@
+GENOMIX_PATH=`pwd`
+# Run bin/startnc.sh on every host in conf/slaves, but only if the remote cd succeeds.
+for i in `cat conf/slaves`
+do
+    ssh "$i" "cd \"${GENOMIX_PATH}\" && bin/startnc.sh"
+done
diff --git a/genomix/genomix-data/src/main/resources/scripts/startCluster.sh b/genomix/genomix-data/src/main/resources/scripts/startCluster.sh
new file mode 100755
index 0000000..4727764
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/startCluster.sh
@@ -0,0 +1,19 @@
+bin/startcc.sh
+sleep 5
+bin/startAllNCs.sh
+# CC and NCs are started; now register the genomix application through the Hyracks CLI.
+. conf/cluster.properties
+# do we need to specify the version somewhere?
+hyrackcmd=`ls ${HYRACKS_HOME}/hyracks-cli/target/hyracks-cli-*-binary-assembly/bin/hyrackscli`
+# find zip file
+appzip=`ls $PWD/../genomix-*-binary-assembly.zip`
+# Quoted on purpose: an empty value must fail the -f test ("[ -f ]" alone is true).
+[ -f "$hyrackcmd" ] || { echo "Hyracks commandline is missing"; exit 1;}
+[ -f "$appzip" ] || { echo "Genomix binary-assembly.zip is missing"; exit 1;}
+
+CCHOST_NAME=`cat conf/master`
+
+IPADDR=`bin/getip.sh`
+echo "connect to \"${IPADDR}:${CC_CLIENTPORT}\"; create application genomix \"$appzip\";" | "$hyrackcmd"
+echo ""
+
diff --git a/genomix/genomix-data/src/main/resources/scripts/startDebugNc.sh b/genomix/genomix-data/src/main/resources/scripts/startDebugNc.sh
new file mode 100644
index 0000000..fe6cf27
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/startDebugNc.sh
@@ -0,0 +1,50 @@
+hostname
+
+#Get the IP address of the cc by running getip.sh on the host named in conf/master
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties, then the debug-NC specific ones
+. conf/cluster.properties
+. conf/debugnc.properties
+
+#Recreate an empty temp dir
+
+rm -rf $NCTMP_DIR2
+mkdir $NCTMP_DIR2
+
+#Recreate an empty log dir
+rm -rf $NCLOGS_DIR2
+mkdir $NCLOGS_DIR2
+
+
+#Recreate every I/O working dir listed (comma-separated) in IO_DIRS2
+io_dirs=$(echo $IO_DIRS2 | tr "," "\n")
+for io_dir in $io_dirs
+do
+    rm -rf $io_dir
+    mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+#Get the IP address of this node
+IPADDR=`bin/getip.sh`
+
+#Get node ID: short host name with a "2" suffix
+NODEID=`hostname | cut -d '.' -f 1`
+NODEID=${NODEID}2
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS2
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR2
+
+#Launch hyracks nc in the background; "> file 2>&1" also works in shells without "&>"
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS2}" > $NCLOGS_DIR2/$NODEID.log 2>&1 &
diff --git a/genomix/genomix-data/src/main/resources/scripts/startcc.sh b/genomix/genomix-data/src/main/resources/scripts/startcc.sh
new file mode 100644
index 0000000..fe2551d
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/startcc.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+hostname
+
+#Import cluster properties (the file is sourced as shell code)
+. conf/cluster.properties
+
+#Get the IP address of the cc (CCHOST_NAME is read here but not used below)
+CCHOST_NAME=`cat conf/master`
+CCHOST=`bin/getip.sh`
+
+#Recreate an empty temp dir
+rm -rf $CCTMP_DIR
+mkdir $CCTMP_DIR
+
+#Recreate an empty logs dir
+rm -rf $CCLOGS_DIR
+mkdir $CCLOGS_DIR
+
+#Export JAVA_HOME and JAVA_OPTS (JAVA_OPTS is taken from CCJAVA_OPTS)
+export JAVA_HOME=$JAVA_HOME
+export JAVA_OPTS=$CCJAVA_OPTS
+
+#Launch hyracks cc script in the background; stdout and stderr go to $CCLOGS_DIR/cc.log
+chmod -R 755 $HYRACKS_HOME
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyrackscc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 3 &> $CCLOGS_DIR/cc.log &
diff --git a/genomix/genomix-data/src/main/resources/scripts/startnc.sh b/genomix/genomix-data/src/main/resources/scripts/startnc.sh
new file mode 100644
index 0000000..6e0f90e
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/startnc.sh
@@ -0,0 +1,49 @@
+hostname
+
+MY_NAME=`hostname`
+#Get the IP address of the cc by running getip.sh on the host named in conf/master
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Recreate an empty temp dir
+
+rm -rf $NCTMP_DIR
+mkdir $NCTMP_DIR
+
+#Recreate an empty log dir
+rm -rf $NCLOGS_DIR
+mkdir $NCLOGS_DIR
+
+
+#Recreate every I/O working dir listed (comma-separated) in IO_DIRS
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+    rm -rf $io_dir
+    mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+IPADDR=`bin/getip.sh`
+#echo $IPADDR
+
+#Get node ID: the short host name
+NODEID=`hostname | cut -d '.' -f 1`
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR
+
+#Launch hyracks nc in the background; "> file 2>&1" also works in shells without "&>"
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS}" > $NCLOGS_DIR/$NODEID.log 2>&1 &
diff --git a/genomix/genomix-data/src/main/resources/scripts/stopAllNCs.sh b/genomix/genomix-data/src/main/resources/scripts/stopAllNCs.sh
new file mode 100644
index 0000000..66ed866
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/stopAllNCs.sh
@@ -0,0 +1,6 @@
+GENOMIX_PATH=`pwd`
+# Run bin/stopnc.sh on every host in conf/slaves, but only if the remote cd succeeds.
+for i in `cat conf/slaves`
+do
+    ssh "$i" "cd \"${GENOMIX_PATH}\" && bin/stopnc.sh"
+done
diff --git a/genomix/genomix-data/src/main/resources/scripts/stopCluster.sh b/genomix/genomix-data/src/main/resources/scripts/stopCluster.sh
new file mode 100644
index 0000000..4889934
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/stopCluster.sh
@@ -0,0 +1,3 @@
+bin/stopAllNCs.sh
+sleep 2
+bin/stopcc.sh
diff --git a/genomix/genomix-data/src/main/resources/scripts/stopcc.sh b/genomix/genomix-data/src/main/resources/scripts/stopcc.sh
new file mode 100644
index 0000000..1865054
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/stopcc.sh
@@ -0,0 +1,10 @@
+hostname
+. conf/cluster.properties
+
+#Kill every process whose ps line contains ${USER}, java and hyracks
+PID=`ps -ef|grep ${USER}|grep java|grep hyracks|awk '{print $2}'`
+echo $PID
+[ "$PID" != "" ] && kill -9 $PID
+
+#Clean up CC temp dir; ":?" aborts instead of expanding to "/*" when CCTMP_DIR is unset or empty
+rm -rf "${CCTMP_DIR:?}"/*
diff --git a/genomix/genomix-data/src/main/resources/scripts/stopnc.sh b/genomix/genomix-data/src/main/resources/scripts/stopnc.sh
new file mode 100644
index 0000000..3928bb7
--- /dev/null
+++ b/genomix/genomix-data/src/main/resources/scripts/stopnc.sh
@@ -0,0 +1,23 @@
+hostname
+. conf/cluster.properties
+
+#Kill every process whose ps line contains ${USER}, java and Dapp.name=hyracksnc
+PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+# Nothing found by user name: retry with the numeric uid ("=" is the portable test operator).
+if [ "$PID" = "" ]; then
+    USERID=`id | sed 's/^uid=//;s/(.*$//'`
+    PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+fi
+
+echo $PID
+[ "$PID" != "" ] && kill -9 $PID
+
+#Empty every I/O working dir listed (comma-separated) in IO_DIRS
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+    rm -rf "${io_dir:?}"/*
+done
+
+#Clean up NC temp dir; ":?" aborts instead of expanding to "/*" when NCTMP_DIR is unset or empty
+rm -rf "${NCTMP_DIR:?}"/*
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableFactoryTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableFactoryTest.java
new file mode 100644
index 0000000..36115d3
--- /dev/null
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableFactoryTest.java
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.data.test;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.KmerBytesWritableFactory;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+public class KmerBytesWritableFactoryTest {
+ static byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
+
+ KmerBytesWritableFactory kmerFactory = new KmerBytesWritableFactory(8);
+
+ @Test
+ public void TestGetLastKmer() {
+ VKmerBytesWritable kmer = new VKmerBytesWritable();
+ kmer.setByRead(9, array, 0);
+ Assert.assertEquals("AGCTGACCG", kmer.toString());
+ VKmerBytesWritable lastKmer;
+ for (int i = 8; i > 0; i--) {
+ lastKmer = kmerFactory.getLastKmerFromChain(i, kmer);
+ Assert.assertEquals("AGCTGACCG".substring(9 - i), lastKmer.toString());
+ lastKmer = kmerFactory.getSubKmerFromChain(9 - i, i, kmer);
+ Assert.assertEquals("AGCTGACCG".substring(9 - i), lastKmer.toString());
+ }
+ VKmerBytesWritable vlastKmer;
+ for (int i = 8; i > 0; i--) {
+ vlastKmer = kmerFactory.getLastKmerFromChain(i, kmer);
+ Assert.assertEquals("AGCTGACCG".substring(9 - i), vlastKmer.toString());
+ vlastKmer = kmerFactory.getSubKmerFromChain(9 - i, i, kmer);
+ Assert.assertEquals("AGCTGACCG".substring(9 - i), vlastKmer.toString());
+ }
+ }
+
+ @Test
+ public void TestGetFirstKmer() {
+ VKmerBytesWritable kmer = new VKmerBytesWritable();
+ kmer.setByRead(9, array, 0);
+ Assert.assertEquals("AGCTGACCG", kmer.toString());
+ VKmerBytesWritable firstKmer;
+ for (int i = 8; i > 0; i--) {
+ firstKmer = kmerFactory.getFirstKmerFromChain(i, kmer);
+ Assert.assertEquals("AGCTGACCG".substring(0, i), firstKmer.toString());
+ firstKmer = kmerFactory.getSubKmerFromChain(0, i, kmer);
+ Assert.assertEquals("AGCTGACCG".substring(0, i), firstKmer.toString());
+ }
+ VKmerBytesWritable vfirstKmer;
+ for (int i = 8; i > 0; i--) {
+ vfirstKmer = kmerFactory.getFirstKmerFromChain(i, kmer);
+ Assert.assertEquals("AGCTGACCG".substring(0, i), vfirstKmer.toString());
+ vfirstKmer = kmerFactory.getSubKmerFromChain(0, i, kmer);
+ Assert.assertEquals("AGCTGACCG".substring(0, i), vfirstKmer.toString());
+ }
+ }
+
+ @Test
+ public void TestGetSubKmer() {
+ VKmerBytesWritable kmer = new VKmerBytesWritable();
+ kmer.setByRead(9, array, 0);
+ Assert.assertEquals("AGCTGACCG", kmer.toString());
+ VKmerBytesWritable subKmer;
+ for (int istart = 0; istart < kmer.getKmerLetterLength() - 1; istart++) {
+ for (int isize = 1; isize + istart <= kmer.getKmerLetterLength(); isize++) {
+ subKmer = kmerFactory.getSubKmerFromChain(istart, isize, kmer);
+ Assert.assertEquals("AGCTGACCG".substring(istart, istart + isize), subKmer.toString());
+ }
+ }
+ }
+
+ @Test
+ public void TestMergeNext() {
+ VKmerBytesWritable kmer = new VKmerBytesWritable();
+ kmer.setByRead(9, array, 0);
+ Assert.assertEquals("AGCTGACCG", kmer.toString());
+
+ String text = "AGCTGACCG";
+ for (byte x = GeneCode.A; x <= GeneCode.T; x++) {
+ VKmerBytesWritable newkmer = kmerFactory.mergeKmerWithNextCode(kmer, x);
+ text = text + (char) GeneCode.GENE_SYMBOL[x];
+ Assert.assertEquals(text, newkmer.toString());
+ kmer = new VKmerBytesWritable(newkmer);
+ }
+ for (byte x = GeneCode.A; x <= GeneCode.T; x++) {
+ VKmerBytesWritable newkmer = kmerFactory.mergeKmerWithNextCode(kmer, x);
+ text = text + (char) GeneCode.GENE_SYMBOL[x];
+ Assert.assertEquals(text, newkmer.toString());
+ kmer = new VKmerBytesWritable(newkmer);
+ }
+ }
+
+ @Test
+ public void TestMergePre() {
+ VKmerBytesWritable kmer = new VKmerBytesWritable();
+ kmer.setByRead(9, array, 0);
+ Assert.assertEquals("AGCTGACCG", kmer.toString());
+ String text = "AGCTGACCG";
+ for (byte x = GeneCode.A; x <= GeneCode.T; x++) {
+ VKmerBytesWritable newkmer = kmerFactory.mergeKmerWithPreCode(kmer, x);
+ text = (char) GeneCode.GENE_SYMBOL[x] + text;
+ Assert.assertEquals(text, newkmer.toString());
+ kmer = new VKmerBytesWritable(newkmer);
+ }
+ for (byte x = GeneCode.A; x <= GeneCode.T; x++) {
+ VKmerBytesWritable newkmer = kmerFactory.mergeKmerWithPreCode(kmer, x);
+ text = (char) GeneCode.GENE_SYMBOL[x] + text;
+ Assert.assertEquals(text, newkmer.toString());
+ kmer = new VKmerBytesWritable(newkmer);
+ }
+ }
+
+ @Test
+ public void TestMergeTwoKmer() {
+ VKmerBytesWritable kmer1 = new VKmerBytesWritable();
+ kmer1.setByRead(9, array, 0);
+ String text1 = "AGCTGACCG";
+ VKmerBytesWritable kmer2 = new VKmerBytesWritable();
+ kmer2.setByRead(9, array, 1);
+ String text2 = "GCTGACCGT";
+ Assert.assertEquals(text1, kmer1.toString());
+ Assert.assertEquals(text2, kmer2.toString());
+
+ VKmerBytesWritable merged = kmerFactory.mergeTwoKmer(kmer1, kmer2);
+ Assert.assertEquals(text1 + text2, merged.toString());
+
+ VKmerBytesWritable kmer3 = new VKmerBytesWritable();
+ kmer3.setByRead(3, array, 1);
+ String text3 = "GCT";
+ Assert.assertEquals(text3, kmer3.toString());
+
+ merged = kmerFactory.mergeTwoKmer(kmer1, kmer3);
+ Assert.assertEquals(text1 + text3, merged.toString());
+ merged = kmerFactory.mergeTwoKmer(kmer3, kmer1);
+ Assert.assertEquals(text3 + text1, merged.toString());
+
+ VKmerBytesWritable kmer4 = new VKmerBytesWritable();
+ kmer4.setByRead(8, array, 0);
+ String text4 = "AGCTGACC";
+ Assert.assertEquals(text4, kmer4.toString());
+ merged = kmerFactory.mergeTwoKmer(kmer4, kmer3);
+ Assert.assertEquals(text4 + text3, merged.toString());
+
+ VKmerBytesWritable kmer5 = new VKmerBytesWritable();
+ kmer5.setByRead(7, array, 0);
+ String text5 = "AGCTGAC";
+ VKmerBytesWritable kmer6 = new VKmerBytesWritable();
+ kmer6.setByRead(9, array, 1);
+ String text6 = "GCTGACCGT";
+ merged = kmerFactory.mergeTwoKmer(kmer5, kmer6);
+ Assert.assertEquals(text5 + text6, merged.toString());
+
+ kmer6.setByRead(6, array, 1);
+ String text7 = "GCTGAC";
+ merged = kmerFactory.mergeTwoKmer(kmer5, kmer6);
+ Assert.assertEquals(text5 + text7, merged.toString());
+
+ kmer6.setByRead(4, array, 1);
+ String text8 = "GCTG";
+ merged = kmerFactory.mergeTwoKmer(kmer5, kmer6);
+ Assert.assertEquals(text5 + text8, merged.toString());
+ }
+
+ @Test
+ public void TestShift() {
+ VKmerBytesWritable kmer = new VKmerBytesWritable(kmerFactory.getKmerByRead(9, array, 0));
+ String text = "AGCTGACCG";
+ Assert.assertEquals(text, kmer.toString());
+
+ VKmerBytesWritable kmerForward = kmerFactory.shiftKmerWithNextCode(kmer, GeneCode.A);
+ Assert.assertEquals(text, kmer.toString());
+ Assert.assertEquals("GCTGACCGA", kmerForward.toString());
+ VKmerBytesWritable kmerBackward = kmerFactory.shiftKmerWithPreCode(kmer, GeneCode.C);
+ Assert.assertEquals(text, kmer.toString());
+ Assert.assertEquals("CAGCTGACC", kmerBackward.toString());
+
+ }
+
+ @Test
+ public void TestReverseKmer() { // reverse() must return the same letters in opposite order
+     VKmerBytesWritable kmer = new VKmerBytesWritable();
+     kmer.setByRead(7, array, 0);
+     Assert.assertEquals("AGCTGAC", kmer.toString());
+     VKmerBytesWritable reversed = kmerFactory.reverse(kmer);
+     Assert.assertEquals("CAGTCGA", reversed.toString());
+     // Same check on an 8-letter kmer; assertEquals takes (expected, actual).
+     kmer.setByRead(8, ("AATAGAAC").getBytes(), 0);
+     Assert.assertEquals("AATAGAAC", kmer.toString());
+     reversed.reset(8);
+     reversed = kmerFactory.reverse(kmer);
+     Assert.assertEquals("CAAGATAA", reversed.toString());
+ }
+}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
new file mode 100644
index 0000000..4f7b90e
--- /dev/null
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.data.test;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+
+public class KmerBytesWritableTest { // all assertEquals calls use (expected, actual) order
+    static byte[] array = { 'A', 'A', 'T', 'A', 'G', 'A', 'A', 'G' };
+    static int k = 7;
+    // Reading k letters at offsets 0 and 1 must give the matching substrings of the input.
+    @Test
+    public void TestCompressKmer() {
+        KmerBytesWritable.setGlobalKmerLength(k);
+        KmerBytesWritable kmer = new KmerBytesWritable();
+        kmer.setByRead(array, 0);
+        Assert.assertEquals("AATAGAA", kmer.toString());
+
+        kmer.setByRead(array, 1);
+        Assert.assertEquals("ATAGAAG", kmer.toString());
+    }
+    // Appending the last input symbol must drop the first letter and return its gene code.
+    @Test
+    public void TestMoveKmer() {
+        KmerBytesWritable.setGlobalKmerLength(k);
+        KmerBytesWritable kmer = new KmerBytesWritable();
+        kmer.setByRead(array, 0);
+        Assert.assertEquals("AATAGAA", kmer.toString());
+        // NOTE(review): with k = 7 and 8 input bytes this loop never runs; confirm assertTrue(false) is intended.
+        for (int i = k; i < array.length - 1; i++) {
+            kmer.shiftKmerWithNextChar(array[i]); // array holds ASCII symbols, so use the Char variant
+            Assert.assertTrue(false);
+        }
+
+        byte out = kmer.shiftKmerWithNextChar(array[array.length - 1]);
+        Assert.assertEquals(GeneCode.getCodeFromSymbol((byte) 'A'), out);
+        Assert.assertEquals("ATAGAAG", kmer.toString());
+    }
+    // setByReadReverse at offset 1 must give the reverse complement of "ATAGAAG".
+    @Test
+    public void TestCompressKmerReverse() {
+        KmerBytesWritable.setGlobalKmerLength(k);
+        KmerBytesWritable kmer = new KmerBytesWritable();
+        kmer.setByRead(array, 0);
+        Assert.assertEquals("AATAGAA", kmer.toString());
+
+        kmer.setByReadReverse(array, 1);
+        Assert.assertEquals("CTTCTAT", kmer.toString());
+    }
+    // Prepending the last input symbol must drop the last letter and return its gene code.
+    @Test
+    public void TestMoveKmerReverse() {
+        KmerBytesWritable.setGlobalKmerLength(k);
+        KmerBytesWritable kmer = new KmerBytesWritable();
+        kmer.setByRead(array, 0);
+        Assert.assertEquals("AATAGAA", kmer.toString());
+        // NOTE(review): with k = 7 and 8 input bytes this loop never runs; confirm assertTrue(false) is intended.
+        for (int i = k; i < array.length - 1; i++) {
+            kmer.shiftKmerWithPreChar(array[i]);
+            Assert.assertTrue(false);
+        }
+
+        byte out = kmer.shiftKmerWithPreChar(array[array.length - 1]);
+        Assert.assertEquals(GeneCode.getCodeFromSymbol((byte) 'A'), out);
+        Assert.assertEquals("GAATAGA", kmer.toString());
+    }
+    // Every position of a 9-letter kmer must decode back to the original symbol.
+    @Test
+    public void TestGetGene() {
+        KmerBytesWritable.setGlobalKmerLength(9);
+        KmerBytesWritable kmer = new KmerBytesWritable();
+        String text = "AGCTGACCG";
+        byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G' };
+        kmer.setByRead(array, 0);
+
+        for (int i = 0; i < 9; i++) {
+            Assert.assertEquals(text.charAt(i), (char) (GeneCode.getSymbolFromCode(kmer.getGeneCodeAtPosition(i))));
+        }
+    }
+    // For k = 3..10: each extracted byte packs up to four 2-bit codes; re-appending them rebuilds the kmer.
+    @Test
+    public void TestGetOneByteFromKmer() {
+        byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
+        String string = "AGCTGACCGT";
+        for (int k = 3; k <= 10; k++) {
+            KmerBytesWritable.setGlobalKmerLength(k);
+            KmerBytesWritable kmer = new KmerBytesWritable();
+            KmerBytesWritable kmerAppend = new KmerBytesWritable();
+            kmer.setByRead(array, 0);
+            Assert.assertEquals(string.substring(0, k), kmer.toString());
+            for (int b = 0; b < k; b++) {
+                byte byteActual = KmerBytesWritable.getOneByteFromKmerAtPosition(b, kmer.getBytes(), kmer.getOffset(),
+                        kmer.getLength());
+                byte byteExpect = GeneCode.getCodeFromSymbol(array[b]);
+                for (int i = 1; i < 4 && b + i < k; i++) {
+                    byteExpect += GeneCode.getCodeFromSymbol(array[b + i]) << (i * 2);
+                }
+                Assert.assertEquals(byteExpect, byteActual);
+                KmerBytesWritable.appendOneByteAtPosition(b, byteActual, kmerAppend.getBytes(), kmerAppend.getOffset(),
+                        kmerAppend.getLength());
+            }
+            Assert.assertEquals(kmer.toString(), kmerAppend.toString());
+        }
+    }
+}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java.orig b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java.orig
new file mode 100644
index 0000000..8a0cb6d
--- /dev/null
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java.orig
@@ -0,0 +1,427 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.data.test;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+
+public class KmerBytesWritableTest {
+ static byte[] array = { 'A', 'A', 'T', 'A', 'G', 'A', 'A', 'G' };
+ static int k = 7;
+
+ @Test
+ public void TestCompressKmer() {
+ KmerBytesWritable.setGlobalKmerLength(k);
+ KmerBytesWritable kmer = new KmerBytesWritable();
+ kmer.setByRead(array, 0);
+ Assert.assertEquals(kmer.toString(), "AATAGAA");
+
+ kmer.setByRead(array, 1);
+ Assert.assertEquals(kmer.toString(), "ATAGAAG");
+ }
+
+ @Test
+ public void TestMoveKmer() {
+ KmerBytesWritable.setGlobalKmerLength(k);
+ KmerBytesWritable kmer = new KmerBytesWritable();
+ kmer.setByRead(array, 0);
+ Assert.assertEquals(kmer.toString(), "AATAGAA");
+
+ for (int i = k; i < array.length - 1; i++) {
+ kmer.shiftKmerWithNextCode(array[i]);
+ Assert.assertTrue(false);
+ }
+
+ byte out = kmer.shiftKmerWithNextChar(array[array.length - 1]);
+ Assert.assertEquals(out, GeneCode.getCodeFromSymbol((byte) 'A'));
+ Assert.assertEquals(kmer.toString(), "ATAGAAG");
+ }
+
+ @Test
+ public void TestCompressKmerReverse() {
+ KmerBytesWritable.setGlobalKmerLength(k);
+ KmerBytesWritable kmer = new KmerBytesWritable();
+ kmer.setByRead(array, 0);
+ Assert.assertEquals(kmer.toString(), "AATAGAA");
+
+ kmer.setByReadReverse(array, 1);
+ Assert.assertEquals(kmer.toString(), "CTTCTAT");
+ }
+
+ @Test
+ public void TestMoveKmerReverse() {
+ KmerBytesWritable.setGlobalKmerLength(k);
+ KmerBytesWritable kmer = new KmerBytesWritable();
+ kmer.setByRead(array, 0);
+ Assert.assertEquals(kmer.toString(), "AATAGAA");
+
+ for (int i = k; i < array.length - 1; i++) {
+ kmer.shiftKmerWithPreChar(array[i]);
+ Assert.assertTrue(false);
+ }
+
+ byte out = kmer.shiftKmerWithPreChar(array[array.length - 1]);
+ Assert.assertEquals(out, GeneCode.getCodeFromSymbol((byte) 'A'));
+ Assert.assertEquals(kmer.toString(), "GAATAGA");
+ }
+
+ @Test
+ public void TestGetGene() {
+ KmerBytesWritable.setGlobalKmerLength(9);
+ KmerBytesWritable kmer = new KmerBytesWritable();
+ String text = "AGCTGACCG";
+ byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G' };
+ kmer.setByRead(array, 0);
+
+ for (int i = 0; i < 9; i++) {
+ Assert.assertEquals(text.charAt(i), (char) (GeneCode.getSymbolFromCode(kmer.getGeneCodeAtPosition(i))));
+ }
+ }
+
+ @Test
+ public void TestGetOneByteFromKmer() {
+ byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
+ String string = "AGCTGACCGT";
+ for (int k = 3; k <= 10; k++) {
+ KmerBytesWritable.setGlobalKmerLength(k);
+ KmerBytesWritable kmer = new KmerBytesWritable();
+ KmerBytesWritable kmerAppend = new KmerBytesWritable();
+ kmer.setByRead(array, 0);
+ Assert.assertEquals(string.substring(0, k), kmer.toString());
+ for (int b = 0; b < k; b++) {
+ byte byteActual = KmerBytesWritable.getOneByteFromKmerAtPosition(b, kmer.getBytes(), kmer.getOffset(),
+ kmer.getLength());
+ byte byteExpect = GeneCode.getCodeFromSymbol(array[b]);
+ for (int i = 1; i < 4 && b + i < k; i++) {
+ byteExpect += GeneCode.getCodeFromSymbol(array[b + i]) << (i * 2);
+ }
+ Assert.assertEquals(byteActual, byteExpect);
+ KmerBytesWritable.appendOneByteAtPosition(b, byteActual, kmerAppend.getBytes(), kmerAppend.getOffset(),
+ kmerAppend.getLength());
+ }
+ Assert.assertEquals(kmer.toString(), kmerAppend.toString());
+ }
+ }
+<<<<<<< HEAD
+=======
+
+ @Test
+ public void TestMergeFFKmer() {
+ byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
+ String text = "AGCTGACCGT";
+ KmerBytesWritable kmer1 = new KmerBytesWritable(8);
+ kmer1.setByRead(array, 0);
+ String text1 = "AGCTGACC";
+ KmerBytesWritable kmer2 = new KmerBytesWritable(8);
+ kmer2.setByRead(array, 1);
+ String text2 = "GCTGACCG";
+ Assert.assertEquals(text2, kmer2.toString());
+ KmerBytesWritable merge = new KmerBytesWritable(kmer1);
+ int kmerSize = 8;
+ merge.mergeWithFFKmer(kmerSize, kmer2);
+ Assert.assertEquals(text1 + text2.substring(kmerSize - 1), merge.toString());
+
+ for (int i = 1; i < 8; i++) {
+ merge.set(kmer1);
+ merge.mergeWithFFKmer(i, kmer2);
+ Assert.assertEquals(text1 + text2.substring(i - 1), merge.toString());
+ }
+
+ for (int ik = 1; ik <= 10; ik++) {
+ for (int jk = 1; jk <= 10; jk++) {
+ kmer1 = new KmerBytesWritable(ik);
+ kmer2 = new KmerBytesWritable(jk);
+ kmer1.setByRead(array, 0);
+ kmer2.setByRead(array, 0);
+ text1 = text.substring(0, ik);
+ text2 = text.substring(0, jk);
+ Assert.assertEquals(text1, kmer1.toString());
+ Assert.assertEquals(text2, kmer2.toString());
+ for (int x = 1; x < jk; x++) {
+ merge.set(kmer1);
+ merge.mergeWithFFKmer(x, kmer2);
+ Assert.assertEquals(text1 + text2.substring(x - 1), merge.toString());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void TestMergeFRKmer() {
+ int kmerSize = 3;
+ String result = "AAGCTAACAACC";
+ byte[] resultArray = result.getBytes();
+
+ String text1 = "AAGCTAA";
+ KmerBytesWritable kmer1 = new KmerBytesWritable(text1.length());
+ kmer1.setByRead(resultArray, 0);
+ Assert.assertEquals(text1, kmer1.toString());
+
+ // kmer2 is the rc of the end of the read
+ String text2 = "GGTTGTT";
+ KmerBytesWritable kmer2 = new KmerBytesWritable(text2.length());
+ kmer2.setByReadReverse(resultArray, result.length() - text2.length());
+ Assert.assertEquals(text2, kmer2.toString());
+
+ KmerBytesWritable merge = new KmerBytesWritable(kmer1);
+ merge.mergeWithFRKmer(kmerSize, kmer2);
+ Assert.assertEquals(result, merge.toString());
+
+ int i = 1;
+ merge.set(kmer1);
+ merge.mergeWithFRKmer(i, kmer2);
+ Assert.assertEquals("AAGCTAAAACAACC", merge.toString());
+
+ i = 2;
+ merge.set(kmer1);
+ merge.mergeWithFRKmer(i, kmer2);
+ Assert.assertEquals("AAGCTAAACAACC", merge.toString());
+
+ i = 3;
+ merge.set(kmer1);
+ merge.mergeWithFRKmer(i, kmer2);
+ Assert.assertEquals("AAGCTAACAACC", merge.toString());
+ }
+
+
+ /**
+  * Checks mergeWithRFKmer in two parts.
+  * First, two k-mers cut from one read are merged with kmerSize arguments 1 to 3.
+  * Then four short merges on hand-written k-mers are checked; two of them call
+  * mergeWithFRKmer rather than mergeWithRFKmer.
+  */
+ @Test
+ public void TestMergeRFKmer() {
+     final String read = "GGCACAACAACCC";
+     final byte[] readBytes = read.getBytes();
+
+     // Forward k-mer: the eight bases of the read starting at offset 5.
+     final String tailText = "AACAACCC";
+     KmerBytesWritable tail = new KmerBytesWritable(tailText.length());
+     tail.setByRead(readBytes, 5);
+     Assert.assertEquals(tailText, tail.toString());
+
+     // Reverse-complement k-mer built from the first seven bases of the read.
+     final String headRcText = "TTGTGCC";
+     KmerBytesWritable headRc = new KmerBytesWritable(headRcText.length());
+     headRc.setByReadReverse(readBytes, 0);
+     Assert.assertEquals(headRcText, headRc.toString());
+
+     // With a kmerSize argument of 3 the merge reproduces the whole read.
+     KmerBytesWritable merged = new KmerBytesWritable(tail);
+     merged.mergeWithRFKmer(3, headRc);
+     Assert.assertEquals(read, merged.toString());
+
+     // expectedBySize[n - 1] is the expected merge result for a kmerSize argument of n.
+     String[] expectedBySize = {
+             "GGCACAAAACAACCC", "GGCACAAACAACCC", "GGCACAACAACCC"
+     };
+     for (int size = 1; size <= expectedBySize.length; size++) {
+         merged.set(tail);
+         merged.mergeWithRFKmer(size, headRc);
+         Assert.assertEquals(expectedBySize[size - 1], merged.toString());
+     }
+
+     // RF merge of the 3-mers CTA and AGA with a kmerSize argument of 3:
+     // the receiver is expected to gain one base in front (TCTA).
+     KmerBytesWritable self = new KmerBytesWritable(3);
+     KmerBytesWritable other = new KmerBytesWritable(3);
+     self.setByRead("CTA".getBytes(), 0);
+     other.setByRead("AGA".getBytes(), 0);
+     self.mergeWithRFKmer(3, other);
+     Assert.assertEquals("TCTA", self.toString());
+
+     // FR merge of CTA with ATA:
+     // the receiver is expected to gain one base at the end (CTAT).
+     self = new KmerBytesWritable(3);
+     other = new KmerBytesWritable(3);
+     self.setByRead("CTA".getBytes(), 0);
+     other.setByRead("ATA".getBytes(), 0);
+     self.mergeWithFRKmer(3, other);
+     Assert.assertEquals("CTAT", self.toString());
+
+     // FR merge with the two 3-mers swapped (ATA receives CTA):
+     // the receiver is expected to become ATAG.
+     self = new KmerBytesWritable(3);
+     other = new KmerBytesWritable(3);
+     self.setByRead("ATA".getBytes(), 0);
+     other.setByRead("CTA".getBytes(), 0);
+     self.mergeWithFRKmer(3, other);
+     Assert.assertEquals("ATAG", self.toString());
+
+     // RF merge of k-mers with different lengths (5-mer TCTAT receives 4-mer GAAC):
+     // the receiver is expected to gain two bases in front (GTTCTAT).
+     self = new KmerBytesWritable(5);
+     other = new KmerBytesWritable(4);
+     self.setByRead("TCTAT".getBytes(), 0);
+     other.setByRead("GAAC".getBytes(), 0);
+     self.mergeWithRFKmer(3, other);
+     Assert.assertEquals("GTTCTAT", self.toString());
+ }
+
+
+
+ /** Checks mergeWithRRKmer on two 8-mers of one read and on all pairs of read prefixes of length 1 to 10. */
+ @Test
+ public void TestMergeRRKmer() {
+     byte[] readBytes = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
+     String read = "AGCTGACCGT";
+     KmerBytesWritable preceding = new KmerBytesWritable(8);
+     preceding.setByRead(readBytes, 0);
+     String precedingText = "AGCTGACC";
+     KmerBytesWritable following = new KmerBytesWritable(8);
+     following.setByRead(readBytes, 1);
+     String followingText = "GCTGACCG";
+     Assert.assertEquals(followingText, following.toString());
+     KmerBytesWritable merged = new KmerBytesWritable(following);
+     merged.mergeWithRRKmer(8, preceding);
+     Assert.assertEquals(precedingText + followingText.substring(7), merged.toString());
+
+     for (int size = 1; size < 8; size++) {
+         merged.set(following);
+         merged.mergeWithRRKmer(size, preceding);
+         Assert.assertEquals(precedingText.substring(0, precedingText.length() - size + 1) + followingText, merged.toString());
+     }
+
+     for (int precedingLen = 1; precedingLen <= 10; precedingLen++) {
+         for (int followingLen = 1; followingLen <= 10; followingLen++) {
+             preceding = new KmerBytesWritable(precedingLen);
+             following = new KmerBytesWritable(followingLen);
+             preceding.setByRead(readBytes, 0);
+             following.setByRead(readBytes, 0);
+             precedingText = read.substring(0, precedingLen);
+             followingText = read.substring(0, followingLen);
+             Assert.assertEquals(precedingText, preceding.toString());
+             Assert.assertEquals(followingText, following.toString());
+             for (int size = 1; size < precedingLen; size++) {
+                 merged.set(following);
+                 merged.mergeWithRRKmer(size, preceding);
+                 Assert.assertEquals(precedingText.substring(0, precedingText.length() - size + 1) + followingText, merged.toString());
+             }
+         }
+     }
+ }
+
+ @Test // Scratch walk-through of the FF/FR/RF/RR overlap extraction plus Set/Map usage; results are printed, nothing is asserted.
+ public void TestFinalMerge() {
+ String selfString;
+ String match;
+ String msgString;
+ int index;
+ KmerBytesWritable kmer = new KmerBytesWritable();
+ int kmerSize = 3;
+
+ String F1 = "AATAG";
+ String F2 = "TAGAA";
+ String R1 = "CTATT";
+ String R2 = "TTCTA";
+
+ //FF test: keep the part of F2 that starts where the last (kmerSize - 1) characters of F1 first occur in it
+ selfString = F1;
+ match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length()); // last (kmerSize - 1) characters
+ msgString = F2;
+ index = msgString.indexOf(match);
+ kmer.reset(msgString.length() - index);
+ kmer.setByRead(msgString.substring(index).getBytes(), 0);
+ System.out.println(kmer.toString());
+
+ //FR test: same extraction as FF, but the message is the reverse complement of R2
+ selfString = F1;
+ match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
+ msgString = GeneCode.reverseComplement(R2);
+ index = msgString.indexOf(match);
+ kmer.reset(msgString.length() - index);
+ kmer.setByRead(msgString.substring(index).getBytes(), 0);
+ System.out.println(kmer.toString());
+
+ //RF test: keep the part of reverseComplement(F2) that ends with the last occurrence of R1's first (kmerSize - 1) characters
+ selfString = R1;
+ match = selfString.substring(0,kmerSize - 1); // first (kmerSize - 1) characters
+ msgString = GeneCode.reverseComplement(F2);
+ index = msgString.lastIndexOf(match) + kmerSize - 2; // index of the last character of that occurrence
+ kmer.reset(index + 1);
+ kmer.setByReadReverse(msgString.substring(0, index + 1).getBytes(), 0);
+ System.out.println(kmer.toString());
+
+ //RR test: same extraction as RF, but the message is R2 itself and it is read forward
+ selfString = R1;
+ match = selfString.substring(0,kmerSize - 1);
+ msgString = R2;
+ index = msgString.lastIndexOf(match) + kmerSize - 2;
+ kmer.reset(index + 1);
+ kmer.setByRead(msgString.substring(0, index + 1).getBytes(), 0);
+ System.out.println(kmer.toString());
+
+ String[][] connectedTable = new String[][]{ // only element [0][1] is read, by the println below
+ {"FF", "RF"},
+ {"FF", "RR"},
+ {"FR", "RF"},
+ {"FR", "RR"}
+ };
+ System.out.println(connectedTable[0][1]);
+
+ Set<Long> s1 = new HashSet<Long>(); // s1 = {1, 2}
+ Set<Long> s2 = new HashSet<Long>(); // s2 = {2, 3}
+ s1.add((long) 1);
+ s1.add((long) 2);
+ s2.add((long) 2);
+ s2.add((long) 3);
+ Set<Long> intersection = new HashSet<Long>(); // copy of s1 reduced to the elements also in s2
+ intersection.addAll(s1);
+ intersection.retainAll(s2);
+ System.out.println(intersection.toString());
+ Set<Long> difference = new HashSet<Long>(); // copy of s1 without the elements of s2
+ difference.addAll(s1);
+ difference.removeAll(s2);
+ System.out.println(difference.toString());
+
+ Map<KmerBytesWritable, Set<Long>> map = new HashMap<KmerBytesWritable, Set<Long>>(); // four 3-mers used as HashMap keys
+ KmerBytesWritable k1 = new KmerBytesWritable(3);
+ Set<Long> set1 = new HashSet<Long>();
+ k1.setByRead(("CTA").getBytes(), 0);
+ set1.add((long)1);
+ map.put(k1, set1);
+ KmerBytesWritable k2 = new KmerBytesWritable(3);
+ k2.setByRead(("GTA").getBytes(), 0);
+ Set<Long> set2 = new HashSet<Long>();
+ set2.add((long) 2);
+ map.put(k2, set2);
+ KmerBytesWritable k3 = new KmerBytesWritable(3);
+ k3.setByRead(("ATG").getBytes(), 0);
+ Set<Long> set3 = new HashSet<Long>();
+ set3.add((long) 3);
+ map.put(k3, set3);
+ KmerBytesWritable k4 = new KmerBytesWritable(3);
+ k4.setByRead(("AAT").getBytes(), 0);
+ Set<Long> set4 = new HashSet<Long>();
+ set4.add((long) 4);
+ map.put(k4, set4);
+ System.out.println("CTA = " + map.get(k1).toString()); // lookups reuse the same key objects that were inserted
+ System.out.println("GTA = " + map.get(k2).toString());
+ System.out.println("ATG = " + map.get(k3).toString());
+ System.out.println("AAT = " + map.get(k4).toString());
+ }
+>>>>>>> 94e075b5c3db9aa613ef61c2581430a143b17bc8
+}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
new file mode 100644
index 0000000..fc97664
--- /dev/null
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
@@ -0,0 +1,131 @@
+package edu.uci.ics.genomix.data.test;
+
+import java.util.Iterator;
+import java.util.Random;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerListWritable;
+
+ /** Tests for VKmerListWritable: append, reset, byte-array backed construction and iterator removal. */
+ public class KmerListWritableTest {
+
+     @Test
+     public void TestInitial() {
+         VKmerListWritable kmerList = new VKmerListWritable();
+         Assert.assertEquals(0, kmerList.getCountOfPosition());
+
+         //one kmer in list and reset each time
+         VKmerBytesWritable kmer;
+         for (int i = 1; i < 200; i++) {
+             kmer = new VKmerBytesWritable(i);
+             String randomString = generaterRandomString(i);
+             byte[] array = randomString.getBytes();
+             kmer.setByRead(i, array, 0);
+             kmerList.reset();
+             kmerList.append(kmer);
+             Assert.assertEquals(randomString, kmerList.getPosition(0).toString());
+             Assert.assertEquals(1, kmerList.getCountOfPosition());
+         }
+
+         kmerList.reset();
+         //add one more kmer each time and fix kmerSize
+         for (int i = 0; i < 200; i++) {
+             kmer = new VKmerBytesWritable(5);
+             String randomString = generaterRandomString(5);
+             byte[] array = randomString.getBytes();
+             kmer.setByRead(5, array, 0);
+             kmerList.append(kmer);
+             Assert.assertEquals(randomString, kmerList.getPosition(i).toString());
+             Assert.assertEquals(i + 1, kmerList.getCountOfPosition());
+         }
+
+         byte [] another = new byte [kmerList.getLength()*2];
+         int start = 20;
+         System.arraycopy(kmerList.getByteArray(), kmerList.getStartOffset(), another, start, kmerList.getLength());
+         VKmerListWritable plist2 = new VKmerListWritable(another, start);
+         for(int i = 0; i < plist2.getCountOfPosition(); i++){
+             Assert.assertEquals(kmerList.getPosition(i).toString(), plist2.getPosition(i).toString());
+         }
+     }
+
+     @Test
+     public void TestRemove() {
+         VKmerListWritable kmerList = new VKmerListWritable();
+         Assert.assertEquals(0, kmerList.getCountOfPosition());
+
+         int i;
+         VKmerBytesWritable kmer;
+         for (i = 0; i < 200; i++) {
+             kmer = new VKmerBytesWritable(5);
+             String randomString = generaterRandomString(5);
+             byte[] array = randomString.getBytes();
+             kmer.setByRead(5, array, 0);
+             kmerList.append(kmer);
+             Assert.assertEquals(randomString, kmerList.getPosition(i).toString());
+             Assert.assertEquals(i + 1, kmerList.getCountOfPosition());
+         }
+
+         //delete one element each time
+         VKmerBytesWritable tmpKmer = new VKmerBytesWritable(5);
+         VKmerListWritable copyList = new VKmerListWritable();
+         copyList.setCopy(kmerList);
+         Iterator<VKmerBytesWritable> iterator;
+         for(int j = 0; j < 5; j++){
+             iterator = copyList.iterator();
+             byte[] array = kmerList.getPosition(j).toString().getBytes();
+             VKmerBytesWritable deletePos = new VKmerBytesWritable(5);
+             deletePos.setByRead(5, array, 0);
+             boolean removed = false;
+             while(iterator.hasNext()){
+                 tmpKmer = iterator.next();
+                 if(tmpKmer.equals(deletePos)){
+                     iterator.remove();
+                     removed = true;
+                     break;
+                 }
+             }
+             Assert.assertTrue(removed);
+             Assert.assertEquals(200 - 1 - j, copyList.getCountOfPosition());
+             // NOTE(review): equals() on byte[] compares references, not contents, so this check cannot fail
+             // unless both k-mers share one backing array; the random 5-mers may also legitimately repeat.
+             while(iterator.hasNext()){
+                 tmpKmer = iterator.next();
+                 Assert.assertTrue(!tmpKmer.getBytes().equals(deletePos.getBytes()));
+             }
+         }
+
+         //delete all the elements
+         iterator = kmerList.iterator();
+         while(iterator.hasNext()){
+             tmpKmer = iterator.next();
+             iterator.remove();
+         }
+
+         Assert.assertEquals(0, kmerList.getCountOfPosition());
+
+         VKmerListWritable edgeList = new VKmerListWritable();
+         VKmerBytesWritable k = new VKmerBytesWritable(3);
+         k.setByRead(3, ("AAA").getBytes(), 0);
+         edgeList.append(k);
+         k.setByRead(3, ("CCC").getBytes(), 0);
+         edgeList.append(k);
+         for(VKmerBytesWritable edge : edgeList){
+             System.out.println(edge.toString());
+         }
+     }
+
+     public String generaterRandomString(int n){
+         char[] chars = "ACGT".toCharArray();
+         StringBuilder sb = new StringBuilder();
+         Random random = new Random();
+         for (int i = 0; i < n; i++) {
+             char c = chars[random.nextInt(chars.length)];
+             sb.append(c);
+         }
+         return sb.toString();
+     }
+ }
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/PositionListWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/PositionListWritableTest.java
new file mode 100644
index 0000000..ac7322e
--- /dev/null
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/PositionListWritableTest.java
@@ -0,0 +1,117 @@
+package edu.uci.ics.genomix.data.test;
+
+import java.util.Iterator;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+ /** Tests for PositionListWritable: append, iteration, byte-array backed construction and iterator removal. */
+ public class PositionListWritableTest {
+
+     @Test
+     public void TestInitial() {
+         PositionListWritable plist = new PositionListWritable();
+         Assert.assertEquals(0, plist.getCountOfPosition());
+
+         byte mateId;
+         long readId;
+         int posId;
+         for (int i = 0; i < 200; i++) {
+             mateId = (byte)1;
+             readId = (long)i;
+             posId = i;
+             plist.append(mateId, readId, posId);
+             Assert.assertEquals(mateId, plist.getPosition(i).getMateId());
+             Assert.assertEquals(readId, plist.getPosition(i).getReadId());
+             Assert.assertEquals(posId, plist.getPosition(i).getPosId());
+             Assert.assertEquals(i + 1, plist.getCountOfPosition());
+         }
+
+         int i = 0;
+         for (PositionWritable pos : plist) {
+             Assert.assertEquals((byte)1, pos.getMateId());
+             Assert.assertEquals((long) i, pos.getReadId());
+             Assert.assertEquals(i, pos.getPosId());
+             i++;
+         }
+
+         byte [] another = new byte [plist.getLength()*2];
+         int start = 20;
+         // NOTE(review): copies from offset 0 of the backing array; assumes the list data starts there - confirm.
+         System.arraycopy(plist.getByteArray(), 0, another, start, plist.getLength());
+         PositionListWritable plist2 = new PositionListWritable(another,start);
+         for( i = 0; i < plist2.getCountOfPosition(); i++){
+             Assert.assertEquals(plist.getPosition(i), plist2.getPosition(i));
+         }
+     }
+
+     @Test
+     public void TestRemove() {
+         PositionListWritable plist = new PositionListWritable();
+         Assert.assertEquals(0, plist.getCountOfPosition());
+
+         byte mateId;
+         long readId;
+         int posId;
+         for (int i = 0; i < 5; i++) {
+             mateId = (byte)1;
+             readId = (long)i;
+             posId = i;
+             plist.append(mateId, readId, posId);
+             Assert.assertEquals(mateId, plist.getPosition(i).getMateId());
+             Assert.assertEquals(readId, plist.getPosition(i).getReadId());
+             Assert.assertEquals(posId, plist.getPosition(i).getPosId());
+             Assert.assertEquals(i + 1, plist.getCountOfPosition());
+         }
+
+         int i = 0;
+         for (PositionWritable pos : plist) {
+             Assert.assertEquals((byte)1, pos.getMateId());
+             Assert.assertEquals((long) i, pos.getReadId());
+             Assert.assertEquals(i, pos.getPosId());
+             i++;
+         }
+
+         //delete one element each time
+         PositionListWritable copyList = new PositionListWritable();
+         copyList.set(plist);
+         PositionWritable pos = new PositionWritable();
+         Iterator<PositionWritable> iterator;
+         for(int j = 0; j < 5; j++){
+             iterator = copyList.iterator();
+             PositionWritable deletePos = new PositionWritable();
+             deletePos.set((byte)1, (long)j, j);
+             boolean removed = false;
+             while(iterator.hasNext()){
+                 pos = iterator.next();
+                 if(pos.equals(deletePos)){
+                     iterator.remove();
+                     removed = true;
+                     break;
+                 }
+             }
+             Assert.assertTrue(removed);
+             Assert.assertEquals(5 - 1 - j, copyList.getCountOfPosition());
+             // Every element still ahead of the iterator must differ from the removed position.
+             while(iterator.hasNext()){
+                 pos = iterator.next();
+                 Assert.assertTrue(! (pos.getUUID() == deletePos.getUUID() &&
+                         pos.getReadId() == deletePos.getReadId() &&
+                         pos.getPosId() == deletePos.getPosId()));
+             }
+         }
+
+         //delete all the elements
+         iterator = plist.iterator();
+         while(iterator.hasNext()){
+             pos = iterator.next();
+             iterator.remove();
+         }
+
+         Assert.assertEquals(0, plist.getCountOfPosition());
+     }
+ }
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/PositionWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/PositionWritableTest.java
new file mode 100644
index 0000000..003406d
--- /dev/null
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/PositionWritableTest.java
@@ -0,0 +1,47 @@
+package edu.uci.ics.genomix.data.test;
+
+import java.util.Random;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+import edu.uci.ics.genomix.data.Marshal;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+ /** Round-trip checks for PositionWritable: field constructor, byte-array constructor and setNewReference. */
+ public class PositionWritableTest {
+
+     @Test
+     public void TestInitial() {
+         Random gen = new Random();
+         byte[] start = new byte[15];
+         // The bound used to be written (1 << 47). With int operands Java keeps only the low five
+         // bits of the shift distance (47 & 31 == 15), so the loop has always run 1 << 15 times;
+         // spelling out 1 << 15 keeps the iteration count and removes the misleading literal.
+         for (long i = 0; i < (1 << 15); i++) {
+             byte mateId = (byte) (gen.nextBoolean() ? 1 : 0);
+             long readId = i;
+             int posId = (int) (i % (1 << 16));
+             PositionWritable pos = new PositionWritable(mateId, readId, posId);
+             Assert.assertEquals(mateId, pos.getMateId());
+             Assert.assertEquals(readId, pos.getReadId());
+             Assert.assertEquals(posId, pos.getPosId());
+
+             // Pack (readId + 1, posId, mateId) by hand and decode it through the byte-array constructor.
+             long uuid = ((readId + 1) << 17) + ((posId & 0xFFFF) << 1) + (mateId & 0b1);
+             Marshal.putLong(uuid, start, 0);
+             PositionWritable pos1 = new PositionWritable(start, 0);
+             Assert.assertEquals(mateId, pos1.getMateId());
+             Assert.assertEquals(readId + 1, pos1.getReadId());
+             Assert.assertEquals(posId, pos1.getPosId());
+
+             pos.setNewReference(start, 0);
+             Assert.assertEquals(mateId, pos.getMateId());
+             Assert.assertEquals(readId + 1, pos.getReadId());
+             Assert.assertEquals(posId, pos.getPosId());
+
+             Assert.assertEquals(pos1.toString(), pos.toString());
+         }
+     }
+ }
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
new file mode 100644
index 0000000..9bf728d
--- /dev/null
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
@@ -0,0 +1,580 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.data.test;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerListWritable;
+
+
+public class VKmerBytesWritableTest {
+ static byte[] array = { 'A', 'A', 'T', 'A', 'G', 'A', 'A', 'G' }; // shared read "AATAGAAG" used by the compress/move tests
+ static int k = 7; // k-mer length those tests use with the shared read
+
+ /** setByRead at offsets 0 and 1 must yield the first and second 7-mer of the shared read. */
+ @Test
+ public void TestCompressKmer() {
+     VKmerBytesWritable kmer = new VKmerBytesWritable(k);
+     kmer.setByRead(k, array, 0);
+     Assert.assertEquals("AATAGAA", kmer.toString());
+     kmer.setByRead(k, array, 1);
+     Assert.assertEquals("ATAGAAG", kmer.toString());
+ }
+
+ /** Shifting the last base of the shared read into the first 7-mer must yield the second 7-mer. */
+ @Test
+ public void TestMoveKmer() {
+     VKmerBytesWritable kmer = new VKmerBytesWritable(k);
+     kmer.setByRead(k, array, 0);
+     Assert.assertEquals("AATAGAA", kmer.toString());
+     // k == array.length - 1 here, so this loop body never runs; it fails the test if that changes.
+     for (int i = k; i < array.length - 1; i++) {
+         kmer.shiftKmerWithNextCode(array[i]);
+         Assert.assertTrue(false);
+     }
+     byte out = kmer.shiftKmerWithNextChar(array[array.length - 1]);
+     Assert.assertEquals(GeneCode.getCodeFromSymbol((byte) 'A'), out);
+     Assert.assertEquals("ATAGAAG", kmer.toString());
+ }
+
+ /** setByReadReverse at offset 1 must yield the reverse complement of the 7 bases starting there. */
+ @Test
+ public void TestCompressKmerReverse() {
+     VKmerBytesWritable kmer = new VKmerBytesWritable();
+     kmer.setByRead(k, array, 0);
+     Assert.assertEquals("AATAGAA", kmer.toString());
+     kmer.setByReadReverse(k, array, 1);
+     Assert.assertEquals("CTTCTAT", kmer.toString());
+ }
+
+ /** shiftKmerWithPreChar with the last base of the shared read must turn AATAGAA into GAATAGA and return the code for 'A'. */
+ @Test
+ public void TestMoveKmerReverse() {
+     VKmerBytesWritable kmer = new VKmerBytesWritable();
+     kmer.setByRead(k, array, 0);
+     Assert.assertEquals("AATAGAA", kmer.toString());
+     // k == array.length - 1 here, so this loop body never runs; it fails the test if that changes.
+     for (int i = k; i < array.length - 1; i++) {
+         kmer.shiftKmerWithPreChar(array[i]);
+         Assert.assertTrue(false);
+     }
+     byte out = kmer.shiftKmerWithPreChar(array[array.length - 1]);
+     Assert.assertEquals(GeneCode.getCodeFromSymbol((byte) 'A'), out);
+     Assert.assertEquals("GAATAGA", kmer.toString());
+ }
+
+ /** Each position of a 9-mer must decode, via getGeneCodeAtPosition, to the base it was built from. */
+ @Test
+ public void TestGetGene() {
+     VKmerBytesWritable kmer = new VKmerBytesWritable();
+     final String bases = "AGCTGACCG";
+     final byte[] baseBytes = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G' };
+     kmer.setByRead(bases.length(), baseBytes, 0);
+     for (int pos = 0; pos < bases.length(); pos++) {
+         Assert.assertEquals(bases.charAt(pos), (char) (GeneCode.getSymbolFromCode(kmer.getGeneCodeAtPosition(pos))));
+     }
+ }
+
+ @Test // reads each k-mer byte-wise, checks every byte, and rebuilds the k-mer by appending those bytes
+ public void TestGetOneByteFromKmer() {
+     byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
+     String string = "AGCTGACCGT";
+     for (int len = 3; len <= 10; len++) { // named len so the static field k is no longer shadowed
+         VKmerBytesWritable kmer = new VKmerBytesWritable();
+         VKmerBytesWritable kmerAppend = new VKmerBytesWritable(len);
+         kmer.setByRead(len, array, 0);
+         Assert.assertEquals(string.substring(0, len), kmer.toString());
+         for (int b = 0; b < len; b++) {
+             byte byteActual = KmerBytesWritable.getOneByteFromKmerAtPosition(b, kmer.getBytes(),
+                     kmer.getKmerOffset(), kmer.getKmerByteLength());
+             byte byteExpect = GeneCode.getCodeFromSymbol(array[b]); // codes of up to four bases, each shifted two more bits left
+             for (int i = 1; i < 4 && b + i < len; i++) {
+                 byteExpect += GeneCode.getCodeFromSymbol(array[b + i]) << (i * 2);
+             }
+             Assert.assertEquals(byteExpect, byteActual);
+             KmerBytesWritable.appendOneByteAtPosition(b, byteActual, kmerAppend.getBytes(),
+                     kmerAppend.getKmerOffset(), kmerAppend.getKmerByteLength());
+         }
+         Assert.assertEquals(kmer.toString(), kmerAppend.toString());
+     }
+ }
+
+ /** Checks mergeWithFFKmer on two 8-mers of one read and on all pairs of read prefixes of length 1 to 10. */
+ @Test
+ public void TestMergeFFKmer() {
+     byte[] readBytes = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
+     String read = "AGCTGACCGT";
+     VKmerBytesWritable left = new VKmerBytesWritable();
+     left.setByRead(8, readBytes, 0);
+     String leftText = "AGCTGACC";
+     Assert.assertEquals(leftText, left.toString());
+
+     VKmerBytesWritable right = new VKmerBytesWritable();
+     right.setByRead(8, readBytes, 1);
+     String rightText = "GCTGACCG";
+     Assert.assertEquals(rightText, right.toString());
+
+     VKmerBytesWritable merged = new VKmerBytesWritable(left);
+     merged.mergeWithFFKmer(8, right);
+     Assert.assertEquals(leftText + rightText.substring(7), merged.toString());
+
+     for (int size = 1; size < 8; size++) {
+         merged.setAsCopy(left);
+         merged.mergeWithFFKmer(size, right);
+         Assert.assertEquals(leftText + rightText.substring(size - 1), merged.toString());
+     }
+
+     for (int leftLen = 1; leftLen <= 10; leftLen++) {
+         for (int rightLen = 1; rightLen <= 10; rightLen++) {
+             left = new VKmerBytesWritable(leftLen);
+             right = new VKmerBytesWritable(rightLen);
+             left.setByRead(leftLen, readBytes, 0);
+             right.setByRead(rightLen, readBytes, 0);
+             leftText = read.substring(0, leftLen);
+             rightText = read.substring(0, rightLen);
+             Assert.assertEquals(leftText, left.toString());
+             Assert.assertEquals(rightText, right.toString());
+             for (int size = 1; size < Math.min(rightLen, leftLen); size++) {
+                 merged.setAsCopy(left);
+                 merged.mergeWithFFKmer(size, right);
+                 Assert.assertEquals(leftText + rightText.substring(size - 1), merged.toString());
+             }
+         }
+     }
+ }
+
+ /**
+  * Checks mergeWithFRKmer: a k-mer read forward from the start of a read is merged with a
+  * k-mer read as the reverse complement of the read's end, for kmerSize arguments 1 to 3.
+  */
+ @Test
+ public void TestMergeFRKmer() {
+     final String read = "AAGCTAACAACC";
+     final byte[] readBytes = read.getBytes();
+
+     // Forward k-mer taken from the first seven bases of the read.
+     final String forwardText = "AAGCTAA";
+     VKmerBytesWritable forward = new VKmerBytesWritable();
+     forward.setByRead(forwardText.length(), readBytes, 0);
+     Assert.assertEquals(forwardText, forward.toString());
+
+     // Reverse-complement k-mer built from the last seven bases of the read.
+     final String reverseText = "GGTTGTT";
+     VKmerBytesWritable reverse = new VKmerBytesWritable();
+     reverse.setByReadReverse(reverseText.length(), readBytes, read.length() - reverseText.length());
+     Assert.assertEquals(reverseText, reverse.toString());
+
+     // With a kmerSize argument of 3 the merge reproduces the whole read.
+     VKmerBytesWritable merged = new VKmerBytesWritable();
+     merged.setAsCopy(forward);
+     merged.mergeWithFRKmer(3, reverse);
+     Assert.assertEquals(read, merged.toString());
+
+     // expectedBySize[n - 1] is the expected merge result for a kmerSize argument of n.
+     String[] expectedBySize = {
+             "AAGCTAAAACAACC", "AAGCTAAACAACC", "AAGCTAACAACC"
+     };
+     for (int size = 1; size <= expectedBySize.length; size++) {
+         merged.setAsCopy(forward);
+         merged.mergeWithFRKmer(size, reverse);
+         Assert.assertEquals(expectedBySize[size - 1], merged.toString());
+     }
+ }
+
+ @Test // mergeWithRFKmer on two k-mers cut from one read, followed by short hand-written RF/FR merge cases
+ public void TestMergeRFKmer() {
+ int kmerSize = 3;
+ String result = "GGCACAACAACCC";
+ byte[] resultArray = result.getBytes();
+
+ String text1 = "AACAACCC";
+ VKmerBytesWritable kmer1 = new VKmerBytesWritable();
+ kmer1.setByRead(text1.length(), resultArray, 5); // the eight bases of the read starting at offset 5
+ Assert.assertEquals(text1, kmer1.toString());
+
+ // kmer2 is the rc of the start of the read (it is built from offset 0)
+ String text2 = "TTGTGCC";
+ VKmerBytesWritable kmer2 = new VKmerBytesWritable();
+ kmer2.setByReadReverse(text2.length(), resultArray, 0);
+ Assert.assertEquals(text2, kmer2.toString());
+
+ VKmerBytesWritable merge = new VKmerBytesWritable();
+ merge.setAsCopy(kmer1);
+ merge.mergeWithRFKmer(kmerSize, kmer2); // with kmerSize 3 the merge is expected to reproduce the whole read
+ Assert.assertEquals(result, merge.toString());
+
+ int i = 1;
+ merge.setAsCopy(kmer1);
+ merge.mergeWithRFKmer(i, kmer2);
+ Assert.assertEquals("GGCACAAAACAACCC", merge.toString());
+
+ i = 2;
+ merge.setAsCopy(kmer1);
+ merge.mergeWithRFKmer(i, kmer2);
+ Assert.assertEquals("GGCACAAACAACCC", merge.toString());
+
+ i = 3;
+ merge.setAsCopy(kmer1);
+ merge.mergeWithRFKmer(i, kmer2);
+ Assert.assertEquals("GGCACAACAACCC", merge.toString());
+
+ // String test1 = "CTTAT";
+ // String test2 = "AGACC"; // rc = GGTCT
+ // VKmerBytesWritable k1 = new VKmerBytesWritable(5);
+ // VKmerBytesWritable k2 = new VKmerBytesWritable(5);
+ // k1.setByRead(test1.getBytes(), 0);
+ // k2.setByRead(test2.getBytes(), 0);
+ // k1.mergeWithRFKmer(3, k2);
+ // Assert.assertEquals("GGTCTTAT", k1.toString()); //GGTCGTCT ->
+ // AGACGACC ??
+
+ String test3 = "CTA";
+ String test4 = "AGA"; // rc = TCT
+ VKmerBytesWritable k3 = new VKmerBytesWritable();
+ VKmerBytesWritable k4 = new VKmerBytesWritable();
+ k3.setByRead(3, test3.getBytes(), 0);
+ k4.setByRead(3, test4.getBytes(), 0);
+ k3.mergeWithRFKmer(3, k4);
+ Assert.assertEquals("TCTA", k3.toString());
+ // Assert.assertEquals("CTAT", k3); // this is an incorrect test case--
+ // the merge always flips the passed-in kmer
+
+ String test1;
+ String test2;
+ test1 = "CTA"; // same inputs and expectation as the k3/k4 case above
+ test2 = "AGA";
+ VKmerBytesWritable k1 = new VKmerBytesWritable();
+ VKmerBytesWritable k2 = new VKmerBytesWritable();
+ k1.setByRead(3, test1.getBytes(), 0);
+ k2.setByRead(3, test2.getBytes(), 0);
+ k1.mergeWithRFKmer(3, k2);
+ Assert.assertEquals("TCTA", k1.toString());
+
+
+
+ test1 = "CTA";
+ test2 = "ATA"; //TAT
+ k1 = new VKmerBytesWritable();
+ k2 = new VKmerBytesWritable();
+ k1.setByRead(3, test1.getBytes(), 0);
+ k2.setByRead(3, test2.getBytes(), 0);
+ k1.mergeWithFRKmer(3, k2); // note: this case and the next call the FR merge, not the RF merge
+ Assert.assertEquals("CTAT", k1.toString());
+
+ test1 = "ATA";
+ test2 = "CTA"; //TAT
+ k1 = new VKmerBytesWritable();
+ k2 = new VKmerBytesWritable();
+ k1.setByRead(3, test1.getBytes(), 0);
+ k2.setByRead(3, test2.getBytes(), 0);
+ k1.mergeWithFRKmer(3, k2);
+ Assert.assertEquals("ATAG", k1.toString());
+
+ test1 = "TCTAT";
+ test2 = "GAAC";
+ k1 = new VKmerBytesWritable();
+ k2 = new VKmerBytesWritable();
+ k1.setByRead(5, test1.getBytes(), 0); // k-mers of different lengths: a 5-mer receives a 4-mer
+ k2.setByRead(4, test2.getBytes(), 0);
+ k1.mergeWithRFKmer(3, k2);
+ Assert.assertEquals("GTTCTAT", k1.toString());
+ }
+
+ /** Checks mergeWithRRKmer on two 8-mers of one read and on all pairs of read prefixes of length 1 to 10. */
+ @Test
+ public void TestMergeRRKmer() {
+     byte[] readBytes = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
+     String read = "AGCTGACCGT";
+     VKmerBytesWritable preceding = new VKmerBytesWritable();
+     preceding.setByRead(8, readBytes, 0);
+     String precedingText = "AGCTGACC";
+     VKmerBytesWritable following = new VKmerBytesWritable();
+     following.setByRead(8, readBytes, 1);
+     String followingText = "GCTGACCG";
+     Assert.assertEquals(followingText, following.toString());
+     VKmerBytesWritable merged = new VKmerBytesWritable(following);
+     merged.mergeWithRRKmer(8, preceding);
+     Assert.assertEquals(precedingText + followingText.substring(7), merged.toString());
+
+     for (int size = 1; size < 8; size++) {
+         merged.setAsCopy(following);
+         merged.mergeWithRRKmer(size, preceding);
+         Assert.assertEquals(precedingText.substring(0, precedingText.length() - size + 1) + followingText, merged.toString());
+     }
+
+     for (int precedingLen = 1; precedingLen <= 10; precedingLen++) {
+         for (int followingLen = 1; followingLen <= 10; followingLen++) {
+             preceding = new VKmerBytesWritable();
+             following = new VKmerBytesWritable();
+             preceding.setByRead(precedingLen, readBytes, 0);
+             following.setByRead(followingLen, readBytes, 0);
+             precedingText = read.substring(0, precedingLen);
+             followingText = read.substring(0, followingLen);
+             Assert.assertEquals(precedingText, preceding.toString());
+             Assert.assertEquals(followingText, following.toString());
+             for (int size = 1; size < Math.min(precedingLen, followingLen); size++) {
+                 merged.setAsCopy(following);
+                 merged.mergeWithRRKmer(size, preceding);
+                 Assert.assertEquals(precedingText.substring(0, precedingText.length() - size + 1) + followingText, merged.toString());
+             }
+         }
+     }
+ }
+
+ /** An RF merge followed by an RR merge: each step is expected to add one base in front of the receiver. */
+ @Test
+ public void TestMergeRFAndRRKmer() {
+     VKmerBytesWritable receiver = new VKmerBytesWritable();
+     VKmerBytesWritable first = new VKmerBytesWritable();
+     VKmerBytesWritable second = new VKmerBytesWritable();
+     receiver.setByRead(5, "TAGAT".getBytes(), 0);
+     first.setByRead(5, "TCTAG".getBytes(), 0); // rc = CTAGA
+     second.setByRead(5, "GCTAG".getBytes(), 0);
+
+     receiver.mergeWithRFKmer(5, first);
+     Assert.assertEquals("CTAGAT", receiver.toString());
+
+     receiver.mergeWithRRKmer(5, second);
+     Assert.assertEquals("GCTAGAT", receiver.toString());
+ }
+
+ /** Two successive RF merges: each step is expected to add one base in front of the receiver. */
+ @Test
+ public void TestMergeRFAndRFKmer() {
+     VKmerBytesWritable receiver = new VKmerBytesWritable();
+     VKmerBytesWritable first = new VKmerBytesWritable();
+     VKmerBytesWritable second = new VKmerBytesWritable();
+     receiver.setByRead(5, "TAGAT".getBytes(), 0);
+     first.setByRead(5, "TCTAG".getBytes(), 0); // rc = CTAGA
+     second.setByRead(5, "CTAGC".getBytes(), 0); // rc = GCTAG
+
+     receiver.mergeWithRFKmer(5, first);
+     Assert.assertEquals("CTAGAT", receiver.toString());
+
+     receiver.mergeWithRFKmer(5, second);
+     Assert.assertEquals("GCTAGAT", receiver.toString());
+ }
+
+ /** An RF merge (one base expected in front) followed by an FR merge (one base expected at the end). */
+ @Test
+ public void TestMergeRFAndFRKmer() {
+     VKmerBytesWritable first = new VKmerBytesWritable();
+     VKmerBytesWritable receiver = new VKmerBytesWritable();
+     VKmerBytesWritable second = new VKmerBytesWritable();
+     first.setByRead(5, "TAGAT".getBytes(), 0); // rc = ATCTA
+     receiver.setByRead(5, "TCTAG".getBytes(), 0); // rc = CTAGA
+     second.setByRead(5, "GCTAG".getBytes(), 0); // rc = CTAGC
+
+     receiver.mergeWithRFKmer(5, first);
+     Assert.assertEquals("ATCTAG", receiver.toString());
+
+     receiver.mergeWithFRKmer(5, second);
+     Assert.assertEquals("ATCTAGC", receiver.toString());
+ }
+
+ /** An RF merge (one base expected in front) followed by an FF merge (one base expected at the end). */
+ @Test
+ public void TestMergeRFAndFFKmer() {
+     VKmerBytesWritable first = new VKmerBytesWritable();
+     VKmerBytesWritable receiver = new VKmerBytesWritable();
+     VKmerBytesWritable second = new VKmerBytesWritable();
+     first.setByRead(5, "TAGAT".getBytes(), 0); // rc = ATCTA
+     receiver.setByRead(5, "TCTAG".getBytes(), 0); // rc = CTAGA
+     second.setByRead(5, "CTAGC".getBytes(), 0); // rc = GCTAG
+
+     receiver.mergeWithRFKmer(5, first);
+     Assert.assertEquals("ATCTAG", receiver.toString());
+
+     receiver.mergeWithFFKmer(5, second);
+     Assert.assertEquals("ATCTAGC", receiver.toString());
+ }
+
+ /** Chains an RF merge and an FF merge over three 5-mers; same inputs and expectations as TestMergeRFAndFFKmer. */
+ @Test
+ public void TestMergeThreeVKmersRF_FF() {
+     VKmerBytesWritable first = new VKmerBytesWritable();
+     VKmerBytesWritable receiver = new VKmerBytesWritable();
+     VKmerBytesWritable second = new VKmerBytesWritable();
+     first.setByRead(5, "TAGAT".getBytes(), 0); // rc = ATCTA
+     receiver.setByRead(5, "TCTAG".getBytes(), 0); // rc = CTAGA
+     second.setByRead(5, "CTAGC".getBytes(), 0); // rc = GCTAG
+
+     receiver.mergeWithRFKmer(5, first);
+     Assert.assertEquals("ATCTAG", receiver.toString());
+
+     receiver.mergeWithFFKmer(5, second);
+     Assert.assertEquals("ATCTAGC", receiver.toString());
+ }
+
+ /** Chains two RF merges over three 5-mers; same inputs and expectations as TestMergeRFAndRFKmer. */
+ @Test
+ public void TestMergeThreeVKmerRF_RF() {
+     VKmerBytesWritable receiver = new VKmerBytesWritable();
+     VKmerBytesWritable first = new VKmerBytesWritable();
+     VKmerBytesWritable second = new VKmerBytesWritable();
+     receiver.setByRead(5, "TAGAT".getBytes(), 0);
+     first.setByRead(5, "TCTAG".getBytes(), 0); // rc = CTAGA
+     second.setByRead(5, "CTAGC".getBytes(), 0); // rc = GCTAG
+
+     receiver.mergeWithRFKmer(5, first);
+     Assert.assertEquals("CTAGAT", receiver.toString());
+
+     receiver.mergeWithRFKmer(5, second);
+     Assert.assertEquals("GCTAGAT", receiver.toString());
+ }
+
+ @Test
+ public void TestFinalMerge() { // scratch test: it contains no assertions, every result is only printed
+ String selfString;
+ String match;
+ String msgString;
+ int index;
+ VKmerBytesWritable kmer = new VKmerBytesWritable();
+ int kmerSize = 3;
+
+ String F1 = "AATAG";
+ String F2 = "TAGAA";
+ String R1 = "CTATT"; // reverse complement of F1
+ String R2 = "TTCTA"; // reverse complement of F2
+
+ //FF test: match is the last (kmerSize - 1) letters of selfString, searched from the front of msgString
+ selfString = F1;
+ match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
+ msgString = F2;
+ index = msgString.indexOf(match);
+ // does this test belong in VKmer so it can have variable-length kmers?
+// kmer.reset(msgString.length() - index);
+ kmer.setByRead(kmerSize, msgString.substring(index).getBytes(), 0);
+ System.out.println(kmer.toString());
+
+ //FR test: same as FF, but the message is reverse-complemented first and the kmer is reset before loading
+ selfString = F1;
+ match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
+ msgString = GeneCode.reverseComplement(R2);
+ index = msgString.indexOf(match);
+ kmer.reset(msgString.length() - index);
+ kmer.setByRead(kmerSize, msgString.substring(index).getBytes(), 0);
+ System.out.println(kmer.toString());
+
+ //RF test: match is the first (kmerSize - 1) letters of selfString, searched from the back of msgString
+ selfString = R1;
+ match = selfString.substring(0,kmerSize - 1);
+ msgString = GeneCode.reverseComplement(F2);
+ index = msgString.lastIndexOf(match) + kmerSize - 2; // index of the last letter of the match
+ kmer.reset(index + 1);
+ kmer.setByReadReverse(kmerSize, msgString.substring(0, index + 1).getBytes(), 0);
+ System.out.println(kmer.toString());
+
+ //RR test: same prefix search as RF, on the unmodified message and loaded with setByRead
+ selfString = R1;
+ match = selfString.substring(0,kmerSize - 1);
+ msgString = R2;
+ index = msgString.lastIndexOf(match) + kmerSize - 2; // index of the last letter of the match
+ kmer.reset(index + 1);
+ kmer.setByRead(kmerSize, msgString.substring(0, index + 1).getBytes(), 0);
+ System.out.println(kmer.toString());
+
+ String[][] connectedTable = new String[][]{
+ {"FF", "RF"},
+ {"FF", "RR"},
+ {"FR", "RF"},
+ {"FR", "RR"}
+ };
+ System.out.println(connectedTable[0][1]); // prints "RF"
+
+ Set<Long> s1 = new HashSet<Long>(); // plain java.util set algebra, independent of the kmer types
+ Set<Long> s2 = new HashSet<Long>();
+ s1.add((long) 1);
+ s1.add((long) 2);
+ s2.add((long) 2);
+ s2.add((long) 3);
+ Set<Long> intersection = new HashSet<Long>();
+ intersection.addAll(s1);
+ intersection.retainAll(s2); // leaves {2}
+ System.out.println(intersection.toString());
+ Set<Long> difference = new HashSet<Long>();
+ difference.addAll(s1);
+ difference.removeAll(s2); // leaves {1}
+ System.out.println(difference.toString());
+
+ Map<VKmerBytesWritable, Set<Long>> map = new HashMap<VKmerBytesWritable, Set<Long>>(); // kmers as hash keys
+ VKmerBytesWritable k1 = new VKmerBytesWritable();
+ Set<Long> set1 = new HashSet<Long>();
+ k1.setByRead(3, ("CTA").getBytes(), 0);
+ set1.add((long)1);
+ map.put(k1, set1);
+ VKmerBytesWritable k2 = new VKmerBytesWritable();
+ k2.setByRead(3, ("GTA").getBytes(), 0);
+ Set<Long> set2 = new HashSet<Long>();
+ set2.add((long) 2);
+ map.put(k2, set2);
+ VKmerBytesWritable k3 = new VKmerBytesWritable();
+ k3.setByRead(3, ("ATG").getBytes(), 0);
+ Set<Long> set3 = new HashSet<Long>();
+ set3.add((long) 2);
+ map.put(k3, set3);
+ VKmerBytesWritable k4 = new VKmerBytesWritable();
+ k4.setByRead(3, ("AAT").getBytes(), 0);
+ Set<Long> set4 = new HashSet<Long>();
+ set4.add((long) 1);
+ map.put(k4, set4);
+ VKmerListWritable kmerList = new VKmerListWritable();
+ kmerList.append(k1);
+ kmerList.append(k2);
+ System.out.println("CTA = " + map.get(k1).toString());
+ System.out.println("GTA = " + map.get(k2).toString());
+ System.out.println("ATG = " + map.get(k3).toString());
+ System.out.println("AAT = " + map.get(k4).toString());
+ System.out.println(k1.compareTo(k2));
+ System.out.println(k2.compareTo(k1));
+
+ System.out.println("CTA = " + kmerList.getPosition(0).toString());
+ System.out.println("GTA = " + kmerList.getPosition(1).toString());
+ System.out.println("CTA = " + map.get(kmerList.getPosition(0)).toString()); // NOTE(review): NPE if the lookup misses;
+ System.out.println("GTA = " + map.get(kmerList.getPosition(1)).toString()); // presumably relies on content-based equals/hashCode - confirm
+ }
+
+    @Test
+    public void TestEditDistance() {
+        VKmerBytesWritable kmer1 = new VKmerBytesWritable("ACGT");
+        VKmerBytesWritable kmer2 = new VKmerBytesWritable("AAAACGT");
+
+        // JUnit's convention is assertEquals(expected, actual), so failure messages name the right value;
+        // the floating-point result is compared with an explicit (zero) delta instead of the two-argument form.
+        Assert.assertEquals(3, kmer1.editDistance(kmer2));
+        Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+        Assert.assertEquals(.75f, kmer1.fracDissimilar(kmer2), 0f);
+
+        kmer1.setAsCopy(""); // against an empty kmer the distance must equal the other kmer's length
+        Assert.assertEquals(kmer2.getKmerLetterLength(), kmer1.editDistance(kmer2));
+        Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+
+        kmer2.setAsCopy(""); // both kmers are now empty; the expected distance is still kmer2's length
+        Assert.assertEquals(kmer2.getKmerLetterLength(), kmer1.editDistance(kmer2));
+        Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+    }
+
+}
diff --git a/genomix/genomix-data/src/test/resources/data/0/text.txt b/genomix/genomix-data/src/test/resources/data/0/text.txt
new file mode 100755
index 0000000..f63a141
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/data/0/text.txt
@@ -0,0 +1,4 @@
+@625E1AAXX100810:1:100:10000:10271/1
+AATAGAAG
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
diff --git a/genomix/genomix-data/src/test/resources/data/webmap/text.txt b/genomix/genomix-data/src/test/resources/data/webmap/text.txt
new file mode 100755
index 0000000..f63a141
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/data/webmap/text.txt
@@ -0,0 +1,4 @@
+@625E1AAXX100810:1:100:10000:10271/1
+AATAGAAG
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
diff --git a/genomix/genomix-data/src/test/resources/expected/result2 b/genomix/genomix-data/src/test/resources/expected/result2
new file mode 100755
index 0000000..5e76458
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/expected/result2
@@ -0,0 +1,4 @@
+AATAG |A 1
+AGAAG T| 1
+ATAGA A|A 1
+TAGAA A|G 1
diff --git a/genomix/genomix-data/src/test/resources/hadoop/conf/core-site.xml b/genomix/genomix-data/src/test/resources/hadoop/conf/core-site.xml
new file mode 100644
index 0000000..3e5bacb
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/hadoop/conf/core-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+ <property>
+ <name>fs.default.name</name>
+ <value>hdfs://127.0.0.1:31888</value>
+ </property>
+ <property>
+ <name>hadoop.tmp.dir</name>
+ <value>/tmp/hadoop</value>
+ </property>
+
+
+</configuration>
diff --git a/genomix/genomix-data/src/test/resources/hadoop/conf/hdfs-site.xml b/genomix/genomix-data/src/test/resources/hadoop/conf/hdfs-site.xml
new file mode 100644
index 0000000..b1b1902
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/hadoop/conf/hdfs-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+ <property>
+ <name>dfs.replication</name>
+ <value>1</value>
+ </property>
+
+ <property>
+ <name>dfs.block.size</name>
+ <value>65536</value>
+ </property>
+
+</configuration>
diff --git a/genomix/genomix-data/src/test/resources/hadoop/conf/log4j.properties b/genomix/genomix-data/src/test/resources/hadoop/conf/log4j.properties
new file mode 100755
index 0000000..d5e6004
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/hadoop/conf/log4j.properties
@@ -0,0 +1,94 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=FATAL,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=FATAL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
diff --git a/genomix/genomix-data/src/test/resources/hadoop/conf/mapred-site.xml b/genomix/genomix-data/src/test/resources/hadoop/conf/mapred-site.xml
new file mode 100644
index 0000000..525e7d5
--- /dev/null
+++ b/genomix/genomix-data/src/test/resources/hadoop/conf/mapred-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+ <property>
+ <name>mapred.job.tracker</name>
+ <value>localhost:29007</value>
+ </property>
+ <property>
+ <name>mapred.tasktracker.map.tasks.maximum</name>
+ <value>20</value>
+ </property>
+ <property>
+ <name>mapred.tasktracker.reduce.tasks.maximum</name>
+ <value>20</value>
+ </property>
+ <property>
+ <name>mapred.max.split.size</name>
+ <value>2048</value>
+ </property>
+
+</configuration>
diff --git a/genomix/genomix-hadoop/data/webmap/AdjSplitRepeat.txt b/genomix/genomix-hadoop/data/webmap/AdjSplitRepeat.txt
new file mode 100644
index 0000000..f2e3942
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/AdjSplitRepeat.txt
@@ -0,0 +1,3 @@
+1 AATAG
+2 GCATA
+3 ATAGC
diff --git a/genomix/genomix-hadoop/data/webmap/MergeBubble.txt b/genomix/genomix-hadoop/data/webmap/MergeBubble.txt
new file mode 100644
index 0000000..087f43e
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/MergeBubble.txt
@@ -0,0 +1,2 @@
+1 AATAGAA
+2 AATACAA
diff --git a/genomix/genomix-hadoop/data/webmap/RemoveBridge.txt b/genomix/genomix-hadoop/data/webmap/RemoveBridge.txt
new file mode 100644
index 0000000..472a7dc
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/RemoveBridge.txt
@@ -0,0 +1,2 @@
+1 AATAG
+2 CACGC
diff --git a/genomix/genomix-hadoop/data/webmap/SplitOnce.txt b/genomix/genomix-hadoop/data/webmap/SplitOnce.txt
new file mode 100644
index 0000000..d8e2b7e
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/SplitOnce.txt
@@ -0,0 +1,2 @@
+1 AATAG
+2 GCATA
diff --git a/genomix/genomix-hadoop/data/webmap/SplitTwice.txt b/genomix/genomix-hadoop/data/webmap/SplitTwice.txt
new file mode 100644
index 0000000..bb03d70
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/SplitTwice.txt
@@ -0,0 +1,2 @@
+1 AATAG
+2 CATAC
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/HighSplitRepeat/HighSplitRepeat.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/HighSplitRepeat/HighSplitRepeat.txt
new file mode 100644
index 0000000..eca0a13
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/HighSplitRepeat/HighSplitRepeat.txt
@@ -0,0 +1,3 @@
+1 AGCCACA
+2 GCACTTT
+3 CGCCGTC
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/LowSplitRepeat/LowSplitRepeat.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/LowSplitRepeat/LowSplitRepeat.txt
new file mode 100644
index 0000000..259fd80
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/LowSplitRepeat/LowSplitRepeat.txt
@@ -0,0 +1,3 @@
+1 AGCCA
+2 AGCCG
+3 GCCTT
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/MidSplitRepeat/MidSplitRepeat.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/MidSplitRepeat/MidSplitRepeat.txt
new file mode 100644
index 0000000..e934e54
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/MidSplitRepeat/MidSplitRepeat.txt
@@ -0,0 +1,3 @@
+1 AGCCA
+2 CGCCT
+3 GCCGG
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/Tips1/Tips1.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips1/Tips1.txt
new file mode 100644
index 0000000..1e16d68
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips1/Tips1.txt
@@ -0,0 +1,2 @@
+1 CAGCCA
+2 GCCGTA
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/Tips2/Tips2.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips2/Tips2.txt
new file mode 100644
index 0000000..8109730
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips2/Tips2.txt
@@ -0,0 +1,2 @@
+1 ACAGCG
+2 GGCGAA
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/Tips3/Tips3.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips3/Tips3.txt
new file mode 100644
index 0000000..a672034
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips3/Tips3.txt
@@ -0,0 +1,2 @@
+1 CAGCCT
+2 CAGCCA
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/Tips4/Tips4.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips4/Tips4.txt
new file mode 100644
index 0000000..499e8e6
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips4/Tips4.txt
@@ -0,0 +1,2 @@
+1 CAGGCA
+2 CAGGCC
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2/2 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2/2
new file mode 100644
index 0000000..0f501fe
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2/2
@@ -0,0 +1 @@
+1 AATA
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2~
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3/3 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3/3
new file mode 100644
index 0000000..b90246c
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3/3
@@ -0,0 +1 @@
+1 AATAG
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3~
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4/4 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4/4
new file mode 100644
index 0000000..3f1cd5c
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4/4
@@ -0,0 +1 @@
+1 AATAGA
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4~
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5/5 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5/5
new file mode 100644
index 0000000..a720dc4
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5/5
@@ -0,0 +1 @@
+1 AATAGAA
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5~
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6/6 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6/6
new file mode 100644
index 0000000..7a95b7c
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6/6
@@ -0,0 +1 @@
+1 AATAGAAC
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6~
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7/7 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7/7
new file mode 100644
index 0000000..ce4b8a8
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7/7
@@ -0,0 +1 @@
+1 AATAGAACT
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7~
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8/8 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8/8
new file mode 100644
index 0000000..3959d4d
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8/8
@@ -0,0 +1 @@
+1 AATAGAACTT
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8~
new file mode 100644
index 0000000..89ead1e
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8~
@@ -0,0 +1 @@
+1 AATAGAACTTA
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9/9 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9/9
new file mode 100644
index 0000000..89ead1e
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9/9
@@ -0,0 +1 @@
+1 AATAGAACTTA
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9~
diff --git a/genomix/genomix-hadoop/data/webmap/test.txt b/genomix/genomix-hadoop/data/webmap/test.txt
new file mode 100644
index 0000000..990dbd1
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/test.txt
@@ -0,0 +1,3 @@
+1 AATAGAAG
+2 TATAGACC
+3 CATAGATT
diff --git a/genomix/genomix-hadoop/data/webmap/text.txt b/genomix/genomix-hadoop/data/webmap/text.txt
new file mode 100755
index 0000000..01c49e5
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/text.txt
@@ -0,0 +1,6 @@
+1 AATAGAAG
+2 AATAGCTT
+3 AATAGAAG
+4 AATAGCTT
+5 AATAGAAG
+6 AGAAGAAG
diff --git a/genomix/genomix-hadoop/pom.xml b/genomix/genomix-hadoop/pom.xml
new file mode 100755
index 0000000..f305c49
--- /dev/null
+++ b/genomix/genomix-hadoop/pom.xml
@@ -0,0 +1,186 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>genomix-hadoop</artifactId>
+ <name>genomix-hadoop</name>
+
+ <parent>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>genomix</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ </parent>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <configuration>
+ <descriptorRefs>
+ <descriptorRef>jar-with-dependencies</descriptorRef>
+ </descriptorRefs>
+ </configuration>
+ <executions>
+ <execution>
+ <id>make-my-jar-with-dependencies</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>appassembler-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <configuration>
+ <programs>
+ <program>
+ <mainClass>edu.uci.ics.maxclique.Driver</mainClass>
+ <name>maxclique</name>
+ </program>
+ </programs>
+ <repositoryLayout>flat</repositoryLayout>
+ <repositoryName>lib</repositoryName>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>assemble</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>2.7.2</version>
+ <configuration>
+ <forkMode>pertest</forkMode>
+ <argLine>-enableassertions -Xmx512m -XX:MaxPermSize=300m
+ -Dfile.encoding=UTF-8
+ -Djava.util.logging.config.file=src/test/resources/logging.properties</argLine>
+ <includes>
+ <include>**/*TestSuite.java</include>
+ <include>**/*Test.java</include>
+ </includes>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-clean-plugin</artifactId>
+ <configuration>
+ <filesets>
+ <fileset>
+ <directory>.</directory>
+ <includes>
+ <include>teststore*</include>
+ <include>edu*</include>
+ <include>build*</include>
+ <include>log*</include>
+ <include>ClusterController*</include>
+ </includes>
+ </fileset>
+ </filesets>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.8.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>0.20.2</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>0.20.2</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.kenai.nbpwr</groupId>
+ <artifactId>org-apache-commons-io</artifactId>
+ <version>1.3.1-201002241208</version>
+ <type>nbm</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-jcl</artifactId>
+ <version>1.6.3</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <version>1.6.3</version>
+ </dependency>
+ <dependency>
+ <groupId>args4j</groupId>
+ <artifactId>args4j</artifactId>
+ <version>2.0.16</version>
+ </dependency>
+ <dependency>
+ <groupId>com.kenai.nbpwr</groupId>
+ <artifactId>org-apache-commons-io</artifactId>
+ <version>1.3.1-201002241208</version>
+ <type>nbm</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>genomix-data</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>genomix-hyracks</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-core</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-core</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.mrunit</groupId>
+ <artifactId>mrunit</artifactId>
+ <version>1.0.0</version>
+ <classifier>hadoop1</classifier>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixDriver.java
new file mode 100644
index 0000000..d9e4876
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixDriver.java
@@ -0,0 +1,83 @@
+package edu.uci.ics.genomix.hadoop.contrailgraphbuilding;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+
+@SuppressWarnings("deprecation")
+public class GenomixDriver {
+    /** Command-line flags; args4j fills these fields when main() parses its arguments. */
+    private static class Options {
+        @Option(name = "-inputpath", usage = "the input path", required = true)
+        public String inputPath;
+
+        @Option(name = "-outputpath", usage = "the output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+        public int numReducers;
+
+        @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+        public int sizeKmer;
+
+// @Option(name = "-read-length", usage = "the length of read", required = true)
+// public int readLength;
+    }
+
+    /**
+     * Configures the graph-building job (GenomixMapper + GenomixReducer) and runs it via JobClient.runJob.
+     * Whatever already exists at outputPath is deleted recursively before the job starts.
+     */
+    public void run(String inputPath, String outputPath, int numReducers, int sizeKmer,
+            boolean seqOutput, String defaultConfPath) throws IOException {
+        JobConf job = new JobConf(GenomixDriver.class);
+        job.setInt("sizeKmer", sizeKmer);
+        if (defaultConfPath != null) {
+            job.addResource(new Path(defaultConfPath));
+        }
+        job.setJobName("Genomix Graph Building");
+        job.setNumReduceTasks(numReducers);
+
+        job.setMapperClass(GenomixMapper.class);
+        job.setMapOutputKeyClass(VKmerBytesWritable.class);
+        job.setMapOutputValueClass(NodeWritable.class);
+
+        job.setReducerClass(GenomixReducer.class);
+        job.setOutputKeyClass(VKmerBytesWritable.class);
+        job.setOutputValueClass(NodeWritable.class);
+
+        // Text input; sequence-file or text output depending on seqOutput.
+        job.setInputFormat(TextInputFormat.class);
+        if (seqOutput) {
+            job.setOutputFormat(SequenceFileOutputFormat.class);
+        } else {
+            job.setOutputFormat(TextOutputFormat.class);
+        }
+        FileInputFormat.setInputPaths(job, new Path(inputPath));
+        Path outputDir = new Path(outputPath);
+        FileOutputFormat.setOutputPath(job, outputDir);
+        FileSystem fs = FileSystem.get(job);
+        fs.delete(outputDir, true);
+        JobClient.runJob(job);
+    }
+
+    public static void main(String[] args) throws Exception {
+        Options parsed = new Options();
+        new CmdLineParser(parsed).parseArgument(args);
+        new GenomixDriver().run(parsed.inputPath, parsed.outputPath, parsed.numReducers, parsed.sizeKmer, true, null);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
new file mode 100644
index 0000000..39a7535
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
@@ -0,0 +1,246 @@
+package edu.uci.ics.genomix.hadoop.contrailgraphbuilding;
+
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerListWritable;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+@SuppressWarnings("deprecation")
+public class GenomixMapper extends MapReduceBase implements
+        Mapper<LongWritable, Text, VKmerBytesWritable, NodeWritable> {
+    // For every valid input read, emits one (kmer, NodeWritable) pair per kmer position of the read.
+    public static enum KmerDir {
+        FORWARD,
+        REVERSE,
+    }
+    private static final Pattern GENE_PATTERN = Pattern.compile("[AGCT]+"); // compiled once, not per record
+    public static int KMER_SIZE;
+    private VKmerBytesWritable preForwardKmer;
+    private VKmerBytesWritable preReverseKmer;
+    private VKmerBytesWritable curForwardKmer;
+    private VKmerBytesWritable curReverseKmer;
+    private VKmerBytesWritable nextForwardKmer;
+    private VKmerBytesWritable nextReverseKmer;
+    private PositionWritable nodeId;
+    private PositionListWritable nodeIdList;
+    private VKmerListWritable edgeListForPreKmer;
+    private VKmerListWritable edgeListForNextKmer;
+    private NodeWritable outputNode;
+
+    private KmerDir preKmerDir;
+    private KmerDir curKmerDir;
+    private KmerDir nextKmerDir;
+
+    byte mateId = (byte) 0;
+
+    @Override
+    public void configure(JobConf job) {
+        KMER_SIZE = Integer.parseInt(job.get("sizeKmer")); // key written by GenomixDriver.run via setInt
+        preForwardKmer = new VKmerBytesWritable();
+        preReverseKmer = new VKmerBytesWritable();
+        curForwardKmer = new VKmerBytesWritable();
+        curReverseKmer = new VKmerBytesWritable();
+        nextForwardKmer = new VKmerBytesWritable();
+        nextReverseKmer = new VKmerBytesWritable();
+        nodeId = new PositionWritable();
+        nodeIdList = new PositionListWritable();
+        edgeListForPreKmer = new VKmerListWritable();
+        edgeListForNextKmer = new VKmerListWritable();
+        outputNode = new NodeWritable();
+        preKmerDir = KmerDir.FORWARD;
+        curKmerDir = KmerDir.FORWARD;
+        nextKmerDir = KmerDir.FORWARD;
+    }
+    /** Splits one "readID<TAB>sequence" line into kmers and emits a node for each kmer position. */
+    @Override
+    public void map(LongWritable key, Text value, OutputCollector<VKmerBytesWritable, NodeWritable> output,
+            Reporter reporter) throws IOException {
+        String[] rawLine = value.toString().split("\\t"); // Read the Real Gene Line
+        if (rawLine.length != 2) {
+            throw new IOException("invalid data"); // the line is not exactly two tab-separated fields
+        }
+        int readID = Integer.parseInt(rawLine[0]);
+        String geneLine = rawLine[1];
+        // Reads with characters other than A, C, G or T are skipped without emitting anything.
+        Matcher geneMatcher = GENE_PATTERN.matcher(geneLine);
+        if (!geneMatcher.matches()) {
+            return;
+        }
+
+        byte[] array = geneLine.getBytes();
+        if (KMER_SIZE >= array.length) {
+            throw new IOException("short read"); // a read must be strictly longer than KMER_SIZE
+        }
+        /** first kmer **/
+        outputNode.reset();
+        curForwardKmer.setByRead(KMER_SIZE, array, 0);
+        curReverseKmer.setByReadReverse(KMER_SIZE, array, 0);
+        curKmerDir = curForwardKmer.compareTo(curReverseKmer) <= 0 ? KmerDir.FORWARD : KmerDir.REVERSE;
+        setNextKmer(array[KMER_SIZE]);
+        //set value.nodeId
+        setNodeId(mateId, readID, 1);
+        //set value.edgeList
+        setEdgeListForNextKmer();
+        //set coverage = 1
+        outputNode.setAvgCoverage(1);
+        //output mapper result
+        setMapperOutput(output);
+
+        /** middle kmer **/
+        for (int i = KMER_SIZE + 1; i < array.length; i++) {
+            outputNode.reset();
+            setPreKmerByOldCurKmer();
+            setCurKmerByOldNextKmer();
+            setNextKmer(array[i]);
+            //set value.nodeId
+            setNodeId(mateId, readID, i - KMER_SIZE + 1);
+            //set value.edgeList
+            setEdgeListForPreKmer();
+            setEdgeListForNextKmer();
+            //set coverage = 1
+            outputNode.setAvgCoverage(1);
+            //output mapper result
+            setMapperOutput(output);
+        }
+
+        /** last kmer **/
+        outputNode.reset();
+        setPreKmerByOldCurKmer();
+        setCurKmerByOldNextKmer();
+        //set value.nodeId
+        setNodeId(mateId, readID, array.length - KMER_SIZE + 1);
+        //set value.edgeList
+        setEdgeListForPreKmer();
+        //set coverage = 1
+        outputNode.setAvgCoverage(1);
+        //output mapper result
+        setMapperOutput(output);
+    }
+    /** Makes (mateId, readID, posId) the only entry of the output node's id list. */
+    public void setNodeId(byte mateId, long readID, int posId){
+        nodeId.set(mateId, readID, posId);
+        nodeIdList.reset();
+        nodeIdList.append(nodeId);
+        outputNode.setNodeIdList(nodeIdList);
+    }
+    /** Stores the previous kmer in the edge list selected by the (current, previous) direction pair. */
+    public void setEdgeListForPreKmer(){
+        switch(curKmerDir){
+            case FORWARD:
+                switch(preKmerDir){
+                    case FORWARD:
+                        edgeListForPreKmer.reset();
+                        edgeListForPreKmer.append(preForwardKmer);
+                        outputNode.setRRList(edgeListForPreKmer);
+                        break;
+                    case REVERSE:
+                        edgeListForPreKmer.reset();
+                        edgeListForPreKmer.append(preReverseKmer);
+                        outputNode.setRFList(edgeListForPreKmer);
+                        break;
+                }
+                break;
+            case REVERSE:
+                switch(preKmerDir){
+                    case FORWARD:
+                        edgeListForPreKmer.reset();
+                        edgeListForPreKmer.append(preForwardKmer);
+                        outputNode.setFRList(edgeListForPreKmer);
+                        break;
+                    case REVERSE:
+                        edgeListForPreKmer.reset();
+                        edgeListForPreKmer.append(preReverseKmer);
+                        outputNode.setFFList(edgeListForPreKmer);
+                        break;
+                }
+                break;
+        }
+    }
+    /** Stores the next kmer in the edge list selected by the (current, next) direction pair. */
+    public void setEdgeListForNextKmer(){
+        switch(curKmerDir){
+            case FORWARD:
+                switch(nextKmerDir){
+                    case FORWARD:
+                        edgeListForNextKmer.reset();
+                        edgeListForNextKmer.append(nextForwardKmer);
+                        outputNode.setFFList(edgeListForNextKmer);
+                        break;
+                    case REVERSE:
+                        edgeListForNextKmer.reset();
+                        edgeListForNextKmer.append(nextReverseKmer);
+                        outputNode.setFRList(edgeListForNextKmer);
+                        break;
+                }
+                break;
+            case REVERSE:
+                switch(nextKmerDir){
+                    case FORWARD:
+                        edgeListForNextKmer.reset();
+                        edgeListForNextKmer.append(nextForwardKmer);
+                        outputNode.setRFList(edgeListForNextKmer);
+                        break;
+                    case REVERSE:
+                        edgeListForNextKmer.reset();
+                        edgeListForNextKmer.append(nextReverseKmer);
+                        outputNode.setRRList(edgeListForNextKmer);
+                        break;
+                }
+                break;
+        }
+    }
+
+    //set preKmer by shifting curKmer with preChar (not called from map() in this class)
+    public void setPreKmer(byte preChar){
+        preForwardKmer.setAsCopy(curForwardKmer);
+        preForwardKmer.shiftKmerWithPreChar(preChar);
+        preReverseKmer.setByReadReverse(KMER_SIZE, preForwardKmer.toString().getBytes(), preForwardKmer.getBlockOffset());
+        preKmerDir = preForwardKmer.compareTo(preReverseKmer) <= 0 ? KmerDir.FORWARD : KmerDir.REVERSE;
+    }
+
+    //set nextKmer by shifting curKmer with nextChar; NOTE(review): confirm getBlockOffset() is a valid start index into the letter bytes
+    public void setNextKmer(byte nextChar){
+        nextForwardKmer.setAsCopy(curForwardKmer);
+        nextForwardKmer.shiftKmerWithNextChar(nextChar);
+        nextReverseKmer.setByReadReverse(KMER_SIZE, nextForwardKmer.toString().getBytes(), nextForwardKmer.getBlockOffset());
+        nextKmerDir = nextForwardKmer.compareTo(nextReverseKmer) <= 0 ? KmerDir.FORWARD : KmerDir.REVERSE;
+    }
+
+    //old curKmer becomes current preKmer
+    public void setPreKmerByOldCurKmer(){
+        preKmerDir = curKmerDir;
+        preForwardKmer.setAsCopy(curForwardKmer);
+        preReverseKmer.setAsCopy(curReverseKmer);
+    }
+
+    //old nextKmer becomes current curKmer
+    public void setCurKmerByOldNextKmer(){
+        curKmerDir = nextKmerDir;
+        curForwardKmer.setAsCopy(nextForwardKmer);
+        curReverseKmer.setAsCopy(nextReverseKmer);
+    }
+    /** Emits outputNode keyed by the forward or reverse form of the current kmer, as chosen by curKmerDir. */
+    public void setMapperOutput(OutputCollector<VKmerBytesWritable, NodeWritable> output) throws IOException{
+        switch(curKmerDir){
+            case FORWARD:
+                output.collect(curForwardKmer, outputNode);
+                break;
+            case REVERSE:
+                output.collect(curReverseKmer, outputNode);
+                break;
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
new file mode 100644
index 0000000..e8e41c4
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
@@ -0,0 +1,51 @@
+package edu.uci.ics.genomix.hadoop.contrailgraphbuilding;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+@SuppressWarnings("deprecation")
+public class GenomixReducer extends MapReduceBase implements
+ Reducer<VKmerBytesWritable, NodeWritable, VKmerBytesWritable, NodeWritable>{
+ // Reducer that folds every NodeWritable received for one k-mer key into a single output node.
+ public static int KMER_SIZE; // NOTE(review): mutable static mirrored from GenomixMapper in configure(); presumably only meaningful if the mapper was configured in the same JVM - confirm
+ private NodeWritable outputNode; // reused for every key; reset at the start of each reduce() call
+ private NodeWritable tmpNode; // scratch copy of the value currently being merged
+ private float averageCoverage; // running total of getAvgCoverage() for the current key (summed, never divided, despite the name)
+
+ @Override
+ public void configure(JobConf job) {
+ KMER_SIZE = GenomixMapper.KMER_SIZE; // the job configuration itself is not consulted here
+ outputNode = new NodeWritable();
+ tmpNode = new NodeWritable();
+ }
+
+ @Override
+ public void reduce(VKmerBytesWritable key, Iterator<NodeWritable> values,
+ OutputCollector<VKmerBytesWritable, NodeWritable> output,
+ Reporter reporter) throws IOException {
+ outputNode.reset();
+ averageCoverage = 0;
+ // union the node-id list and the four edge lists of every value into outputNode, accumulating coverage
+ while (values.hasNext()) {
+ tmpNode.set(values.next());
+ outputNode.getNodeIdList().unionUpdate(tmpNode.getNodeIdList());
+ outputNode.getFFList().unionUpdate(tmpNode.getFFList()); // original author's note: appending must check whether the node is already present
+ outputNode.getFRList().unionUpdate(tmpNode.getFRList());
+ outputNode.getRFList().unionUpdate(tmpNode.getRFList());
+ outputNode.getRRList().unionUpdate(tmpNode.getRRList());
+ averageCoverage += tmpNode.getAvgCoverage();
+ }
+ outputNode.setAvgCoverage(averageCoverage);
+ output.collect(key,outputNode); // exactly one merged node is emitted per key
+ }
+
+}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestCase.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestCase.java
new file mode 100644
index 0000000..44f3168
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestCase.java
@@ -0,0 +1,70 @@
+package edu.uci.ics.genomix.hadoop.contrailgraphbuilding;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.hadoop.graph.GenerateGraphViz;
+import edu.uci.ics.genomix.hadoop.pmcommon.HadoopMiniClusterTest;
+
+
+@SuppressWarnings({ "deprecation", "unused" })
+public class GraphBuildingTestCase extends TestCase{
+ // One graph-building run: executes GenomixDriver on a single HDFS input directory and dumps the result locally.
+ private final String RESULT_PATH; // job output path, also used as the local dump directory
+ private final String HADOOP_CONF_PATH; // configuration file handed to GenomixDriver.run
+ private final String HDFS_INPUTPATH; // job input path
+ private FileSystem dfs;
+ private JobConf conf;
+
+ private static final int COUNT_REDUCER = 1;
+ private final int SIZE_KMER;
+
+ public GraphBuildingTestCase(String resultFileDir, String hadoopConfPath,
+ String hdfsInputPath, int kmerSize, FileSystem dfs, JobConf conf){
+ super("test"); // the test name matches the test() method below
+ this.RESULT_PATH = resultFileDir;
+ this.HADOOP_CONF_PATH = hadoopConfPath;
+ this.HDFS_INPUTPATH = hdfsInputPath;
+ this.SIZE_KMER = kmerSize;
+ this.dfs = dfs;
+ this.conf = conf;
+ }
+ // Waits up to 20 ms on this object's monitor; nothing in this class notifies it.
+ private void waitawhile() throws InterruptedException {
+ synchronized (this) {
+ this.wait(20);
+ }
+ }
+ // Runs the graph-building job between explicit setUp()/tearDown() calls, then pauses briefly.
+ @Test
+ public void test() throws Exception {
+ setUp();
+ TestMapKmerToNode();
+ tearDown(); // not reached if TestMapKmerToNode() throws
+ waitawhile();
+ }
+ // Runs GenomixDriver with COUNT_REDUCER reducers and k-mer size SIZE_KMER, then dumps the output.
+ public void TestMapKmerToNode() throws Exception {
+ GenomixDriver driver = new GenomixDriver();
+ driver.run(HDFS_INPUTPATH, RESULT_PATH, COUNT_REDUCER, SIZE_KMER, true, HADOOP_CONF_PATH);
+ dumpResult();
+ }
+
+
+ // Copies the binary job output to RESULT_PATH + "/test.txt" (and ".bindir"), then converts the .bindir copy to GraphViz output.
+ private void dumpResult() throws Exception {
+// Path src = new Path(RESULT_PATH);
+// Path dest = new Path(RESULT_PATH);
+// dfs.copyToLocalFile(src, dest);
+ HadoopMiniClusterTest.copyResultsToLocal(RESULT_PATH, RESULT_PATH + "/test.txt", false, conf, true, dfs); // resultsAreText=false, ignoreZeroOutputs=true
+ GenerateGraphViz.convertGraphBuildingOutputToGraphViz(RESULT_PATH + "/test.txt.bindir", RESULT_PATH + "/graphviz");
+ }
+}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java
new file mode 100644
index 0000000..30e75cf
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java
@@ -0,0 +1,129 @@
+package edu.uci.ics.genomix.hadoop.contrailgraphbuilding;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import junit.framework.TestSuite;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+
+
+@SuppressWarnings("deprecation")
+public class GraphBuildingTestSuite extends TestSuite{
+    // JUnit 3 suite: starts the Hadoop mini clusters once and adds one GraphBuildingTestCase per TestDir entry.
+    private static int SIZE_KMER = 3;
+    public static final String PreFix = "data/webmap/lastesttest";
+    public static final String[] TestDir = { PreFix + File.separator
+// + "2", PreFix + File.separator
+// + "3", PreFix + File.separator
+// + "4", PreFix + File.separator
+// + "5", PreFix + File.separator
+// + "6", PreFix + File.separator
+// + "7", PreFix + File.separator
+// + "8", PreFix + File.separator
+// + "9"};
+    + "HighSplitRepeat", PreFix + File.separator
+    + "LowSplitRepeat", PreFix + File.separator
+    + "MidSplitRepeat", PreFix + File.separator
+    + "Tips1", PreFix + File.separator
+    + "Tips2", PreFix + File.separator
+    + "Tips3", PreFix + File.separator
+    + "Tips4"};
+
+    private static JobConf conf = new JobConf();
+    private static final String ACTUAL_RESULT_DIR = "actual";
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private static final String HDFS_INPUTPATH = "/webmap";
+
+    private MiniDFSCluster dfsCluster;
+    private MiniMRCluster mrCluster;
+    private static FileSystem dfs;
+    // Recreates an empty ACTUAL_RESULT_DIR and starts the mini clusters.
+    public void setUp() throws Exception{
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        startHadoop();
+    }
+    // Boots the DFS and MR mini clusters, uploads every TestDir directory to HDFS and writes conf to HADOOP_CONF_PATH.
+    private void startHadoop() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, 1, true, null);
+        dfs = dfsCluster.getFileSystem();
+        mrCluster = new MiniMRCluster(1, dfs.getUri().toString(), 1);
+
+        for (String testDir : TestDir) {
+            File src = new File(testDir);
+            Path dest = new Path(HDFS_INPUTPATH + File.separator + src.getName());
+            dfs.mkdirs(dest);
+            File[] inputs = src.listFiles();
+            if (inputs == null) { // listFiles() returns null for a missing or unreadable directory
+                throw new IOException("Test input directory not found or unreadable: " + src);
+            }
+            for (File f : inputs) {
+                dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
+            }
+        }
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+        } finally {
+            confOutput.close(); // close() also flushes; the file handle is released even if writeXml fails
+        }
+    }
+    // Stops the mini clusters.
+    public void tearDown() throws Exception {
+        cleanupHadoop();
+    }
+    // Shuts down the MapReduce mini cluster first, then the DFS mini cluster.
+    private void cleanupHadoop() throws IOException {
+        mrCluster.shutdown();
+        dfsCluster.shutdown();
+    }
+    // Builds the suite: starts the clusters immediately, then adds one test case per TestDir entry.
+    public static Test suite() throws Exception {
+        GraphBuildingTestSuite testSuite = new GraphBuildingTestSuite();
+        testSuite.setUp();
+        // each case writes to ACTUAL_RESULT_DIR/bin/<dir name> and reads HDFS_INPUTPATH/<dir name>
+        for (String testPathStr : TestDir) {
+            File testDir = new File(testPathStr);
+            String resultFileName = ACTUAL_RESULT_DIR + File.separator +
+                    "bin" + File.separator + testDir.getName();
+            testSuite.addTest(new GraphBuildingTestCase(resultFileName, HADOOP_CONF_PATH,
+                    HDFS_INPUTPATH + File.separator + testDir.getName(), SIZE_KMER, dfs, conf));
+        }
+        return testSuite;
+    }
+
+    /** Runs the tests and collects their result in a TestResult, then always shuts the mini clusters down. */
+    @Override
+    public void run(TestResult result) {
+        try {
+            int testCount = countTestCases();
+            // stop early once the result asks for it
+            for (int i = 0; i < testCount && !result.shouldStop(); i++) {
+                runTest(this.testAt(i), result);
+            }
+        } catch (Exception e) {
+            throw new IllegalStateException(e);
+        } finally {
+            try {
+                tearDown(); // runs even when a test run threw, so the clusters are not leaked
+            } catch (Exception e) {
+                throw new IllegalStateException(e);
+            }
+        }
+    }
+
+}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/SingleGraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/SingleGraphBuildingTest.java
new file mode 100644
index 0000000..5ee5593
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/SingleGraphBuildingTest.java
@@ -0,0 +1,81 @@
+package edu.uci.ics.genomix.hadoop.contrailgraphbuilding;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.hadoop.pmcommon.HadoopMiniClusterTest;
+
+@SuppressWarnings("deprecation")
+public class SingleGraphBuildingTest {
+    private JobConf conf = new JobConf();
+    private static final String ACTUAL_RESULT_DIR = "actual";
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private static final String DATA_PATH = "data/webmap/RemoveBridge.txt";
+    private static final String HDFS_PATH = "/webmap";
+    private static final String RESULT_PATH = "/result";
+    private static final int COUNT_REDUCER = 1;
+    private static final int SIZE_KMER = 3;
+    private MiniDFSCluster dfsCluster;
+    private MiniMRCluster mrCluster;
+    private FileSystem dfs;
+    // End-to-end run: fresh ACTUAL_RESULT_DIR, mini clusters up, graph-building job, results dumped locally.
+    @Test
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        startHadoop();
+        try {
+            TestMapKmerToNode();
+        } finally {
+            cleanupHadoop(); // stop the mini clusters even when the job or the dump fails
+        }
+    }
+    // Runs GenomixDriver from HDFS_PATH to RESULT_PATH, then copies the output to the local disk.
+    public void TestMapKmerToNode() throws Exception {
+        GenomixDriver driver = new GenomixDriver();
+        driver.run(HDFS_PATH, RESULT_PATH, COUNT_REDUCER, SIZE_KMER, true, HADOOP_CONF_PATH);
+        dumpResult();
+    }
+    // Boots the DFS and MR mini clusters, uploads DATA_PATH to HDFS_PATH and writes conf to HADOOP_CONF_PATH.
+    private void startHadoop() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, 1, true, null);
+        dfs = dfsCluster.getFileSystem();
+        mrCluster = new MiniMRCluster(1, dfs.getUri().toString(), 1);
+        Path src = new Path(DATA_PATH);
+        Path dest = new Path(HDFS_PATH + "/");
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+        } finally {
+            confOutput.close(); // close() also flushes; the file handle is released even if writeXml fails
+        }
+    }
+    // Shuts down the MapReduce mini cluster first, then the DFS mini cluster.
+    private void cleanupHadoop() throws IOException {
+        mrCluster.shutdown();
+        dfsCluster.shutdown();
+    }
+    // Copies RESULT_PATH from the DFS into ACTUAL_RESULT_DIR, then dumps the binary result to "actual/test.txt".
+    private void dumpResult() throws IOException {
+        Path src = new Path(RESULT_PATH);
+        Path dest = new Path(ACTUAL_RESULT_DIR);
+        dfs.copyToLocalFile(src, dest);
+        HadoopMiniClusterTest.copyResultsToLocal(RESULT_PATH, "actual/test.txt", false, conf, true, dfs);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
new file mode 100644
index 0000000..3788a27
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
@@ -0,0 +1,243 @@
+package edu.uci.ics.genomix.hadoop.pmcommon;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.ArrayUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+//import edu.uci.ics.genomix.hadoop.velvetgraphbuilding.GraphBuildingDriver;
+import edu.uci.ics.genomix.hyracks.newgraph.test.TestUtils;
+
+/*
+ * A base class providing most of the boilerplate for Hadoop-based tests
+ */
+@SuppressWarnings("deprecation")
+public class HadoopMiniClusterTest {
+ protected int KMER_LENGTH = 5;
+ protected int READ_LENGTH = 8;
+
+ // subclass should modify this to include the HDFS directories that should be cleaned up
+ protected ArrayList<String> HDFS_PATHS = new ArrayList<String>();
+
+ protected static final String EXPECTED_ROOT = "src/test/resources/expected/";
+ protected static final String ACTUAL_ROOT = "src/test/resources/actual/";
+ protected static final String INPUT_ROOT = "src/test/resources/input/";
+
+ protected static String HADOOP_CONF_ROOT = "src/test/resources/hadoop/conf/";
+ protected static String HADOOP_CONF = HADOOP_CONF_ROOT + "conf.xml";
+
+ protected static MiniDFSCluster dfsCluster;
+ protected static MiniMRCluster mrCluster;
+ protected static FileSystem dfs;
+ protected static JobConf conf = new JobConf();
+ protected static int numberOfNC = 1;
+ protected static int numPartitionPerMachine = 1;
+ // Runs once before the test class: clears the local stores, starts the mini clusters, then recreates an empty ACTUAL_ROOT.
+ @BeforeClass
+ public static void setUpMiniCluster() throws Exception {
+ cleanupStores();
+ startHDFS();
+ FileUtils.forceMkdir(new File(ACTUAL_ROOT));
+ FileUtils.cleanDirectory(new File(ACTUAL_ROOT));
+ }
+ // Convenience overload: delegates with ignoreZeroOutputs = true.
+ protected static void copyResultsToLocal(String hdfsSrcDir, String localDestFile, boolean resultsAreText,
+ Configuration conf) throws IOException {
+ copyResultsToLocal(hdfsSrcDir, localDestFile, resultsAreText, conf, true);
+ }
+ // Convenience overload: delegates using this class's static dfs as the file system argument.
+ public static void copyResultsToLocal(String hdfsSrcDir, String localDestFile, boolean resultsAreText,
+ Configuration conf, boolean ignoreZeroOutputs) throws IOException {
+ copyResultsToLocal(hdfsSrcDir, localDestFile, resultsAreText,
+ conf, ignoreZeroOutputs, dfs);
+ }
+
+ /*
+  * Merge and copy a DFS directory to a local destination, converting to text if necessary.
+  * Binary results are also stored locally: the raw directory as localDestFile + ".bindir", a tab-separated text
+  * dump as localDestFile (or localDestFile + "/data" when that path is a directory) and a merged sequence file as
+  * localDestFile + ".binmerge". Throws IOException when no output has data and ignoreZeroOutputs is false.
+  */
+ public static void copyResultsToLocal(String hdfsSrcDir, String localDestFile, boolean resultsAreText,
+         Configuration conf, boolean ignoreZeroOutputs, FileSystem dfs) throws IOException {
+     if (resultsAreText) {
+         // for text files, just concatenate them together
+         FileUtil.copyMerge(FileSystem.get(conf), new Path(hdfsSrcDir), FileSystem.getLocal(new Configuration()),
+                 new Path(localDestFile), false, conf, null);
+         return;
+     }
+     // file is binary: save the entire binary output dir
+     FileUtil.copy(FileSystem.get(conf), new Path(hdfsSrcDir), FileSystem.getLocal(new Configuration()),
+             new Path(localDestFile + ".bindir"), false, conf);
+     // chomp through output files
+     FileStatus[] files = ArrayUtils.addAll(dfs.globStatus(new Path(hdfsSrcDir + "*")), dfs.globStatus(new Path(hdfsSrcDir + "*/*")));
+     FileStatus validFile = null;
+     for (FileStatus f : files) {
+         if (f.getLen() != 0) {
+             validFile = f;
+             break;
+         }
+     }
+     if (validFile == null) {
+         if (!ignoreZeroOutputs) {
+             throw new IOException("No non-zero outputs in source directory " + hdfsSrcDir);
+         }
+         FileSystem.getLocal(new Configuration()).mkdirs(new Path(localDestFile).getParent()); // just make a dummy output dir
+         return;
+     }
+     // also load the Nodes and write them out as text locally.
+     FileSystem lfs = FileSystem.getLocal(new Configuration());
+     lfs.mkdirs(new Path(localDestFile).getParent());
+     File filePathTo = new File(localDestFile);
+     if (filePathTo.exists() && filePathTo.isDirectory()) {
+         filePathTo = new File(localDestFile + "/data");
+     }
+     // The first non-empty file only supplies the key/value classes, so its reader is closed right away.
+     SequenceFile.Reader header = new SequenceFile.Reader(dfs, validFile.getPath(), conf);
+     Class<?> keyClass = header.getKeyClass();
+     Class<?> valueClass = header.getValueClass();
+     header.close();
+     Writable key = (Writable) ReflectionUtils.newInstance(keyClass, conf);
+     Writable value = (Writable) ReflectionUtils.newInstance(valueClass, conf);
+     BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+     try {
+         SequenceFile.Writer writer = new SequenceFile.Writer(lfs, new JobConf(), new Path(localDestFile + ".binmerge"), keyClass, valueClass);
+         try {
+             for (FileStatus f : files) {
+                 if (f.getLen() == 0) {
+                     continue;
+                 }
+                 SequenceFile.Reader reader = new SequenceFile.Reader(dfs, f.getPath(), conf);
+                 try {
+                     while (reader.next(key, value)) {
+                         bw.write(key.toString() + "\t" + value.toString());
+                         System.out.println(key.toString() + "\t" + value.toString());
+                         bw.newLine();
+                         writer.append(key, value);
+                     }
+                 } finally {
+                     reader.close();
+                 }
+             }
+         } finally {
+             writer.close();
+         }
+     } finally {
+         bw.close();
+     }
+ }
+ // Compares the file at actualPath with the one at expectedPath; returns true whenever the comparison call returns normally.
+ protected static boolean checkResults(String expectedPath, String actualPath, int[] poslistField) throws Exception {
+     File expected = new File(expectedPath);
+     if (poslistField == null) {
+         TestUtils.compareWithSortedResult(expected, new File(actualPath));
+     } else {
+         TestUtils.compareWithUnSortedPosition(expected, new File(actualPath), poslistField);
+     }
+     return true;
+ }
+ // Ensures the local "teststore" and "build" directories exist, then empties both.
+ protected static void cleanupStores() throws IOException {
+ FileUtils.forceMkdir(new File("teststore"));
+ FileUtils.forceMkdir(new File("build"));
+ FileUtils.cleanDirectory(new File("teststore"));
+ FileUtils.cleanDirectory(new File("build"));
+ }
+ // Starts the DFS mini cluster (numberOfNC data nodes) and the MR mini cluster, then writes conf to HADOOP_CONF.
+ protected static void startHDFS() throws IOException {
+// conf.addResource(new Path(HADOOP_CONF_ROOT + "core-site.xml"));
+ // conf.addResource(new Path(HADOOP_CONF_ROOT + "mapred-site.xml"));
+// conf.addResource(new Path(HADOOP_CONF_ROOT + "hdfs-site.xml"));
+     FileSystem lfs = FileSystem.getLocal(new Configuration());
+     lfs.delete(new Path("build"), true);
+     System.setProperty("hadoop.log.dir", "logs");
+     dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+     dfs = dfsCluster.getFileSystem();
+     mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+     System.out.println(dfs.getUri().toString());
+     DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF)));
+     try {
+         conf.writeXml(confOutput);
+     } finally {
+         confOutput.close(); // release the file handle even if writeXml fails
+     }
+ }
+ // Creates hdfsDest on the DFS and copies every direct child of the local directory localSrc into it.
+ protected static void copyLocalToDFS(String localSrc, String hdfsDest) throws IOException {
+ Path dest = new Path(hdfsDest);
+ dfs.mkdirs(dest);
+ System.out.println("copying from " + localSrc + " to " + dest);
+ for (File f : new File(localSrc).listFiles()) { // NOTE(review): listFiles() returns null when localSrc is missing or not a directory, which would raise a NullPointerException here
+ dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
+ }
+ }
+
+ /*
+  * Deletes the local ACTUAL_ROOT folder (if present) and every existing HDFS path listed in HDFS_PATHS.
+  */
+ public void cleanUpOutput() throws IOException {
+     FileSystem localFs = FileSystem.getLocal(new Configuration());
+     Path actualRoot = new Path(ACTUAL_ROOT); // local side
+     if (localFs.exists(actualRoot)) {
+         localFs.delete(actualRoot, true);
+     }
+     for (String hdfsDir : HDFS_PATHS) { // DFS side
+         Path target = new Path(hdfsDir);
+         if (dfs.exists(target)) {
+             dfs.delete(target, true);
+         }
+     }
+ }
+ // Runs once after the test class: shuts the mini clusters down.
+ @AfterClass
+ public static void tearDown() throws Exception {
+ cleanupHDFS();
+ }
+ // Shuts down both mini clusters, MapReduce first because it was started on top of the DFS cluster.
+ protected static void cleanupHDFS() throws Exception {
+     mrCluster.shutdown();
+     dfsCluster.shutdown();
+ }
+
+// public void buildGraph() throws IOException {
+// JobConf buildConf = new JobConf(conf); // use a separate conf so we don't interfere with other jobs
+// FileInputFormat.setInputPaths(buildConf, SEQUENCE);
+// FileOutputFormat.setOutputPath(buildConf, new Path(INPUT_GRAPH));
+//
+// GraphBuildingDriver tldriver = new GraphBuildingDriver();
+// tldriver.run(SEQUENCE, INPUT_GRAPH, 2, kmerByteSize, READ_LENGTH, false, true, HADOOP_CONF_ROOT + "conf.xml");
+//
+// boolean resultsAreText = true;
+// copyResultsToLocal(INPUT_GRAPH, ACTUAL_ROOT + INPUT_GRAPH, resultsAreText, buildConf);
+// }
+//
+// private void prepareGraph() throws IOException {
+// if (regenerateGraph) {
+// copyLocalToDFS(LOCAL_SEQUENCE_FILE, SEQUENCE);
+// buildGraph();
+// copyLocalToDFS(ACTUAL_ROOT + INPUT_GRAPH + readsFile + ".binmerge", INPUT_GRAPH);
+// } else {
+// copyLocalToDFS(EXPECTED_ROOT + INPUT_GRAPH + readsFile + ".binmerge", INPUT_GRAPH);
+// }
+// }
+
+}
diff --git a/genomix/genomix-hadoop/src/test/python/convert_graphviz.py b/genomix/genomix-hadoop/src/test/python/convert_graphviz.py
new file mode 100755
index 0000000..39ca660
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/python/convert_graphviz.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+"""
+Convert a graph to graphviz format and run `dot` on it.
+
+Kmer sequences are included
+"""
+
+__author__ = "Jacob Biesinger"
+__copyright__ = "Copyright 2009-2013, The Regents of the University of California"
+__license__ = "Apache"
+
+
+import sys
+import os
+import re
+import string
+import argparse
+
+import pydot
+
+
+element_re = re.compile(r"\d+,\d+|\w+")
+#edge_colors = dict(FF='black', FR='red', RF='blue', RR='gray')
+edge_colors = dict(FF='#DD1E2F', FR='#EBB035', RF='#06A2CB', RR='#218559')
+
+# Reverse-complement a kmer: swap A<->T and C<->G, then reverse the string.
+def reverse_complement(kmer, _table=string.maketrans('ACGT', 'TGCA')):
+ return string.translate(kmer, _table)[::-1]
+
+# Add a 'cluster_legend' subgraph holding one labelled, coloured sample edge per entry of edge_colors; returns graph.
+def add_legend(graph):
+ legend = pydot.Subgraph('cluster_legend', splines='line', rankdir='LR', label='legend', rank='min')
+ for i, (edgetype, edgecolor) in enumerate(sorted(edge_colors.items())):
+ legend.add_node(pydot.Node('legend_0_' + str(i), label='', shape='point'))
+ legend.add_node(pydot.Node('legend_1_' + str(i), label='', shape='point'))
+ legend.add_edge(pydot.Edge('legend_0_' + str(i), 'legend_1_' + str(i), label=edgetype, color=edgecolor))
+ graph.add_subgraph(legend)
+ return graph
+
+
+def graph_from_file(filename, legend=True, kmers=True, flag=True):
+    "Build a pydot digraph from a tab-separated graph dump; `legend`, `kmers` and `flag` toggle those label parts"
+    graph_name = os.path.split(filename)[1].replace('.', '_')
+    graph = pydot.Dot(graph_name, graph_type='digraph', rankdir='LR', splines='ortho', weight='2')
+    if legend:
+        add_legend(graph)
+    # annoyingly, order matters. add nodes before any edges or else properties aren't set right
+    nodes = {}
+    edges = []
+    with open(filename) as graph_file:
+        for line in graph_file:
+            # the parsed field gets its own name so it cannot clobber the `flag` switch
+            nodeid, ff, fr, rf, rr, kmer, node_flag = map(element_re.findall, line.strip().split('\t'))
+            nodeid, kmer, node_flag = nodeid[0], kmer[0], node_flag[0]
+            readid = nodeid.split(',')[0]
+            flag_text = '--%s' % node_flag if flag and node_flag else ''
+            FF_kmer = '<TR><TD BGCOLOR="%s">%s</TD></TR>' % (edge_colors['FF'], kmer) if kmers else ''
+            RR_kmer = '<TR><TD BGCOLOR="%s">%s</TD></TR>' % (edge_colors['RR'], reverse_complement(kmer)) if kmers else ''
+            node_label = r'''<<FONT POINT-SIZE="10"><TABLE ALIGN="CENTER" BORDER="0" CELLBORDER="0" CELLSPACING="0">
+ <TR><TD>{nodeid}{flag}</TD></TR>
+ {FF_kmer}
+ {RR_kmer}
+ </TABLE></FONT>>'''.format(nodeid=nodeid, flag=flag_text, FF_kmer=FF_kmer, RR_kmer=RR_kmer)
+            node = pydot.Node(nodeid, rank=readid, group=readid, label=node_label)
+            nodes.setdefault(readid, []).append(node)
+            for edgename, edgelist in [('FF', ff), ('FR', fr), ('RF', rf), ('RR', rr)]:
+                for e in edgelist:
+                    edges.append(pydot.Edge(nodeid, e, color=edge_colors[edgename]))
+
+    for readid, subnodes in nodes.items():
+        subg = pydot.Subgraph('cluster_' + readid, fillcolor='lightgray')
+        for node in subnodes:
+            subg.add_node(node)
+        graph.add_subgraph(subg)
+    for e in edges:
+        graph.add_edge(e)
+    return graph
+
+def recursive_plot(topdir, suffix='.txt', **kwargs):
+    "Recursively plot any files matching `suffix`; `out_type` picks the format, other kwargs go to graph_from_file"
+    out_type = kwargs.pop('out_type', 'svg')  # popped because graph_from_file does not accept it
+    for root, dirnames, filenames in os.walk(topdir):
+        for filename in filenames:
+            f = os.path.join(root, filename)
+            if not os.path.isfile(f) or not f.endswith(suffix):
+                continue
+            try:
+                graph = graph_from_file(f, **kwargs)
+            except Exception as err:
+                print >> sys.stderr, 'skipping', f, '-', err  # best effort: report the file and move on
+            else:
+                print 'plotting', f + '.' + out_type
+                graph.write(f + '.' + out_type, format=out_type)
+
+
+def get_parser():
+    "Build the command-line parser: label switches, graph files, directories to recurse into and output type"
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--no-legend', action='store_true')
+    parser.add_argument('--no-kmers', action='store_true')
+    parser.add_argument('--no-flag', action='store_true')
+    parser.add_argument('txt_graphs', nargs='*')
+    parser.add_argument('--directory', '-d', help='Recurse here and plot all '
+                        'graphs that are found.', action='append', default=[])
+    parser.add_argument('--out-type', type=str, default='svg')
+    return parser
+
+
+def main(args):
+    "Plot each graph file given on the command line, then every graph found under each --directory"
+    parser = get_parser()
+    args = parser.parse_args(args)
+    kwargs = dict(legend=not args.no_legend, kmers=not args.no_kmers, flag=not args.no_flag)
+    for filename in args.txt_graphs:
+        graph = graph_from_file(filename, **kwargs)
+        print 'plotting', filename + '.' + args.out_type
+        graph.write(filename + '.' + args.out_type, format=args.out_type)
+
+    for d in args.directory:
+        recursive_plot(d, out_type=args.out_type, **kwargs)
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff --git a/genomix/genomix-hadoop/src/test/python/generate_graph.py b/genomix/genomix-hadoop/src/test/python/generate_graph.py
new file mode 100644
index 0000000..2053bf9
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/python/generate_graph.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Generate a random smattering of reads
+"""
+
+import sys
+import argparse
+import random
+import itertools
+import string
+
+# Build the command-line parser; --outreads and --outgenome are argparse.FileType('w') arguments.
+def get_parser():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--walk', '-w', action='store_true')
+ parser.add_argument('--coverage', '-c', type=float, required=True)
+ parser.add_argument('--genome-length', '-g', type=int,
+ required=True)
+ parser.add_argument('--read-length', '-l', type=int, required=True)
+ parser.add_argument('--no-rc', action='store_true')
+ parser.add_argument('--error-rate', type=float, default=.01)
+ parser.add_argument('--outreads', '-r', type=argparse.FileType('w'),
+ default='reads.txt')
+ parser.add_argument('--outgenome', '-o', type=argparse.FileType('w'),
+ default='genome.txt')
+ return parser
+
+# Reverse-complement a kmer: swap A<->T and C<->G, then reverse the string.
+def reverse_complement(kmer, _table=string.maketrans('ACGT', 'TGCA')):
+ return string.translate(kmer, _table)[::-1]
+
+# Return a string of `length` characters, each picked with random.choice from 'ACGT'.
+def make_genome(length):
+ return ''.join(random.choice('ACGT') for i in xrange(length))
+
+
+def make_reads(genome, read_length, coverage, walk=False, no_rc=False, error_rate=0.):
+    "Yield `id<TAB>sequence` lines for reads of `read_length` sampled from `genome` at roughly `coverage`x depth"
+    num_reads = int(coverage * len(genome)) // read_length
+    last_start = len(genome) - read_length  # highest valid start position; inclusive in both branches below
+    if walk:
+        step_size = max(1, len(genome) // max(1, num_reads))  # inner max() avoids dividing by zero reads
+        next_starts = itertools.cycle(xrange(0, last_start + 1, step_size))
+    else:
+        # + 1 so the final start position can be drawn and a genome of exactly read_length works
+        next_starts = (random.randrange(last_start + 1) for i in itertools.cycle([None]))
+    num_errors = 0
+    for i in range(1, num_reads + 1):
+        start = next_starts.next()
+        seq = genome[start:start + read_length]
+        if not no_rc and random.choice([True, False]):
+            seq = reverse_complement(seq)
+        final_seq = []
+        for l in seq:
+            if random.random() < error_rate:
+                num_errors += 1
+                final_seq.append(random.choice(list(set('ATGC') - set(l))))
+            else:
+                final_seq.append(l)
+        yield '%s\t%s\n' % (i, ''.join(final_seq))
+    print >> sys.stderr, 'introduced', num_errors, 'errors'
+
+# Generate a random genome, write it plus a newline to --outgenome, then write the sampled reads to --outreads.
+def main(args):
+ parser = get_parser()
+ args = parser.parse_args(args)
+ genome = make_genome(args.genome_length)
+ args.outgenome.write(genome)
+ args.outgenome.write('\n')
+ args.outreads.writelines(make_reads(genome, args.read_length,
+ args.coverage, args.walk, args.no_rc,
+ args.error_rate))
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff --git a/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread.txt b/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread.txt
new file mode 100644
index 0000000..be930c4
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread.txt
@@ -0,0 +1 @@
+((1,1) [] [] [] [] AATAGAAG) (null)
diff --git a/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread.txt.svg b/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread.txt.svg
new file mode 100644
index 0000000..31ebb54
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread.txt.svg
@@ -0,0 +1,85 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: singleread_txt Pages: 1 -->
+<svg width="206pt" height="321pt"
+ viewBox="0.00 0.00 206.00 321.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 317)">
+<title>singleread_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-317 203,-317 203,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="59,-8 59,-209 190,-209 190,-8 59,-8"/>
+<text text-anchor="middle" x="124.5" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 130,-305 130,-217 8,-217"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="69" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="180" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M70.9266,-157C82.3997,-157 142.097,-157 167.994,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="168.02,-160.5 178.02,-157 168.02,-153.5 168.02,-160.5"/>
+<text text-anchor="middle" x="150" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="69" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="180" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M70.9266,-116C82.3997,-116 142.097,-116 167.994,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="168.02,-119.5 178.02,-116 168.02,-112.5 168.02,-119.5"/>
+<text text-anchor="middle" x="150" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="69" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="180" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M70.9266,-75C82.3997,-75 142.097,-75 167.994,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="168.02,-78.5001 178.02,-75 168.02,-71.5001 168.02,-78.5001"/>
+<text text-anchor="middle" x="150" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="69" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="180" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M70.9266,-34C82.3997,-34 142.097,-34 167.994,-34"/>
+<polygon fill="#218559" stroke="#218559" points="168.02,-37.5001 178.02,-34 168.02,-30.5001 168.02,-37.5001"/>
+<text text-anchor="middle" x="150" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="69" cy="-261" rx="51.8276" ry="36.0624"/>
+<text text-anchor="start" x="49" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="40,-253 40,-267 98,-267 98,-253 40,-253"/>
+<text text-anchor="start" x="42.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AATAGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="40,-239 40,-253 98,-253 98,-239 40,-239"/>
+<text text-anchor="start" x="46.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">CTTCTATT</text>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread/.part-0.crc b/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread/.part-0.crc
new file mode 100644
index 0000000..6b5048e
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread/.part-1.crc b/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread/.part-1.crc
new file mode 100644
index 0000000..6de18c1
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread/part-0 b/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread/part-0
new file mode 100755
index 0000000..335dd8d
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread/part-0
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread/part-1 b/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread/part-1
new file mode 100755
index 0000000..3a7991c
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/expected/pathmerge/singleread/part-1
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/hadoop/conf/core-site.xml b/genomix/genomix-hadoop/src/test/resources/hadoop/conf/core-site.xml
new file mode 100644
index 0000000..3e5bacb
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/hadoop/conf/core-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+ <property>
+ <name>fs.default.name</name>
+ <value>hdfs://127.0.0.1:31888</value>
+ </property>
+ <property>
+ <name>hadoop.tmp.dir</name>
+ <value>/tmp/hadoop</value>
+ </property>
+
+
+</configuration>
diff --git a/genomix/genomix-hadoop/src/test/resources/hadoop/conf/hdfs-site.xml b/genomix/genomix-hadoop/src/test/resources/hadoop/conf/hdfs-site.xml
new file mode 100644
index 0000000..b1b1902
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/hadoop/conf/hdfs-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+ <property>
+ <name>dfs.replication</name>
+ <value>1</value>
+ </property>
+
+ <property>
+ <name>dfs.block.size</name>
+ <value>65536</value>
+ </property>
+
+</configuration>
diff --git a/genomix/genomix-hadoop/src/test/resources/hadoop/conf/log4j.properties b/genomix/genomix-hadoop/src/test/resources/hadoop/conf/log4j.properties
new file mode 100755
index 0000000..d5e6004
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/hadoop/conf/log4j.properties
@@ -0,0 +1,94 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=FATAL,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=FATAL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
diff --git a/genomix/genomix-hadoop/src/test/resources/hadoop/conf/mapred-site.xml b/genomix/genomix-hadoop/src/test/resources/hadoop/conf/mapred-site.xml
new file mode 100644
index 0000000..525e7d5
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/hadoop/conf/mapred-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+ <property>
+ <name>mapred.job.tracker</name>
+ <value>localhost:29007</value>
+ </property>
+ <property>
+ <name>mapred.tasktracker.map.tasks.maximum</name>
+ <value>20</value>
+ </property>
+ <property>
+ <name>mapred.tasktracker.reduce.tasks.maximum</name>
+ <value>20</value>
+ </property>
+ <property>
+ <name>mapred.max.split.size</name>
+ <value>2048</value>
+ </property>
+
+</configuration>
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles.txt b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles.txt
new file mode 100644
index 0000000..2896107
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles.txt
@@ -0,0 +1,16 @@
+((2,1) [(3,1),(4,1),(2,2)] [] [] [(1,1)] CGTCC) (null)
+((2,2) [(2,3)] [] [] [(2,1)] GTCCT) (null)
+((2,3) [(2,4)] [] [] [(2,2)] TCCTT) (null)
+((2,4) [(4,4),(3,4)] [] [] [(1,4),(2,3)] CCTTA) (null)
+((4,1) [(4,2)] [] [] [(1,2),(2,1)] GTCCT) (null)
+((4,2) [(4,3)] [] [] [(4,1)] TCCTT) (null)
+((4,3) [(4,4)] [] [] [(1,4),(4,2)] CCTTA) (null)
+((4,4) [] [] [] [(2,4),(4,3)] CTTAG) (null)
+((1,1) [(2,1),(1,2)] [] [] [] ACGTC) (null)
+((1,2) [(3,1),(4,1),(1,3)] [] [] [(1,1)] CGTCC) (null)
+((1,3) [(1,4)] [] [] [(1,2)] GTCCT) (null)
+((1,4) [(3,3),(4,3),(2,4)] [] [] [(1,3)] TCCTT) (null)
+((3,1) [(3,2)] [] [] [(1,2),(2,1)] GTCCT) (null)
+((3,2) [(3,3)] [] [] [(3,1)] TCCTT) (null)
+((3,3) [(3,4)] [] [] [(1,4),(3,2)] CCTTA) (null)
+((3,4) [] [] [] [(2,4),(3,3)] CTTAG) (null)
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles.txt.svg b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles.txt.svg
new file mode 100644
index 0000000..fef64c2
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles.txt.svg
@@ -0,0 +1,449 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: five_ff_bubbles_txt Pages: 1 -->
+<svg width="764pt" height="520pt"
+ viewBox="0.00 0.00 764.00 520.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 516)">
+<title>five_ff_bubbles_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-516 761,-516 761,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 500,-305 500,-217 8,-217"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_3</title>
+<polygon fill="none" stroke="black" points="274,-416 274,-504 748,-504 748,-416 274,-416"/>
+</g>
+<g id="graph5" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="150,-313 150,-401 624,-401 624,-313 150,-313"/>
+</g>
+<g id="graph6" class="cluster"><title>cluster_4</title>
+<polygon fill="none" stroke="black" points="274,-121 274,-209 748,-209 748,-121 274,-121"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="43.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ACGTC</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="42.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">GACGT</text>
+</g>
+<!-- 1,2 -->
+<g id="node16" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="185" y="-257.667" font-family="Times Roman,serif" font-size="10.00">CGTCC</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="183.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">GGACG</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge61" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 2,1 -->
+<g id="node25" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-349 179,-363 224,-363 224,-349 179,-349"/>
+<text text-anchor="start" x="185" y="-353.667" font-family="Times Roman,serif" font-size="10.00">CGTCC</text>
+<polygon fill="#218559" stroke="#218559" points="179,-335 179,-349 224,-349 224,-335 179,-335"/>
+<text text-anchor="start" x="183.5" y="-339.667" font-family="Times Roman,serif" font-size="10.00">GGACG</text>
+</g>
+<!-- 1,1->2,1 -->
+<g id="edge59" class="edge"><title>1,1->2,1</title>
+<path fill="none" stroke="#dd1e2f" d="M92.652,-283.751C112.403,-297.103 137.544,-314.1 158.635,-328.359"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="156.853,-331.379 167.097,-334.08 160.773,-325.58 156.853,-331.379"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge69" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 1,3 -->
+<g id="node17" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="310" y="-257.667" font-family="Times Roman,serif" font-size="10.00">GTCCT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="308" y="-243.667" font-family="Times Roman,serif" font-size="10.00">AGGAC</text>
+</g>
+<!-- 1,2->1,3 -->
+<g id="edge67" class="edge"><title>1,2->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 3,1 -->
+<g id="node20" class="node"><title>3,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-460" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-471.167" font-family="Times Roman,serif" font-size="10.00">3,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-452 303,-466 348,-466 348,-452 303,-452"/>
+<text text-anchor="start" x="310" y="-456.667" font-family="Times Roman,serif" font-size="10.00">GTCCT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-438 303,-452 348,-452 348,-438 303,-438"/>
+<text text-anchor="start" x="308" y="-442.667" font-family="Times Roman,serif" font-size="10.00">AGGAC</text>
+</g>
+<!-- 1,2->3,1 -->
+<g id="edge63" class="edge"><title>1,2->3,1</title>
+<path fill="none" stroke="#dd1e2f" d="M227.005,-290.029C236.061,-300.138 244,-309 244,-309 244,-309 274,-405 274,-405 274,-405 282.027,-413.657 291.712,-424.101"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="289.238,-426.58 298.604,-431.533 294.371,-421.821 289.238,-426.58"/>
+</g>
+<!-- 4,1 -->
+<g id="node30" class="node"><title>4,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">4,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-157 303,-171 348,-171 348,-157 303,-157"/>
+<text text-anchor="start" x="310" y="-161.667" font-family="Times Roman,serif" font-size="10.00">GTCCT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-143 303,-157 348,-157 348,-143 303,-143"/>
+<text text-anchor="start" x="308" y="-147.667" font-family="Times Roman,serif" font-size="10.00">AGGAC</text>
+</g>
+<!-- 1,2->4,1 -->
+<g id="edge65" class="edge"><title>1,2->4,1</title>
+<path fill="none" stroke="#dd1e2f" d="M227.005,-231.971C236.061,-221.862 244,-213 244,-213 244,-213 262,-184 262,-184 262,-184 267.269,-182.411 274.758,-180.152"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="275.825,-183.486 284.389,-177.248 273.804,-176.784 275.825,-183.486"/>
+</g>
+<!-- 1,3->1,2 -->
+<g id="edge73" class="edge"><title>1,3->1,2</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+<!-- 1,4 -->
+<g id="node18" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-253 427,-267 472,-267 472,-253 427,-253"/>
+<text text-anchor="start" x="435" y="-257.667" font-family="Times Roman,serif" font-size="10.00">TCCTT</text>
+<polygon fill="#218559" stroke="#218559" points="427,-239 427,-253 472,-253 472,-239 427,-239"/>
+<text text-anchor="start" x="431.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">AAGGA</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge71" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-254.562C376.601,-254.258 386.531,-254.176 396.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-257.822 406.443,-254.554 396.527,-250.823 396.364,-257.822"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge81" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M406.443,-267.446C397.158,-267.745 387.225,-267.824 377.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-264.175 367.327,-267.438 377.242,-271.173 377.407,-264.175"/>
+</g>
+<!-- 3,3 -->
+<g id="node22" class="node"><title>3,3</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-460" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-471.167" font-family="Times Roman,serif" font-size="10.00">3,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-452 551,-466 596,-466 596,-452 551,-452"/>
+<text text-anchor="start" x="558.5" y="-456.667" font-family="Times Roman,serif" font-size="10.00">CCTTA</text>
+<polygon fill="#218559" stroke="#218559" points="551,-438 551,-452 596,-452 596,-438 551,-438"/>
+<text text-anchor="start" x="556.5" y="-442.667" font-family="Times Roman,serif" font-size="10.00">TAAGG</text>
+</g>
+<!-- 1,4->3,3 -->
+<g id="edge75" class="edge"><title>1,4->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M477.779,-288.086C489.347,-298.974 500,-309 500,-309 500,-309 530,-405 530,-405 530,-405 535.671,-412.254 542.894,-421.492"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="540.32,-423.882 549.236,-429.604 545.834,-419.571 540.32,-423.882"/>
+</g>
+<!-- 2,4 -->
+<g id="node28" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-349 551,-363 596,-363 596,-349 551,-349"/>
+<text text-anchor="start" x="558.5" y="-353.667" font-family="Times Roman,serif" font-size="10.00">CCTTA</text>
+<polygon fill="#218559" stroke="#218559" points="551,-335 551,-349 596,-349 596,-335 551,-335"/>
+<text text-anchor="start" x="556.5" y="-339.667" font-family="Times Roman,serif" font-size="10.00">TAAGG</text>
+</g>
+<!-- 1,4->2,4 -->
+<g id="edge79" class="edge"><title>1,4->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M479.857,-286.293C494.987,-298.695 510,-311 510,-311 510,-311 520.107,-318.38 532.225,-327.228"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="530.253,-330.121 540.393,-333.192 534.38,-324.468 530.253,-330.121"/>
+</g>
+<!-- 4,3 -->
+<g id="node32" class="node"><title>4,3</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">4,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-157 551,-171 596,-171 596,-157 551,-157"/>
+<text text-anchor="start" x="558.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">CCTTA</text>
+<polygon fill="#218559" stroke="#218559" points="551,-143 551,-157 596,-157 596,-143 551,-143"/>
+<text text-anchor="start" x="556.5" y="-147.667" font-family="Times Roman,serif" font-size="10.00">TAAGG</text>
+</g>
+<!-- 1,4->4,3 -->
+<g id="edge77" class="edge"><title>1,4->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M479.143,-235.305C494.511,-222.204 510,-209 510,-209 510,-209 519.601,-202.295 531.307,-194.119"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="533.461,-196.884 539.655,-188.288 529.453,-191.145 533.461,-196.884"/>
+</g>
+<!-- 3,1->1,2 -->
+<g id="edge85" class="edge"><title>3,1->1,2</title>
+<path fill="none" stroke="#218559" d="M298.604,-431.533C286.159,-418.113 274,-405 274,-405 274,-405 262,-384 262,-384 262,-384 244,-309 244,-309 244,-309 239.718,-304.221 233.857,-297.678"/>
+<polygon fill="#218559" stroke="#218559" points="236.285,-295.142 227.005,-290.029 231.071,-299.813 236.285,-295.142"/>
+</g>
+<!-- 3,2 -->
+<g id="node21" class="node"><title>3,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-460" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-471.167" font-family="Times Roman,serif" font-size="10.00">3,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-452 427,-466 472,-466 472,-452 427,-452"/>
+<text text-anchor="start" x="435" y="-456.667" font-family="Times Roman,serif" font-size="10.00">TCCTT</text>
+<polygon fill="#218559" stroke="#218559" points="427,-438 427,-452 472,-452 472,-438 427,-438"/>
+<text text-anchor="start" x="431.5" y="-442.667" font-family="Times Roman,serif" font-size="10.00">AAGGA</text>
+</g>
+<!-- 3,1->3,2 -->
+<g id="edge83" class="edge"><title>3,1->3,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-453.562C376.601,-453.258 386.531,-453.176 396.159,-453.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-456.822 406.443,-453.554 396.527,-449.823 396.364,-456.822"/>
+</g>
+<!-- 3,1->2,1 -->
+<g id="edge87" class="edge"><title>3,1->2,1</title>
+<path fill="none" stroke="#218559" d="M283.329,-450.078C271.613,-447.289 262,-445 262,-445 262,-445 244.717,-420.067 228.55,-396.744"/>
+<polygon fill="#218559" stroke="#218559" points="231.252,-394.498 222.678,-388.273 225.499,-398.486 231.252,-394.498"/>
+</g>
+<!-- 3,2->3,1 -->
+<g id="edge91" class="edge"><title>3,2->3,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-466.446C397.158,-466.745 387.225,-466.824 377.602,-466.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-463.175 367.327,-466.438 377.242,-470.173 377.407,-463.175"/>
+</g>
+<!-- 3,2->3,3 -->
+<g id="edge89" class="edge"><title>3,2->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-453.562C500.601,-453.258 510.531,-453.176 520.159,-453.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-456.822 530.443,-453.554 520.527,-449.823 520.364,-456.822"/>
+</g>
+<!-- 3,3->1,4 -->
+<g id="edge95" class="edge"><title>3,3->1,4</title>
+<path fill="none" stroke="#218559" d="M549.236,-429.604C539.291,-416.884 530,-405 530,-405 530,-405 510,-359 510,-359 510,-359 500,-309 500,-309 500,-309 493.498,-302.881 485.137,-295.012"/>
+<polygon fill="#218559" stroke="#218559" points="487.46,-292.391 477.779,-288.086 482.662,-297.489 487.46,-292.391"/>
+</g>
+<!-- 3,3->3,2 -->
+<g id="edge97" class="edge"><title>3,3->3,2</title>
+<path fill="none" stroke="#218559" d="M530.443,-466.446C521.158,-466.745 511.225,-466.824 501.602,-466.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-463.175 491.327,-466.438 501.242,-470.173 501.407,-463.175"/>
+</g>
+<!-- 3,4 -->
+<g id="node23" class="node"><title>3,4</title>
+<ellipse fill="none" stroke="black" cx="697" cy="-460" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="677.5" y="-471.167" font-family="Times Roman,serif" font-size="10.00">3,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="675,-452 675,-466 720,-466 720,-452 675,-452"/>
+<text text-anchor="start" x="682" y="-456.667" font-family="Times Roman,serif" font-size="10.00">CTTAG</text>
+<polygon fill="#218559" stroke="#218559" points="675,-438 675,-452 720,-452 720,-438 675,-438"/>
+<text text-anchor="start" x="681" y="-442.667" font-family="Times Roman,serif" font-size="10.00">CTAAG</text>
+</g>
+<!-- 3,3->3,4 -->
+<g id="edge93" class="edge"><title>3,3->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M615.327,-453.562C624.601,-453.258 634.531,-453.176 644.159,-453.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="644.364,-456.822 654.443,-453.554 644.527,-449.823 644.364,-456.822"/>
+</g>
+<!-- 3,4->3,3 -->
+<g id="edge101" class="edge"><title>3,4->3,3</title>
+<path fill="none" stroke="#218559" d="M654.443,-466.446C645.158,-466.745 635.225,-466.824 625.602,-466.681"/>
+<polygon fill="#218559" stroke="#218559" points="625.407,-463.175 615.327,-466.438 625.242,-470.173 625.407,-463.175"/>
+</g>
+<!-- 3,4->2,4 -->
+<g id="edge99" class="edge"><title>3,4->2,4</title>
+<path fill="none" stroke="#218559" d="M667.572,-433.467C651.973,-419.402 636,-405 636,-405 636,-405 625.367,-396.899 612.837,-387.352"/>
+<polygon fill="#218559" stroke="#218559" points="614.945,-384.558 604.869,-381.281 610.702,-390.126 614.945,-384.558"/>
+</g>
+<!-- 2,1->1,1 -->
+<g id="edge21" class="edge"><title>2,1->1,1</title>
+<path fill="none" stroke="#218559" d="M163.346,-338.87C142.193,-328.685 120,-318 120,-318 120,-318 108.279,-307.047 95.112,-294.744"/>
+<polygon fill="#218559" stroke="#218559" points="97.41,-292.101 87.7138,-287.831 92.6308,-297.216 97.41,-292.101"/>
+</g>
+<!-- 2,1->3,1 -->
+<g id="edge15" class="edge"><title>2,1->3,1</title>
+<path fill="none" stroke="#dd1e2f" d="M225.811,-386.692C242.699,-406.902 262,-430 262,-430 262,-430 268.943,-433.306 278.219,-437.723"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="276.728,-440.89 287.262,-442.029 279.738,-434.57 276.728,-440.89"/>
+</g>
+<!-- 2,2 -->
+<g id="node26" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-349 303,-363 348,-363 348,-349 303,-349"/>
+<text text-anchor="start" x="310" y="-353.667" font-family="Times Roman,serif" font-size="10.00">GTCCT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-335 303,-349 348,-349 348,-335 303,-335"/>
+<text text-anchor="start" x="308" y="-339.667" font-family="Times Roman,serif" font-size="10.00">AGGAC</text>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge19" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-350.562C252.601,-350.258 262.531,-350.176 272.159,-350.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-353.822 282.443,-350.554 272.527,-346.823 272.364,-353.822"/>
+</g>
+<!-- 2,1->4,1 -->
+<g id="edge17" class="edge"><title>2,1->4,1</title>
+<path fill="none" stroke="#dd1e2f" d="M227.005,-327.971C236.061,-317.862 244,-309 244,-309 244,-309 274,-213 274,-213 274,-213 280.502,-206.881 288.863,-199.012"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="291.338,-201.489 296.221,-192.086 286.54,-196.391 291.338,-201.489"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge25" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M282.443,-363.446C273.158,-363.745 263.225,-363.824 253.602,-363.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-360.175 243.327,-363.438 253.242,-367.173 253.407,-360.175"/>
+</g>
+<!-- 2,3 -->
+<g id="node27" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-349 427,-363 472,-363 472,-349 427,-349"/>
+<text text-anchor="start" x="435" y="-353.667" font-family="Times Roman,serif" font-size="10.00">TCCTT</text>
+<polygon fill="#218559" stroke="#218559" points="427,-335 427,-349 472,-349 472,-335 427,-335"/>
+<text text-anchor="start" x="431.5" y="-339.667" font-family="Times Roman,serif" font-size="10.00">AAGGA</text>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge23" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-350.562C376.601,-350.258 386.531,-350.176 396.159,-350.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-353.822 406.443,-350.554 396.527,-346.823 396.364,-353.822"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge29" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M406.443,-363.446C397.158,-363.745 387.225,-363.824 377.602,-363.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-360.175 367.327,-363.438 377.242,-367.173 377.407,-360.175"/>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge27" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-350.562C500.601,-350.258 510.531,-350.176 520.159,-350.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-353.822 530.443,-350.554 520.527,-346.823 520.364,-353.822"/>
+</g>
+<!-- 2,4->1,4 -->
+<g id="edge35" class="edge"><title>2,4->1,4</title>
+<path fill="none" stroke="#218559" d="M541.015,-332.238C525.051,-319.878 505.587,-304.81 488.68,-291.72"/>
+<polygon fill="#218559" stroke="#218559" points="490.661,-288.827 480.611,-285.473 486.376,-294.363 490.661,-288.827"/>
+</g>
+<!-- 2,4->3,4 -->
+<g id="edge33" class="edge"><title>2,4->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M603.652,-382.461C620.195,-396.202 640.78,-413.301 658.406,-427.942"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="656.342,-430.778 666.271,-434.475 660.815,-425.393 656.342,-430.778"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge37" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M530.443,-363.446C521.158,-363.745 511.225,-363.824 501.602,-363.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-360.175 491.327,-363.438 501.242,-367.173 501.407,-360.175"/>
+</g>
+<!-- 4,4 -->
+<g id="node33" class="node"><title>4,4</title>
+<ellipse fill="none" stroke="black" cx="697" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="677.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">4,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="675,-157 675,-171 720,-171 720,-157 675,-157"/>
+<text text-anchor="start" x="682" y="-161.667" font-family="Times Roman,serif" font-size="10.00">CTTAG</text>
+<polygon fill="#218559" stroke="#218559" points="675,-143 675,-157 720,-157 720,-143 675,-143"/>
+<text text-anchor="start" x="681" y="-147.667" font-family="Times Roman,serif" font-size="10.00">CTAAG</text>
+</g>
+<!-- 2,4->4,4 -->
+<g id="edge31" class="edge"><title>2,4->4,4</title>
+<path fill="none" stroke="#dd1e2f" d="M593.476,-325.295C614.656,-292.5 647.802,-241.178 670.862,-205.472"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="674.008,-207.051 676.494,-196.752 668.128,-203.254 674.008,-207.051"/>
+</g>
+<!-- 4,1->1,2 -->
+<g id="edge41" class="edge"><title>4,1->1,2</title>
+<path fill="none" stroke="#218559" d="M285.459,-179.436C272.801,-184.057 262,-188 262,-188 262,-188 244,-213 244,-213 244,-213 239.718,-217.779 233.857,-224.322"/>
+<polygon fill="#218559" stroke="#218559" points="231.071,-222.187 227.005,-231.971 236.285,-226.858 231.071,-222.187"/>
+</g>
+<!-- 4,1->2,1 -->
+<g id="edge43" class="edge"><title>4,1->2,1</title>
+<path fill="none" stroke="#218559" d="M296.221,-192.086C284.653,-202.974 274,-213 274,-213 274,-213 262,-243 262,-243 262,-243 244,-309 244,-309 244,-309 239.718,-313.779 233.857,-320.322"/>
+<polygon fill="#218559" stroke="#218559" points="231.071,-318.187 227.005,-327.971 236.285,-322.858 231.071,-318.187"/>
+</g>
+<!-- 4,2 -->
+<g id="node31" class="node"><title>4,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">4,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-157 427,-171 472,-171 472,-157 427,-157"/>
+<text text-anchor="start" x="435" y="-161.667" font-family="Times Roman,serif" font-size="10.00">TCCTT</text>
+<polygon fill="#218559" stroke="#218559" points="427,-143 427,-157 472,-157 472,-143 427,-143"/>
+<text text-anchor="start" x="431.5" y="-147.667" font-family="Times Roman,serif" font-size="10.00">AAGGA</text>
+</g>
+<!-- 4,1->4,2 -->
+<g id="edge39" class="edge"><title>4,1->4,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-158.562C376.601,-158.258 386.531,-158.176 396.159,-158.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-161.822 406.443,-158.554 396.527,-154.823 396.364,-161.822"/>
+</g>
+<!-- 4,2->4,1 -->
+<g id="edge47" class="edge"><title>4,2->4,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-171.446C397.158,-171.745 387.225,-171.824 377.602,-171.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-168.175 367.327,-171.438 377.242,-175.173 377.407,-168.175"/>
+</g>
+<!-- 4,2->4,3 -->
+<g id="edge45" class="edge"><title>4,2->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-158.562C500.601,-158.258 510.531,-158.176 520.159,-158.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-161.822 530.443,-158.554 520.527,-154.823 520.364,-161.822"/>
+</g>
+<!-- 4,3->1,4 -->
+<g id="edge51" class="edge"><title>4,3->1,4</title>
+<path fill="none" stroke="#218559" d="M541.015,-189.762C525.051,-202.122 505.587,-217.19 488.68,-230.28"/>
+<polygon fill="#218559" stroke="#218559" points="486.376,-227.637 480.611,-236.527 490.661,-233.173 486.376,-227.637"/>
+</g>
+<!-- 4,3->4,2 -->
+<g id="edge53" class="edge"><title>4,3->4,2</title>
+<path fill="none" stroke="#218559" d="M530.443,-171.446C521.158,-171.745 511.225,-171.824 501.602,-171.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-168.175 491.327,-171.438 501.242,-175.173 501.407,-168.175"/>
+</g>
+<!-- 4,3->4,4 -->
+<g id="edge49" class="edge"><title>4,3->4,4</title>
+<path fill="none" stroke="#dd1e2f" d="M615.327,-158.562C624.601,-158.258 634.531,-158.176 644.159,-158.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="644.364,-161.822 654.443,-158.554 644.527,-154.823 644.364,-161.822"/>
+</g>
+<!-- 4,4->2,4 -->
+<g id="edge55" class="edge"><title>4,4->2,4</title>
+<path fill="none" stroke="#218559" d="M678.863,-197.706C660.882,-230.131 636,-275 636,-275 636,-275 618.994,-297.135 602.652,-318.405"/>
+<polygon fill="#218559" stroke="#218559" points="599.776,-316.404 596.459,-326.466 605.327,-320.669 599.776,-316.404"/>
+</g>
+<!-- 4,4->4,3 -->
+<g id="edge57" class="edge"><title>4,4->4,3</title>
+<path fill="none" stroke="#218559" d="M654.443,-171.446C645.158,-171.745 635.225,-171.824 625.602,-171.681"/>
+<polygon fill="#218559" stroke="#218559" points="625.407,-168.175 615.327,-171.438 625.242,-175.173 625.407,-168.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles/.part-0.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles/.part-0.crc
new file mode 100644
index 0000000..2085368
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles/.part-1.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles/.part-1.crc
new file mode 100644
index 0000000..69dcf7d
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles/part-0 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles/part-0
new file mode 100755
index 0000000..36702f5
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles/part-0
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles/part-1 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles/part-1
new file mode 100755
index 0000000..9045432
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_ff_bubbles/part-1
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles.txt b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles.txt
new file mode 100644
index 0000000..a41ce56
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles.txt
@@ -0,0 +1,18 @@
+((2,1) [(2,3)] [] [] [] AATAGA) (null)
+((2,3) [(6,1),(2,4)] [] [] [(2,1)] TAGAA) (null)
+((2,4) [(6,2)] [] [] [(2,3)] AGAAG) (null)
+((4,1) [(4,3)] [] [] [] AATAGA) (null)
+((4,3) [(6,1),(4,4)] [] [] [(4,1)] TAGAA) (null)
+((4,4) [(6,2)] [] [] [(4,3)] AGAAG) (null)
+((6,1) [(6,2)] [] [] [(2,3),(1,3),(3,3),(4,3),(5,3)] AGAAG) (null)
+((6,2) [(6,3)] [] [] [(2,4),(3,4),(1,4),(4,4),(5,4),(6,1)] GAAGA) (null)
+((6,3) [] [] [] [(6,2)] AAGAAG) (null)
+((1,1) [(1,3)] [] [] [] AATAGA) (null)
+((1,3) [(6,1),(1,4)] [] [] [(1,1)] TAGAA) (null)
+((1,4) [(6,2)] [] [] [(1,3)] AGAAG) (null)
+((3,1) [(3,3)] [] [] [] AATAGA) (null)
+((3,3) [(6,1),(3,4)] [] [] [(3,1)] TAGAA) (null)
+((3,4) [(6,2)] [] [] [(3,3)] AGAAG) (null)
+((5,1) [(5,3)] [] [] [] AATAGA) (null)
+((5,3) [(6,1),(5,4)] [] [] [(5,1)] TAGAA) (null)
+((5,4) [(6,2)] [] [] [(5,3)] AGAAG) (null)
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles.txt.svg b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles.txt.svg
new file mode 100644
index 0000000..ff38f7d
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles.txt.svg
@@ -0,0 +1,473 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: five_length1_bubbles_txt Pages: 1 -->
+<svg width="646pt" height="844pt"
+ viewBox="0.00 0.00 646.00 844.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 840)">
+<title>five_length1_bubbles_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-840 643,-840 643,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-548 8,-636 376,-636 376,-548 8,-548"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_3</title>
+<polygon fill="none" stroke="black" points="8,-320 8,-408 376,-408 376,-320 8,-320"/>
+</g>
+<g id="graph5" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="8,-740 8,-828 376,-828 376,-740 8,-740"/>
+</g>
+<g id="graph6" class="cluster"><title>cluster_5</title>
+<polygon fill="none" stroke="black" points="8,-644 8,-732 376,-732 376,-644 8,-644"/>
+</g>
+<g id="graph7" class="cluster"><title>cluster_4</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 376,-305 376,-217 8,-217"/>
+</g>
+<g id="graph8" class="cluster"><title>cluster_6</title>
+<polygon fill="none" stroke="black" points="274,-426 274,-514 630,-514 630,-426 274,-426"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-592" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-603.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-584 37,-598 82,-598 82,-584 37,-584"/>
+<text text-anchor="start" x="40" y="-588.667" font-family="Times Roman,serif" font-size="10.00">AATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-570 37,-584 82,-584 82,-570 37,-570"/>
+<text text-anchor="start" x="43" y="-574.667" font-family="Times Roman,serif" font-size="10.00">TCTATT</text>
+</g>
+<!-- 1,3 -->
+<g id="node16" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-592" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-603.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-584 179,-598 224,-598 224,-584 179,-584"/>
+<text text-anchor="start" x="185" y="-588.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-570 179,-584 224,-584 224,-570 179,-570"/>
+<text text-anchor="start" x="187.5" y="-574.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 1,1->1,3 -->
+<g id="edge69" class="edge"><title>1,1->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-585.755C116.207,-585.208 132.729,-585.105 148.049,-585.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-588.957 158.425,-585.756 148.534,-581.96 148.326,-588.957"/>
+</g>
+<!-- 1,3->1,1 -->
+<g id="edge75" class="edge"><title>1,3->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-598.244C143.825,-598.792 127.305,-598.895 111.982,-598.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-595.043 101.605,-598.245 111.497,-602.04 111.704,-595.043"/>
+</g>
+<!-- 1,4 -->
+<g id="node17" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-592" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-603.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-584 303,-598 348,-598 348,-584 303,-584"/>
+<text text-anchor="start" x="307.5" y="-588.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-570 303,-584 348,-584 348,-570 303,-570"/>
+<text text-anchor="start" x="311" y="-574.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge73" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-585.562C252.601,-585.258 262.531,-585.176 272.159,-585.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-588.822 282.443,-585.554 272.527,-581.823 272.364,-588.822"/>
+</g>
+<!-- 6,1 -->
+<g id="node35" class="node"><title>6,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-470" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-481.167" font-family="Times Roman,serif" font-size="10.00">6,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-462 303,-476 348,-476 348,-462 303,-462"/>
+<text text-anchor="start" x="307.5" y="-466.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-448 303,-462 348,-462 348,-448 303,-448"/>
+<text text-anchor="start" x="311" y="-452.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 1,3->6,1 -->
+<g id="edge71" class="edge"><title>1,3->6,1</title>
+<path fill="none" stroke="#dd1e2f" d="M217.507,-558.714C235.573,-522.287 262,-469 262,-469 262,-469 264,-469 264,-469 264,-469 267.077,-469.05 271.85,-469.129"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="271.916,-472.63 281.972,-469.295 272.031,-465.631 271.916,-472.63"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge79" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-598.446C273.158,-598.745 263.225,-598.824 253.602,-598.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-595.175 243.327,-598.438 253.242,-602.173 253.407,-595.175"/>
+</g>
+<!-- 6,2 -->
+<g id="node36" class="node"><title>6,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-470" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-481.167" font-family="Times Roman,serif" font-size="10.00">6,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-462 427,-476 472,-476 472,-462 427,-462"/>
+<text text-anchor="start" x="431.5" y="-466.667" font-family="Times Roman,serif" font-size="10.00">GAAGA</text>
+<polygon fill="#218559" stroke="#218559" points="427,-448 427,-462 472,-462 472,-448 427,-448"/>
+<text text-anchor="start" x="435.5" y="-452.667" font-family="Times Roman,serif" font-size="10.00">TCTTC</text>
+</g>
+<!-- 1,4->6,2 -->
+<g id="edge77" class="edge"><title>1,4->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M353.138,-564.316C371.132,-546.612 394.616,-523.507 413.837,-504.596"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="416.3,-507.082 420.974,-497.574 411.391,-502.093 416.3,-507.082"/>
+</g>
+<!-- 3,1 -->
+<g id="node19" class="node"><title>3,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-364" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-375.167" font-family="Times Roman,serif" font-size="10.00">3,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-356 37,-370 82,-370 82,-356 37,-356"/>
+<text text-anchor="start" x="40" y="-360.667" font-family="Times Roman,serif" font-size="10.00">AATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-342 37,-356 82,-356 82,-342 37,-342"/>
+<text text-anchor="start" x="43" y="-346.667" font-family="Times Roman,serif" font-size="10.00">TCTATT</text>
+</g>
+<!-- 3,3 -->
+<g id="node20" class="node"><title>3,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-364" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-375.167" font-family="Times Roman,serif" font-size="10.00">3,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-356 179,-370 224,-370 224,-356 179,-356"/>
+<text text-anchor="start" x="185" y="-360.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-342 179,-356 224,-356 224,-342 179,-342"/>
+<text text-anchor="start" x="187.5" y="-346.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 3,1->3,3 -->
+<g id="edge81" class="edge"><title>3,1->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-357.755C116.207,-357.208 132.729,-357.105 148.049,-357.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-360.957 158.425,-357.756 148.534,-353.96 148.326,-360.957"/>
+</g>
+<!-- 3,3->3,1 -->
+<g id="edge87" class="edge"><title>3,3->3,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-370.244C143.825,-370.792 127.305,-370.895 111.982,-370.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-367.043 101.605,-370.245 111.497,-374.04 111.704,-367.043"/>
+</g>
+<!-- 3,4 -->
+<g id="node21" class="node"><title>3,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-364" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-375.167" font-family="Times Roman,serif" font-size="10.00">3,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-356 303,-370 348,-370 348,-356 303,-356"/>
+<text text-anchor="start" x="307.5" y="-360.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-342 303,-356 348,-356 348,-342 303,-342"/>
+<text text-anchor="start" x="311" y="-346.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 3,3->3,4 -->
+<g id="edge85" class="edge"><title>3,3->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-357.562C252.601,-357.258 262.531,-357.176 272.159,-357.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-360.822 282.443,-357.554 272.527,-353.823 272.364,-360.822"/>
+</g>
+<!-- 3,3->6,1 -->
+<g id="edge83" class="edge"><title>3,3->6,1</title>
+<path fill="none" stroke="#dd1e2f" d="M231.334,-389.931C247.997,-404.175 268.832,-421.985 286.618,-437.19"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="284.674,-440.132 294.55,-443.97 289.223,-434.812 284.674,-440.132"/>
+</g>
+<!-- 3,4->3,3 -->
+<g id="edge91" class="edge"><title>3,4->3,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-370.446C273.158,-370.745 263.225,-370.824 253.602,-370.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-367.175 243.327,-370.438 253.242,-374.173 253.407,-367.175"/>
+</g>
+<!-- 3,4->6,2 -->
+<g id="edge89" class="edge"><title>3,4->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M356.869,-388.787C372.495,-400.941 388,-413 388,-413 388,-413 399.721,-423.953 412.888,-436.256"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="410.59,-438.899 420.286,-443.169 415.369,-433.784 410.59,-438.899"/>
+</g>
+<!-- 2,1 -->
+<g id="node23" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-784" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-795.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-776 37,-790 82,-790 82,-776 37,-776"/>
+<text text-anchor="start" x="40" y="-780.667" font-family="Times Roman,serif" font-size="10.00">AATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-762 37,-776 82,-776 82,-762 37,-762"/>
+<text text-anchor="start" x="43" y="-766.667" font-family="Times Roman,serif" font-size="10.00">TCTATT</text>
+</g>
+<!-- 2,3 -->
+<g id="node24" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-784" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-795.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-776 179,-790 224,-790 224,-776 179,-776"/>
+<text text-anchor="start" x="185" y="-780.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-762 179,-776 224,-776 224,-762 179,-762"/>
+<text text-anchor="start" x="187.5" y="-766.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 2,1->2,3 -->
+<g id="edge17" class="edge"><title>2,1->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-777.755C116.207,-777.208 132.729,-777.105 148.049,-777.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-780.957 158.425,-777.756 148.534,-773.96 148.326,-780.957"/>
+</g>
+<!-- 2,3->2,1 -->
+<g id="edge23" class="edge"><title>2,3->2,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-790.244C143.825,-790.792 127.305,-790.895 111.982,-790.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-787.043 101.605,-790.245 111.497,-794.04 111.704,-787.043"/>
+</g>
+<!-- 2,4 -->
+<g id="node25" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-784" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-795.167" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-776 303,-790 348,-790 348,-776 303,-776"/>
+<text text-anchor="start" x="307.5" y="-780.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-762 303,-776 348,-776 348,-762 303,-762"/>
+<text text-anchor="start" x="311" y="-766.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge21" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-777.562C252.601,-777.258 262.531,-777.176 272.159,-777.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-780.822 282.443,-777.554 272.527,-773.823 272.364,-780.822"/>
+</g>
+<!-- 2,3->6,1 -->
+<g id="edge19" class="edge"><title>2,3->6,1</title>
+<path fill="none" stroke="#dd1e2f" d="M227.005,-754.971C236.061,-744.862 244,-736 244,-736 244,-736 274,-544 274,-544 274,-544 285.426,-527.421 297.461,-509.958"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="300.579,-511.603 303.371,-501.383 294.815,-507.63 300.579,-511.603"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge27" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-790.446C273.158,-790.745 263.225,-790.824 253.602,-790.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-787.175 243.327,-790.438 253.242,-794.173 253.407,-787.175"/>
+</g>
+<!-- 2,4->6,2 -->
+<g id="edge25" class="edge"><title>2,4->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M353.779,-756.914C365.347,-746.026 376,-736 376,-736 376,-736 415.795,-590.993 436.621,-515.107"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="440.035,-515.892 439.306,-505.323 433.284,-514.04 440.035,-515.892"/>
+</g>
+<!-- 5,1 -->
+<g id="node27" class="node"><title>5,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-688" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-699.167" font-family="Times Roman,serif" font-size="10.00">5,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-680 37,-694 82,-694 82,-680 37,-680"/>
+<text text-anchor="start" x="40" y="-684.667" font-family="Times Roman,serif" font-size="10.00">AATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-666 37,-680 82,-680 82,-666 37,-666"/>
+<text text-anchor="start" x="43" y="-670.667" font-family="Times Roman,serif" font-size="10.00">TCTATT</text>
+</g>
+<!-- 5,3 -->
+<g id="node28" class="node"><title>5,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-688" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-699.167" font-family="Times Roman,serif" font-size="10.00">5,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-680 179,-694 224,-694 224,-680 179,-680"/>
+<text text-anchor="start" x="185" y="-684.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-666 179,-680 224,-680 224,-666 179,-666"/>
+<text text-anchor="start" x="187.5" y="-670.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 5,1->5,3 -->
+<g id="edge93" class="edge"><title>5,1->5,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-681.755C116.207,-681.208 132.729,-681.105 148.049,-681.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-684.957 158.425,-681.756 148.534,-677.96 148.326,-684.957"/>
+</g>
+<!-- 5,3->5,1 -->
+<g id="edge99" class="edge"><title>5,3->5,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-694.244C143.825,-694.792 127.305,-694.895 111.982,-694.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-691.043 101.605,-694.245 111.497,-698.04 111.704,-691.043"/>
+</g>
+<!-- 5,4 -->
+<g id="node29" class="node"><title>5,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-688" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-699.167" font-family="Times Roman,serif" font-size="10.00">5,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-680 303,-694 348,-694 348,-680 303,-680"/>
+<text text-anchor="start" x="307.5" y="-684.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-666 303,-680 348,-680 348,-666 303,-666"/>
+<text text-anchor="start" x="311" y="-670.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 5,3->5,4 -->
+<g id="edge97" class="edge"><title>5,3->5,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-681.562C252.601,-681.258 262.531,-681.176 272.159,-681.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-684.822 282.443,-681.554 272.527,-677.823 272.364,-684.822"/>
+</g>
+<!-- 5,3->6,1 -->
+<g id="edge95" class="edge"><title>5,3->6,1</title>
+<path fill="none" stroke="#dd1e2f" d="M227.005,-658.971C236.061,-648.862 244,-640 244,-640 244,-640 262,-514 262,-514 262,-514 271.601,-507.295 283.307,-499.119"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="285.461,-501.884 291.655,-493.288 281.453,-496.145 285.461,-501.884"/>
+</g>
+<!-- 5,4->5,3 -->
+<g id="edge103" class="edge"><title>5,4->5,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-694.446C273.158,-694.745 263.225,-694.824 253.602,-694.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-691.175 243.327,-694.438 253.242,-698.173 253.407,-691.175"/>
+</g>
+<!-- 5,4->6,2 -->
+<g id="edge101" class="edge"><title>5,4->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M353.779,-660.914C365.347,-650.026 376,-640 376,-640 376,-640 408.743,-563.749 430.371,-513.382"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="433.673,-514.563 434.403,-503.993 427.241,-511.801 433.673,-514.563"/>
+</g>
+<!-- 4,1 -->
+<g id="node31" class="node"><title>4,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">4,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="40" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="43" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TCTATT</text>
+</g>
+<!-- 4,3 -->
+<g id="node32" class="node"><title>4,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">4,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="185" y="-257.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="187.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 4,1->4,3 -->
+<g id="edge29" class="edge"><title>4,1->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 4,3->4,1 -->
+<g id="edge35" class="edge"><title>4,3->4,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 4,4 -->
+<g id="node33" class="node"><title>4,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">4,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="307.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="311" y="-243.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 4,3->4,4 -->
+<g id="edge33" class="edge"><title>4,3->4,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 4,3->6,1 -->
+<g id="edge31" class="edge"><title>4,3->6,1</title>
+<path fill="none" stroke="#dd1e2f" d="M238.738,-278.971C252.156,-285.36 264,-291 264,-291 264,-291 274,-412 274,-412 274,-412 282.58,-421.757 292.701,-433.268"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="290.268,-435.801 299.5,-441 295.525,-431.179 290.268,-435.801"/>
+</g>
+<!-- 4,4->4,3 -->
+<g id="edge39" class="edge"><title>4,4->4,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+<!-- 4,4->6,2 -->
+<g id="edge37" class="edge"><title>4,4->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M351.718,-289.414C368.888,-307.675 388,-328 388,-328 388,-328 412.394,-384.787 430.33,-426.538"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427.226,-428.181 434.389,-435.987 433.658,-425.418 427.226,-428.181"/>
+</g>
+<!-- 6,1->1,3 -->
+<g id="edge45" class="edge"><title>6,1->1,3</title>
+<path fill="none" stroke="#218559" d="M286.368,-453.534C274.262,-448.374 264,-444 264,-444 264,-444 262,-444 262,-444 262,-444 236.975,-504.716 218.982,-548.371"/>
+<polygon fill="#218559" stroke="#218559" points="215.723,-547.095 215.148,-557.674 222.195,-549.762 215.723,-547.095"/>
+</g>
+<!-- 6,1->3,3 -->
+<g id="edge47" class="edge"><title>6,1->3,3</title>
+<path fill="none" stroke="#218559" d="M299.5,-441C286.75,-426.5 274,-412 274,-412 274,-412 264,-394 264,-394 264,-394 257.057,-390.694 247.781,-386.277"/>
+<polygon fill="#218559" stroke="#218559" points="249.272,-383.11 238.738,-381.971 246.262,-389.43 249.272,-383.11"/>
+</g>
+<!-- 6,1->2,3 -->
+<g id="edge43" class="edge"><title>6,1->2,3</title>
+<path fill="none" stroke="#218559" d="M303.371,-501.383C289.471,-521.551 274,-544 274,-544 274,-544 264,-629 264,-629 264,-629 244,-736 244,-736 244,-736 239.718,-740.779 233.857,-747.322"/>
+<polygon fill="#218559" stroke="#218559" points="231.071,-745.187 227.005,-754.971 236.285,-749.858 231.071,-745.187"/>
+</g>
+<!-- 6,1->5,3 -->
+<g id="edge51" class="edge"><title>6,1->5,3</title>
+<path fill="none" stroke="#218559" d="M293.131,-494.787C277.505,-506.941 262,-519 262,-519 262,-519 244,-640 244,-640 244,-640 239.718,-644.779 233.857,-651.322"/>
+<polygon fill="#218559" stroke="#218559" points="231.071,-649.187 227.005,-658.971 236.285,-653.858 231.071,-649.187"/>
+</g>
+<!-- 6,1->4,3 -->
+<g id="edge49" class="edge"><title>6,1->4,3</title>
+<path fill="none" stroke="#218559" d="M299.5,-441C286.75,-426.5 274,-412 274,-412 274,-412 264,-316 264,-316 264,-316 252.076,-305.59 238.602,-293.828"/>
+<polygon fill="#218559" stroke="#218559" points="240.859,-291.151 231.024,-287.211 236.255,-296.425 240.859,-291.151"/>
+</g>
+<!-- 6,1->6,2 -->
+<g id="edge41" class="edge"><title>6,1->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-463.562C376.601,-463.258 386.531,-463.176 396.159,-463.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-466.822 406.443,-463.554 396.527,-459.823 396.364,-466.822"/>
+</g>
+<!-- 6,2->1,4 -->
+<g id="edge59" class="edge"><title>6,2->1,4</title>
+<path fill="none" stroke="#218559" d="M423.484,-499.281C406.768,-518.463 388,-540 388,-540 388,-540 376.647,-549.37 363.581,-560.156"/>
+<polygon fill="#218559" stroke="#218559" points="361.246,-557.544 355.762,-566.609 365.702,-562.943 361.246,-557.544"/>
+</g>
+<!-- 6,2->3,4 -->
+<g id="edge57" class="edge"><title>6,2->3,4</title>
+<path fill="none" stroke="#218559" d="M418.55,-443.97C401.867,-429.709 381.029,-411.896 363.258,-396.704"/>
+<polygon fill="#218559" stroke="#218559" points="365.209,-393.768 355.334,-389.931 360.661,-399.089 365.209,-393.768"/>
+</g>
+<!-- 6,2->2,4 -->
+<g id="edge55" class="edge"><title>6,2->2,4</title>
+<path fill="none" stroke="#218559" d="M440.065,-505.448C423.121,-572.666 388,-712 388,-712 388,-712 376,-736 376,-736 376,-736 369.498,-742.119 361.137,-749.988"/>
+<polygon fill="#218559" stroke="#218559" points="358.662,-747.511 353.779,-756.914 363.46,-752.609 358.662,-747.511"/>
+</g>
+<!-- 6,2->5,4 -->
+<g id="edge63" class="edge"><title>6,2->5,4</title>
+<path fill="none" stroke="#218559" d="M436.802,-504.593C419.001,-555.079 388,-643 388,-643 388,-643 378.242,-649.97 366.407,-658.423"/>
+<polygon fill="#218559" stroke="#218559" points="364.079,-655.785 357.976,-664.446 368.148,-661.481 364.079,-655.785"/>
+</g>
+<!-- 6,2->4,4 -->
+<g id="edge61" class="edge"><title>6,2->4,4</title>
+<path fill="none" stroke="#218559" d="M432.98,-436.204C411.491,-390.872 376,-316 376,-316 376,-316 367.973,-307.343 358.288,-296.899"/>
+<polygon fill="#218559" stroke="#218559" points="360.762,-294.42 351.396,-289.467 355.629,-299.179 360.762,-294.42"/>
+</g>
+<!-- 6,2->6,1 -->
+<g id="edge65" class="edge"><title>6,2->6,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-476.446C397.158,-476.745 387.225,-476.824 377.602,-476.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-473.175 367.327,-476.438 377.242,-480.173 377.407,-473.175"/>
+</g>
+<!-- 6,3 -->
+<g id="node37" class="node"><title>6,3</title>
+<ellipse fill="none" stroke="black" cx="576" cy="-470" rx="44.7575" ry="36.0624"/>
+<text text-anchor="start" x="556" y="-481.167" font-family="Times Roman,serif" font-size="10.00">6,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="552,-462 552,-476 600,-476 600,-462 552,-462"/>
+<text text-anchor="start" x="554.5" y="-466.667" font-family="Times Roman,serif" font-size="10.00">AAGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="552,-448 552,-462 600,-462 600,-448 552,-448"/>
+<text text-anchor="start" x="559" y="-452.667" font-family="Times Roman,serif" font-size="10.00">CTTCTT</text>
+</g>
+<!-- 6,2->6,3 -->
+<g id="edge53" class="edge"><title>6,2->6,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.642,-463.585C501.055,-463.272 511.156,-463.178 520.981,-463.304"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="521.009,-466.805 531.079,-463.514 521.154,-459.807 521.009,-466.805"/>
+</g>
+<!-- 6,3->6,2 -->
+<g id="edge67" class="edge"><title>6,3->6,2</title>
+<path fill="none" stroke="#218559" d="M531.079,-476.486C521.562,-476.764 511.435,-476.822 501.66,-476.662"/>
+<polygon fill="#218559" stroke="#218559" points="501.726,-473.162 491.642,-476.415 501.553,-480.16 501.726,-473.162"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles/.part-0.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles/.part-0.crc
new file mode 100644
index 0000000..53d1b45
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles/.part-1.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles/.part-1.crc
new file mode 100644
index 0000000..e76bd36
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles/part-0 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles/part-0
new file mode 100755
index 0000000..a9f8b84
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles/part-0
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles/part-1 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles/part-1
new file mode 100755
index 0000000..f1c5899
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/five_length1_bubbles/part-1
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble.txt b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble.txt
new file mode 100644
index 0000000..51cc418
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble.txt
@@ -0,0 +1,6 @@
+((2,1) [(2,3)] [] [] [] GGAATA) (null)
+((2,3) [(2,4)] [(1,4)] [] [(2,1)] AATAC) (null)
+((2,4) [] [(1,3)] [] [(2,3)] ATACG) (null)
+((1,1) [(1,3)] [] [] [] AAACGT) (null)
+((1,3) [(1,4)] [(2,4)] [] [(1,1)] ACGTA) (null)
+((1,4) [] [(2,3)] [] [(1,3)] CGTAT) (null)
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble.txt.svg b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble.txt.svg
new file mode 100644
index 0000000..cb1b291
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble.txt.svg
@@ -0,0 +1,193 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: fr_bubble_txt Pages: 1 -->
+<svg width="392pt" height="428pt"
+ viewBox="0.00 0.00 392.00 428.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 424)">
+<title>fr_bubble_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-424 389,-424 389,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 376,-305 376,-217 8,-217"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="8,-324 8,-412 376,-412 376,-324 8,-324"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="39.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AAACGT</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="41.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">ACGTTT</text>
+</g>
+<!-- 1,3 -->
+<g id="node16" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="185.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="186" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+</g>
+<!-- 1,1->1,3 -->
+<g id="edge25" class="edge"><title>1,1->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 1,3->1,1 -->
+<g id="edge31" class="edge"><title>1,3->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 1,4 -->
+<g id="node17" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="310.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="309.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge27" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 2,4 -->
+<g id="node21" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-360 303,-374 348,-374 348,-360 303,-360"/>
+<text text-anchor="start" x="309.5" y="-364.667" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-346 303,-360 348,-360 348,-346 303,-346"/>
+<text text-anchor="start" x="310.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+</g>
+<!-- 1,3->2,4 -->
+<g id="edge29" class="edge"><title>1,3->2,4</title>
+<path fill="none" stroke="#ebb035" d="M231.017,-286.902C247.799,-301.383 268.887,-319.58 286.832,-335.065"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="284.972,-338.083 294.83,-341.966 289.545,-332.783 284.972,-338.083"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge35" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+<!-- 2,3 -->
+<g id="node20" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-360 179,-374 224,-374 224,-360 179,-360"/>
+<text text-anchor="start" x="186" y="-364.667" font-family="Times Roman,serif" font-size="10.00">AATAC</text>
+<polygon fill="#218559" stroke="#218559" points="179,-346 179,-360 224,-360 224,-346 179,-346"/>
+<text text-anchor="start" x="187.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">GTATT</text>
+</g>
+<!-- 1,4->2,3 -->
+<g id="edge33" class="edge"><title>1,4->2,3</title>
+<path fill="none" stroke="#ebb035" d="M299.131,-289.838C282.505,-308.372 264,-329 264,-329 264,-329 255.356,-334.351 244.471,-341.089"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="242.477,-338.207 235.817,-346.447 246.162,-344.159 242.477,-338.207"/>
+</g>
+<!-- 2,1 -->
+<g id="node19" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-360 37,-374 82,-374 82,-360 37,-360"/>
+<text text-anchor="start" x="39.5" y="-364.667" font-family="Times Roman,serif" font-size="10.00">GGAATA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-346 37,-360 82,-360 82,-346 37,-346"/>
+<text text-anchor="start" x="42.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">TATTCC</text>
+</g>
+<!-- 2,1->2,3 -->
+<g id="edge13" class="edge"><title>2,1->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-361.755C116.207,-361.208 132.729,-361.105 148.049,-361.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-364.957 158.425,-361.756 148.534,-357.96 148.326,-364.957"/>
+</g>
+<!-- 2,3->1,4 -->
+<g id="edge17" class="edge"><title>2,3->1,4</title>
+<path fill="none" stroke="#ebb035" d="M231.017,-342.098C247.799,-327.617 268.887,-309.42 286.832,-293.935"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="289.545,-296.217 294.83,-287.034 284.972,-290.917 289.545,-296.217"/>
+</g>
+<!-- 2,3->2,1 -->
+<g id="edge19" class="edge"><title>2,3->2,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-374.244C143.825,-374.792 127.305,-374.895 111.982,-374.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-371.043 101.605,-374.245 111.497,-378.04 111.704,-371.043"/>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge15" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-361.562C252.601,-361.258 262.531,-361.176 272.159,-361.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-364.822 282.443,-361.554 272.527,-357.823 272.364,-364.822"/>
+</g>
+<!-- 2,4->1,3 -->
+<g id="edge21" class="edge"><title>2,4->1,3</title>
+<path fill="none" stroke="#ebb035" d="M294.83,-341.966C278.021,-327.462 256.93,-309.262 239.004,-293.794"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="240.875,-290.785 231.017,-286.902 236.301,-296.085 240.875,-290.785"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge23" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-374.446C273.158,-374.745 263.225,-374.824 253.602,-374.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-371.175 243.327,-374.438 253.242,-378.173 253.407,-371.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble/.part-0.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble/.part-0.crc
new file mode 100644
index 0000000..d40cb4d
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble/.part-1.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble/.part-1.crc
new file mode 100644
index 0000000..bc50c79
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble/part-0 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble/part-0
new file mode 100755
index 0000000..f757027
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble/part-0
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble/part-1 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble/part-1
new file mode 100755
index 0000000..96978fb
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble/part-1
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble.txt b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble.txt
new file mode 100644
index 0000000..b53f352
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble.txt
@@ -0,0 +1,9 @@
+((2,1) [(2,2)] [] [] [(1,3)] CGTAT) (null)
+((2,2) [(2,3)] [] [] [(1,4),(2,1)] GTATT) (null)
+((2,3) [] [] [] [(2,2)] TATTCC) (null)
+((1,1) [(1,3)] [] [] [] AAACGT) (null)
+((1,3) [(2,1),(1,4)] [(3,4)] [] [(1,1)] ACGTA) (null)
+((1,4) [(2,2)] [(3,3)] [] [(1,3)] CGTAT) (null)
+((3,1) [(3,3)] [] [] [] GGAATA) (null)
+((3,3) [(3,4)] [(1,4)] [] [(3,1)] AATAC) (null)
+((3,4) [] [(1,3)] [] [(3,3)] ATACG) (null)
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble.txt.svg b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble.txt.svg
new file mode 100644
index 0000000..aaaab66
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble.txt.svg
@@ -0,0 +1,263 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: fr_bubble_and_ff_bubble_txt Pages: 1 -->
+<svg width="640pt" height="428pt"
+ viewBox="0.00 0.00 640.00 428.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 424)">
+<title>fr_bubble_and_ff_bubble_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-424 637,-424 637,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 376,-305 376,-217 8,-217"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_3</title>
+<polygon fill="none" stroke="black" points="8,-324 8,-412 376,-412 376,-324 8,-324"/>
+</g>
+<g id="graph5" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="274,-121 274,-209 624,-209 624,-121 274,-121"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="39.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AAACGT</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="41.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">ACGTTT</text>
+</g>
+<!-- 1,3 -->
+<g id="node16" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="185.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="186" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+</g>
+<!-- 1,1->1,3 -->
+<g id="edge26" class="edge"><title>1,1->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 1,3->1,1 -->
+<g id="edge34" class="edge"><title>1,3->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 1,4 -->
+<g id="node17" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="310.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="309.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge30" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 3,4 -->
+<g id="node21" class="node"><title>3,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">3,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-360 303,-374 348,-374 348,-360 303,-360"/>
+<text text-anchor="start" x="309.5" y="-364.667" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-346 303,-360 348,-360 348,-346 303,-346"/>
+<text text-anchor="start" x="310.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+</g>
+<!-- 1,3->3,4 -->
+<g id="edge32" class="edge"><title>1,3->3,4</title>
+<path fill="none" stroke="#ebb035" d="M231.017,-286.902C247.799,-301.383 268.887,-319.58 286.832,-335.065"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="284.972,-338.083 294.83,-341.966 289.545,-332.783 284.972,-338.083"/>
+</g>
+<!-- 2,1 -->
+<g id="node23" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-157 303,-171 348,-171 348,-157 303,-157"/>
+<text text-anchor="start" x="310.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-143 303,-157 348,-157 348,-143 303,-143"/>
+<text text-anchor="start" x="309.5" y="-147.667" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+</g>
+<!-- 1,3->2,1 -->
+<g id="edge28" class="edge"><title>1,3->2,1</title>
+<path fill="none" stroke="#dd1e2f" d="M234.345,-237.712C249.456,-227.157 264,-217 264,-217 264,-217 274.622,-207.945 287.002,-197.392"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="289.518,-199.846 294.857,-190.695 284.977,-194.519 289.518,-199.846"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge40" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+<!-- 3,3 -->
+<g id="node20" class="node"><title>3,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">3,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-360 179,-374 224,-374 224,-360 179,-360"/>
+<text text-anchor="start" x="186" y="-364.667" font-family="Times Roman,serif" font-size="10.00">AATAC</text>
+<polygon fill="#218559" stroke="#218559" points="179,-346 179,-360 224,-360 224,-346 179,-346"/>
+<text text-anchor="start" x="187.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">GTATT</text>
+</g>
+<!-- 1,4->3,3 -->
+<g id="edge38" class="edge"><title>1,4->3,3</title>
+<path fill="none" stroke="#ebb035" d="M294.83,-287.034C278.021,-301.538 256.93,-319.738 239.004,-335.206"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="236.301,-332.915 231.017,-342.098 240.875,-338.215 236.301,-332.915"/>
+</g>
+<!-- 2,2 -->
+<g id="node24" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-157 427,-171 472,-171 472,-157 427,-157"/>
+<text text-anchor="start" x="435.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">GTATT</text>
+<polygon fill="#218559" stroke="#218559" points="427,-143 427,-157 472,-157 472,-143 427,-143"/>
+<text text-anchor="start" x="434" y="-147.667" font-family="Times Roman,serif" font-size="10.00">AATAC</text>
+</g>
+<!-- 1,4->2,2 -->
+<g id="edge36" class="edge"><title>1,4->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M356.611,-236.527C372.521,-224.21 391.976,-209.147 408.924,-196.027"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="411.251,-198.652 417.015,-189.762 406.965,-193.117 411.251,-198.652"/>
+</g>
+<!-- 3,1 -->
+<g id="node19" class="node"><title>3,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">3,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-360 37,-374 82,-374 82,-360 37,-360"/>
+<text text-anchor="start" x="39.5" y="-364.667" font-family="Times Roman,serif" font-size="10.00">GGAATA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-346 37,-360 82,-360 82,-346 37,-346"/>
+<text text-anchor="start" x="42.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">TATTCC</text>
+</g>
+<!-- 3,1->3,3 -->
+<g id="edge42" class="edge"><title>3,1->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-361.755C116.207,-361.208 132.729,-361.105 148.049,-361.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-364.957 158.425,-361.756 148.534,-357.96 148.326,-364.957"/>
+</g>
+<!-- 3,3->1,4 -->
+<g id="edge46" class="edge"><title>3,3->1,4</title>
+<path fill="none" stroke="#ebb035" d="M229.714,-341.169C245.535,-326.385 262,-311 262,-311 262,-311 272.801,-302.428 285.459,-292.382"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="287.843,-294.958 293.5,-286 283.491,-289.475 287.843,-294.958"/>
+</g>
+<!-- 3,3->3,1 -->
+<g id="edge48" class="edge"><title>3,3->3,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-374.244C143.825,-374.792 127.305,-374.895 111.982,-374.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-371.043 101.605,-374.245 111.497,-378.04 111.704,-371.043"/>
+</g>
+<!-- 3,3->3,4 -->
+<g id="edge44" class="edge"><title>3,3->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-361.562C252.601,-361.258 262.531,-361.176 272.159,-361.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-364.822 282.443,-361.554 272.527,-357.823 272.364,-364.822"/>
+</g>
+<!-- 3,4->1,3 -->
+<g id="edge50" class="edge"><title>3,4->1,3</title>
+<path fill="none" stroke="#ebb035" d="M294.83,-341.966C278.021,-327.462 256.93,-309.262 239.004,-293.794"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="240.875,-290.785 231.017,-286.902 236.301,-296.085 240.875,-290.785"/>
+</g>
+<!-- 3,4->3,3 -->
+<g id="edge52" class="edge"><title>3,4->3,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-374.446C273.158,-374.745 263.225,-374.824 253.602,-374.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-371.175 243.327,-374.438 253.242,-378.173 253.407,-371.175"/>
+</g>
+<!-- 2,1->1,3 -->
+<g id="edge16" class="edge"><title>2,1->1,3</title>
+<path fill="none" stroke="#218559" d="M293.015,-189.762C277.051,-202.122 257.587,-217.19 240.68,-230.28"/>
+<polygon fill="#218559" stroke="#218559" points="238.376,-227.637 232.611,-236.527 242.661,-233.173 238.376,-227.637"/>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge14" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-158.562C376.601,-158.258 386.531,-158.176 396.159,-158.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-161.822 406.443,-158.554 396.527,-154.823 396.364,-161.822"/>
+</g>
+<!-- 2,2->1,4 -->
+<g id="edge20" class="edge"><title>2,2->1,4</title>
+<path fill="none" stroke="#218559" d="M420.286,-191.831C404.465,-206.615 388,-222 388,-222 388,-222 379.356,-227.351 368.471,-234.089"/>
+<polygon fill="#218559" stroke="#218559" points="366.477,-231.207 359.817,-239.447 370.162,-237.159 366.477,-231.207"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge22" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-171.446C397.158,-171.745 387.225,-171.824 377.602,-171.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-168.175 367.327,-171.438 377.242,-175.173 377.407,-168.175"/>
+</g>
+<!-- 2,3 -->
+<g id="node25" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-157 551,-171 596,-171 596,-157 551,-157"/>
+<text text-anchor="start" x="556.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">TATTCC</text>
+<polygon fill="#218559" stroke="#218559" points="551,-143 551,-157 596,-157 596,-143 551,-143"/>
+<text text-anchor="start" x="553.5" y="-147.667" font-family="Times Roman,serif" font-size="10.00">GGAATA</text>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge18" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-158.562C500.601,-158.258 510.531,-158.176 520.159,-158.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-161.822 530.443,-158.554 520.527,-154.823 520.364,-161.822"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge24" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M530.443,-171.446C521.158,-171.745 511.225,-171.824 501.602,-171.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-168.175 491.327,-171.438 501.242,-175.173 501.407,-168.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/.part-0.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/.part-0.crc
new file mode 100644
index 0000000..528de91
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/.part-1.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/.part-1.crc
new file mode 100644
index 0000000..247ccc7
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/part-0 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/part-0
new file mode 100755
index 0000000..795d0c7
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/part-0
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/part-1 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/part-1
new file mode 100755
index 0000000..8a0aa72
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/part-1
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble.txt b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble.txt
new file mode 100644
index 0000000..8b0bfdc
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble.txt
@@ -0,0 +1,6 @@
+((2,1) [(2,2)] [] [(1,2)] [] ACCGT) (null)
+((2,2) [(2,3)] [] [(1,1)] [(2,1)] CCGTG) (null)
+((2,3) [] [] [] [(2,2)] CGTGGT) (null)
+((1,1) [(1,2)] [] [(2,2)] [] ACGGT) (null)
+((1,2) [(1,3)] [] [(2,1)] [(1,1)] CGGTG) (null)
+((1,3) [] [] [] [(1,2)] GGTGTA) (null)
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble.txt.svg b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble.txt.svg
new file mode 100644
index 0000000..80a9d65
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble.txt.svg
@@ -0,0 +1,193 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: rf_bubble_txt Pages: 1 -->
+<svg width="394pt" height="428pt"
+ viewBox="0.00 0.00 394.00 428.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 424)">
+<title>rf_bubble_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-424 391,-424 391,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-324 8,-412 377,-412 377,-324 8,-324"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 378,-305 378,-217 8,-217"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-360 37,-374 82,-374 82,-360 37,-360"/>
+<text text-anchor="start" x="42.5" y="-364.667" font-family="Times Roman,serif" font-size="10.00">ACGGT</text>
+<polygon fill="#218559" stroke="#218559" points="37,-346 37,-360 82,-360 82,-346 37,-346"/>
+<text text-anchor="start" x="43" y="-350.667" font-family="Times Roman,serif" font-size="10.00">ACCGT</text>
+</g>
+<!-- 1,2 -->
+<g id="node16" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-360 179,-374 224,-374 224,-360 179,-360"/>
+<text text-anchor="start" x="184" y="-364.667" font-family="Times Roman,serif" font-size="10.00">CGGTG</text>
+<polygon fill="#218559" stroke="#218559" points="179,-346 179,-360 224,-360 224,-346 179,-346"/>
+<text text-anchor="start" x="184" y="-350.667" font-family="Times Roman,serif" font-size="10.00">CACCG</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge25" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-361.755C116.207,-361.208 132.729,-361.105 148.049,-361.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-364.957 158.425,-361.756 148.534,-357.96 148.326,-364.957"/>
+</g>
+<!-- 2,2 -->
+<g id="node20" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="184.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">CCGTG</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="183.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">CACGG</text>
+</g>
+<!-- 1,1->2,2 -->
+<g id="edge27" class="edge"><title>1,1->2,2</title>
+<path fill="none" stroke="#06a2cb" d="M96.6544,-349.87C117.807,-339.685 140,-329 140,-329 140,-329 153.666,-313.765 168.062,-297.718"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="171.058,-299.619 175.131,-289.838 165.848,-294.945 171.058,-299.619"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge33" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-374.244C143.825,-374.792 127.305,-374.895 111.982,-374.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-371.043 101.605,-374.245 111.497,-378.04 111.704,-371.043"/>
+</g>
+<!-- 1,3 -->
+<g id="node17" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="326" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="306.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="304,-360 304,-374 349,-374 349,-360 304,-360"/>
+<text text-anchor="start" x="307" y="-364.667" font-family="Times Roman,serif" font-size="10.00">GGTGTA</text>
+<polygon fill="#218559" stroke="#218559" points="304,-346 304,-360 349,-360 349,-346 304,-346"/>
+<text text-anchor="start" x="307.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">TACACC</text>
+</g>
+<!-- 1,2->1,3 -->
+<g id="edge29" class="edge"><title>1,2->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M243.319,-361.573C252.886,-361.256 263.16,-361.173 273.102,-361.321"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="273.22,-364.825 283.299,-361.561 273.384,-357.827 273.22,-364.825"/>
+</g>
+<!-- 2,1 -->
+<g id="node19" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="43" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ACCGT</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="42.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">ACGGT</text>
+</g>
+<!-- 1,2->2,1 -->
+<g id="edge31" class="edge"><title>1,2->2,1</title>
+<path fill="none" stroke="#06a2cb" d="M167.587,-345.312C145.274,-330.161 120,-313 120,-313 120,-313 109.378,-303.945 96.9983,-293.392"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="99.0233,-290.519 89.1426,-286.695 94.4822,-295.846 99.0233,-290.519"/>
+</g>
+<!-- 1,3->1,2 -->
+<g id="edge35" class="edge"><title>1,3->1,2</title>
+<path fill="none" stroke="#218559" d="M283.299,-374.439C273.713,-374.75 263.434,-374.828 253.502,-374.673"/>
+<polygon fill="#218559" stroke="#218559" points="253.401,-371.169 243.319,-374.427 253.232,-378.167 253.401,-371.169"/>
+</g>
+<!-- 2,1->1,2 -->
+<g id="edge15" class="edge"><title>2,1->1,2</title>
+<path fill="none" stroke="#06a2cb" d="M92.4134,-283.688C114.726,-298.839 140,-316 140,-316 140,-316 150.622,-325.055 163.002,-335.608"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="160.977,-338.481 170.857,-342.305 165.518,-333.154 160.977,-338.481"/>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge13" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 2,2->1,1 -->
+<g id="edge19" class="edge"><title>2,2->1,1</title>
+<path fill="none" stroke="#06a2cb" d="M168.906,-285.184C148.325,-300.692 121.435,-320.954 99.4042,-337.555"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="97.1022,-334.907 91.222,-343.72 101.315,-340.497 97.1022,-334.907"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge21" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 2,3 -->
+<g id="node21" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="326" cy="-261" rx="43.8406" ry="36.0624"/>
+<text text-anchor="start" x="306" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 349,-267 349,-253 303,-253"/>
+<text text-anchor="start" x="306" y="-257.667" font-family="Times Roman,serif" font-size="10.00">CGTGGT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 349,-253 349,-239 303,-239"/>
+<text text-anchor="start" x="305.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">ACCACG</text>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge17" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M243.319,-254.573C252.676,-254.263 262.709,-254.176 272.446,-254.312"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.37,-257.811 282.446,-254.534 272.526,-250.813 272.37,-257.811"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge23" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M282.446,-267.466C273.13,-267.753 263.195,-267.823 253.581,-267.674"/>
+<polygon fill="#218559" stroke="#218559" points="253.401,-264.169 243.319,-267.427 253.232,-271.167 253.401,-264.169"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble/.part-0.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble/.part-0.crc
new file mode 100644
index 0000000..e87ee4f
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble/.part-1.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble/.part-1.crc
new file mode 100644
index 0000000..2c9bb22
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble/part-0 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble/part-0
new file mode 100755
index 0000000..4388d34
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble/part-0
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble/part-1 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble/part-1
new file mode 100755
index 0000000..fbd0d87
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/rf_bubble/part-1
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble.txt b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble.txt
new file mode 100644
index 0000000..14cf2e1
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble.txt
@@ -0,0 +1,6 @@
+((2,1) [(2,2)] [] [] [(1,3)] AGAAG) (null)
+((2,2) [(2,3)] [] [] [(1,4),(2,1)] GAAGC) (null)
+((2,3) [] [] [] [(2,2)] AAGCCC) (null)
+((1,1) [(1,3)] [] [] [] AATAGA) (null)
+((1,3) [(2,1),(1,4)] [] [] [(1,1)] TAGAA) (null)
+((1,4) [(2,2)] [] [] [(1,3)] AGAAG) (null)
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble.txt.svg b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble.txt.svg
new file mode 100644
index 0000000..550660b
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble.txt.svg
@@ -0,0 +1,193 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: small_bubble_txt Pages: 1 -->
+<svg width="644pt" height="321pt"
+ viewBox="0.00 0.00 644.00 321.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 317)">
+<title>small_bubble_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-317 641,-317 641,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 376,-305 376,-217 8,-217"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="274,-121 274,-209 628,-209 628,-121 274,-121"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="40" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="43" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TCTATT</text>
+</g>
+<!-- 1,3 -->
+<g id="node16" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="185" y="-257.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="187.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 1,1->1,3 -->
+<g id="edge25" class="edge"><title>1,1->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 1,3->1,1 -->
+<g id="edge31" class="edge"><title>1,3->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 1,4 -->
+<g id="node17" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="307.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="311" y="-243.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge29" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 2,1 -->
+<g id="node19" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-157 303,-171 348,-171 348,-157 303,-157"/>
+<text text-anchor="start" x="307.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-143 303,-157 348,-157 348,-143 303,-143"/>
+<text text-anchor="start" x="311" y="-147.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 1,3->2,1 -->
+<g id="edge27" class="edge"><title>1,3->2,1</title>
+<path fill="none" stroke="#dd1e2f" d="M234.345,-237.712C249.456,-227.157 264,-217 264,-217 264,-217 274.622,-207.945 287.002,-197.392"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="289.518,-199.846 294.857,-190.695 284.977,-194.519 289.518,-199.846"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge35" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+<!-- 2,2 -->
+<g id="node20" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-157 427,-171 472,-171 472,-157 427,-157"/>
+<text text-anchor="start" x="431.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">GAAGC</text>
+<polygon fill="#218559" stroke="#218559" points="427,-143 427,-157 472,-157 472,-143 427,-143"/>
+<text text-anchor="start" x="434" y="-147.667" font-family="Times Roman,serif" font-size="10.00">GCTTC</text>
+</g>
+<!-- 1,4->2,2 -->
+<g id="edge33" class="edge"><title>1,4->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M361.282,-241.419C375.287,-233.861 388,-227 388,-227 388,-227 400.484,-214.311 414.157,-200.414"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="416.837,-202.681 421.356,-193.098 411.847,-197.771 416.837,-202.681"/>
+</g>
+<!-- 2,1->1,3 -->
+<g id="edge15" class="edge"><title>2,1->1,3</title>
+<path fill="none" stroke="#218559" d="M293.015,-189.762C277.051,-202.122 257.587,-217.19 240.68,-230.28"/>
+<polygon fill="#218559" stroke="#218559" points="238.376,-227.637 232.611,-236.527 242.661,-233.173 238.376,-227.637"/>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge13" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-158.562C376.601,-158.258 386.531,-158.176 396.159,-158.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-161.822 406.443,-158.554 396.527,-154.823 396.364,-161.822"/>
+</g>
+<!-- 2,2->1,4 -->
+<g id="edge19" class="edge"><title>2,2->1,4</title>
+<path fill="none" stroke="#218559" d="M417.015,-189.762C401.051,-202.122 381.587,-217.19 364.68,-230.28"/>
+<polygon fill="#218559" stroke="#218559" points="362.376,-227.637 356.611,-236.527 366.661,-233.173 362.376,-227.637"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge21" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-171.446C397.158,-171.745 387.225,-171.824 377.602,-171.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-168.175 367.327,-171.438 377.242,-175.173 377.407,-168.175"/>
+</g>
+<!-- 2,3 -->
+<g id="node21" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="575" cy="-165" rx="44.0472" ry="36.0624"/>
+<text text-anchor="start" x="555.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="552,-157 552,-171 599,-171 599,-157 552,-157"/>
+<text text-anchor="start" x="554.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">AAGCCC</text>
+<polygon fill="#218559" stroke="#218559" points="552,-143 552,-157 599,-157 599,-143 552,-143"/>
+<text text-anchor="start" x="555.5" y="-147.667" font-family="Times Roman,serif" font-size="10.00">GGGCTT</text>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge17" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.658,-158.573C501.09,-158.263 511.203,-158.176 521.018,-158.312"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="521.023,-161.813 531.097,-158.534 521.177,-154.814 521.023,-161.813"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge23" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M531.097,-171.466C521.607,-171.756 511.479,-171.824 501.692,-171.669"/>
+<polygon fill="#218559" stroke="#218559" points="501.739,-168.169 491.658,-171.427 501.571,-175.167 501.739,-168.169"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble/.part-0.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble/.part-0.crc
new file mode 100644
index 0000000..fc426bc
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble/.part-1.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble/.part-1.crc
new file mode 100644
index 0000000..611cf9d
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble/part-0 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble/part-0
new file mode 100755
index 0000000..de5dd64
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble/part-0
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble/part-1 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble/part-1
new file mode 100755
index 0000000..d84c1d2
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/small_bubble/part-1
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble.txt b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble.txt
new file mode 100644
index 0000000..5fde720
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble.txt
@@ -0,0 +1,9 @@
+((2,1) [(3,1),(2,2)] [] [] [(1,1)] ATAGA) (null)
+((2,2) [(2,3)] [] [] [(2,1)] TAGAC) (null)
+((2,3) [(2,4)] [] [] [(2,2)] AGACT) (null)
+((2,4) [(3,4)] [] [] [(2,3)] GACTA) (null)
+((1,1) [(2,1),(1,2)] [] [] [] AATAG) (null)
+((1,2) [] [] [] [(1,1)] ATAGAAG) (null)
+((3,1) [(3,3)] [] [] [(2,1)] TAGACT) (null)
+((3,3) [(3,4)] [] [] [(3,1)] GACTA) (null)
+((3,4) [] [] [] [(2,4),(3,3)] ACTAC) (null)
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble.txt.svg b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble.txt.svg
new file mode 100644
index 0000000..1ae4c53
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble.txt.svg
@@ -0,0 +1,253 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: tip_and_bubble_txt Pages: 1 -->
+<svg width="650pt" height="417pt"
+ viewBox="0.00 0.00 650.00 417.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 413)">
+<title>tip_and_bubble_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-413 647,-413 647,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 216,-209 216,-8 49,-8"/>
+<text text-anchor="middle" x="132.5" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-313 8,-401 262,-401 262,-313 8,-313"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_3</title>
+<polygon fill="none" stroke="black" points="284,-109 284,-197 634,-197 634,-109 284,-109"/>
+</g>
+<g id="graph5" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="155,-217 155,-305 634,-305 634,-217 155,-217"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="206" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.078,-157C75.5476,-157 161.628,-157 193.842,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="194.198,-160.5 204.198,-157 194.198,-153.5 194.198,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="206" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.078,-116C75.5476,-116 161.628,-116 193.842,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="194.198,-119.5 204.198,-116 194.198,-112.5 194.198,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="206" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.078,-75C75.5476,-75 161.628,-75 193.842,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="194.198,-78.5001 204.198,-75 194.198,-71.5001 194.198,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="206" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.078,-34C75.5476,-34 161.628,-34 193.842,-34"/>
+<polygon fill="#218559" stroke="#218559" points="194.198,-37.5001 204.198,-34 194.198,-30.5001 194.198,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-349 37,-363 82,-363 82,-349 37,-349"/>
+<text text-anchor="start" x="43.5" y="-353.667" font-family="Times Roman,serif" font-size="10.00">AATAG</text>
+<polygon fill="#218559" stroke="#218559" points="37,-335 37,-349 82,-349 82,-335 37,-335"/>
+<text text-anchor="start" x="45.5" y="-339.667" font-family="Times Roman,serif" font-size="10.00">CTATT</text>
+</g>
+<!-- 1,2 -->
+<g id="node16" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="206" cy="-357" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="186.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="181,-349 181,-363 232,-363 232,-349 181,-349"/>
+<text text-anchor="start" x="183.5" y="-353.667" font-family="Times Roman,serif" font-size="10.00">ATAGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="181,-335 181,-349 232,-349 232,-335 181,-335"/>
+<text text-anchor="start" x="187" y="-339.667" font-family="Times Roman,serif" font-size="10.00">CTTCTAT</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge34" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M101.518,-350.814C116.334,-350.242 133.206,-350.105 149.005,-350.404"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="149.114,-353.907 159.199,-350.663 149.292,-346.91 149.114,-353.907"/>
+</g>
+<!-- 2,1 -->
+<g id="node22" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="206" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="186.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="184,-253 184,-267 229,-267 229,-253 184,-253"/>
+<text text-anchor="start" x="190.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="184,-239 184,-253 229,-253 229,-239 184,-239"/>
+<text text-anchor="start" x="192.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TCTAT</text>
+</g>
+<!-- 1,1->2,1 -->
+<g id="edge32" class="edge"><title>1,1->2,1</title>
+<path fill="none" stroke="#dd1e2f" d="M95.2356,-337.316C116.802,-325.601 140,-313 140,-313 140,-313 152.492,-303.158 166.607,-292.037"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="168.857,-294.72 174.546,-285.782 164.525,-289.221 168.857,-294.72"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge36" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M159.199,-363.337C144.021,-363.827 127.084,-363.881 111.518,-363.499"/>
+<polygon fill="#218559" stroke="#218559" points="111.623,-360.001 101.518,-363.186 111.404,-366.998 111.623,-360.001"/>
+</g>
+<!-- 3,1 -->
+<g id="node18" class="node"><title>3,1</title>
+<ellipse fill="none" stroke="black" cx="335" cy="-153" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="315.5" y="-164.167" font-family="Times Roman,serif" font-size="10.00">3,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="313,-145 313,-159 358,-159 358,-145 313,-145"/>
+<text text-anchor="start" x="316.5" y="-149.667" font-family="Times Roman,serif" font-size="10.00">TAGACT</text>
+<polygon fill="#218559" stroke="#218559" points="313,-131 313,-145 358,-145 358,-131 313,-131"/>
+<text text-anchor="start" x="317" y="-135.667" font-family="Times Roman,serif" font-size="10.00">AGTCTA</text>
+</g>
+<!-- 3,3 -->
+<g id="node19" class="node"><title>3,3</title>
+<ellipse fill="none" stroke="black" cx="459" cy="-153" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="439.5" y="-164.167" font-family="Times Roman,serif" font-size="10.00">3,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="437,-145 437,-159 482,-159 482,-145 437,-145"/>
+<text text-anchor="start" x="443" y="-149.667" font-family="Times Roman,serif" font-size="10.00">GACTA</text>
+<polygon fill="#218559" stroke="#218559" points="437,-131 437,-145 482,-145 482,-131 437,-131"/>
+<text text-anchor="start" x="444.5" y="-135.667" font-family="Times Roman,serif" font-size="10.00">TAGTC</text>
+</g>
+<!-- 3,1->3,3 -->
+<g id="edge38" class="edge"><title>3,1->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M377.327,-146.562C386.601,-146.258 396.531,-146.176 406.159,-146.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="406.364,-149.822 416.443,-146.554 406.527,-142.823 406.364,-149.822"/>
+</g>
+<!-- 3,1->2,1 -->
+<g id="edge40" class="edge"><title>3,1->2,1</title>
+<path fill="none" stroke="#218559" d="M307.356,-180.644C291.216,-196.784 274,-214 274,-214 274,-214 261.746,-222.469 247.643,-232.218"/>
+<polygon fill="#218559" stroke="#218559" points="245.439,-229.486 239.203,-238.051 249.42,-235.244 245.439,-229.486"/>
+</g>
+<!-- 3,3->3,1 -->
+<g id="edge44" class="edge"><title>3,3->3,1</title>
+<path fill="none" stroke="#218559" d="M416.443,-159.446C407.158,-159.745 397.225,-159.824 387.602,-159.681"/>
+<polygon fill="#218559" stroke="#218559" points="387.407,-156.175 377.327,-159.438 387.242,-163.173 387.407,-156.175"/>
+</g>
+<!-- 3,4 -->
+<g id="node20" class="node"><title>3,4</title>
+<ellipse fill="none" stroke="black" cx="583" cy="-153" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="563.5" y="-164.167" font-family="Times Roman,serif" font-size="10.00">3,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="561,-145 561,-159 606,-159 606,-145 561,-145"/>
+<text text-anchor="start" x="567.5" y="-149.667" font-family="Times Roman,serif" font-size="10.00">ACTAC</text>
+<polygon fill="#218559" stroke="#218559" points="561,-131 561,-145 606,-145 606,-131 561,-131"/>
+<text text-anchor="start" x="568" y="-135.667" font-family="Times Roman,serif" font-size="10.00">GTAGT</text>
+</g>
+<!-- 3,3->3,4 -->
+<g id="edge42" class="edge"><title>3,3->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M501.327,-146.562C510.601,-146.258 520.531,-146.176 530.159,-146.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="530.364,-149.822 540.443,-146.554 530.527,-142.823 530.364,-149.822"/>
+</g>
+<!-- 3,4->3,3 -->
+<g id="edge48" class="edge"><title>3,4->3,3</title>
+<path fill="none" stroke="#218559" d="M540.443,-159.446C531.158,-159.745 521.225,-159.824 511.602,-159.681"/>
+<polygon fill="#218559" stroke="#218559" points="511.407,-156.175 501.327,-159.438 511.242,-163.173 511.407,-156.175"/>
+</g>
+<!-- 2,4 -->
+<g id="node25" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="583" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="563.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="561,-253 561,-267 606,-267 606,-253 561,-253"/>
+<text text-anchor="start" x="567" y="-257.667" font-family="Times Roman,serif" font-size="10.00">GACTA</text>
+<polygon fill="#218559" stroke="#218559" points="561,-239 561,-253 606,-253 606,-239 561,-239"/>
+<text text-anchor="start" x="568.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TAGTC</text>
+</g>
+<!-- 3,4->2,4 -->
+<g id="edge46" class="edge"><title>3,4->2,4</title>
+<path fill="none" stroke="#218559" d="M583,-188.897C583,-188.965 583,-189 583,-189 583,-189 583,-204.137 583,-214.808"/>
+<polygon fill="#218559" stroke="#218559" points="579.5,-215.103 583,-225.103 586.5,-215.103 579.5,-215.103"/>
+</g>
+<!-- 2,1->1,1 -->
+<g id="edge18" class="edge"><title>2,1->1,1</title>
+<path fill="none" stroke="#218559" d="M171.021,-282.15C147.211,-296.547 120,-313 120,-313 120,-313 111.029,-319.471 99.9682,-327.449"/>
+<polygon fill="#218559" stroke="#218559" points="97.7057,-324.766 91.643,-333.454 101.801,-330.443 97.7057,-324.766"/>
+</g>
+<!-- 2,1->3,1 -->
+<g id="edge14" class="edge"><title>2,1->3,1</title>
+<path fill="none" stroke="#dd1e2f" d="M236.571,-235.406C254.514,-220.384 277.353,-201.262 296.514,-185.221"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="298.78,-187.888 304.201,-178.785 294.287,-182.521 298.78,-187.888"/>
+</g>
+<!-- 2,2 -->
+<g id="node23" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="335" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="315.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="313,-253 313,-267 358,-267 358,-253 313,-253"/>
+<text text-anchor="start" x="319.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">TAGAC</text>
+<polygon fill="#218559" stroke="#218559" points="313,-239 313,-253 358,-253 358,-239 313,-239"/>
+<text text-anchor="start" x="320.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">GTCTA</text>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge16" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M248.597,-254.609C259.385,-254.242 271.101,-254.157 282.322,-254.352"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="282.362,-257.854 292.448,-254.61 282.54,-250.856 282.362,-257.854"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge22" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M292.448,-267.39C281.662,-267.757 269.947,-267.843 258.724,-267.649"/>
+<polygon fill="#218559" stroke="#218559" points="258.683,-264.147 248.597,-267.391 258.505,-271.145 258.683,-264.147"/>
+</g>
+<!-- 2,3 -->
+<g id="node24" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="459" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="439.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="437,-253 437,-267 482,-267 482,-253 437,-253"/>
+<text text-anchor="start" x="443" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AGACT</text>
+<polygon fill="#218559" stroke="#218559" points="437,-239 437,-253 482,-253 482,-239 437,-239"/>
+<text text-anchor="start" x="444" y="-243.667" font-family="Times Roman,serif" font-size="10.00">AGTCT</text>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge20" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M377.327,-254.562C386.601,-254.258 396.531,-254.176 406.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="406.364,-257.822 416.443,-254.554 406.527,-250.823 406.364,-257.822"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge26" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M416.443,-267.446C407.158,-267.745 397.225,-267.824 387.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="387.407,-264.175 377.327,-267.438 387.242,-271.173 387.407,-264.175"/>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge24" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M501.327,-254.562C510.601,-254.258 520.531,-254.176 530.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="530.364,-257.822 540.443,-254.554 530.527,-250.823 530.364,-257.822"/>
+</g>
+<!-- 2,4->3,4 -->
+<g id="edge28" class="edge"><title>2,4->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M583,-225.103C583,-225.035 583,-225 583,-225 583,-225 583,-209.863 583,-199.192"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="586.5,-198.897 583,-188.897 579.5,-198.897 586.5,-198.897"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge30" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M540.443,-267.446C531.158,-267.745 521.225,-267.824 511.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="511.407,-264.175 501.327,-267.438 511.242,-271.173 511.407,-264.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble/.part-0.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble/.part-0.crc
new file mode 100644
index 0000000..3b5cec2
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble/.part-1.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble/.part-1.crc
new file mode 100644
index 0000000..6621cf4
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble/part-0 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble/part-0
new file mode 100755
index 0000000..a6a5e83
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble/part-0
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble/part-1 b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble/part-1
new file mode 100755
index 0000000..a65fe7e
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/bubblemerge/tip_and_bubble/part-1
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread.txt b/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread.txt
new file mode 100644
index 0000000..7a3e64f
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread.txt
@@ -0,0 +1,4 @@
+((1,1) [(1,2)] [] [] [] AATAG) (null)
+((1,2) [(1,3)] [] [] [(1,1)] ATAGA) (null)
+((1,3) [(1,4)] [] [] [(1,2)] TAGAA) (null)
+((1,4) [] [] [] [(1,3)] AGAAG) (null)
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread.txt.svg b/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread.txt.svg
new file mode 100644
index 0000000..efaf9e1
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread.txt.svg
@@ -0,0 +1,142 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: singleread_txt Pages: 1 -->
+<svg width="516pt" height="321pt"
+ viewBox="0.00 0.00 516.00 321.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 317)">
+<title>singleread_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-317 513,-317 513,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 500,-305 500,-217 8,-217"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="43.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AATAG</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="45.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">CTATT</text>
+</g>
+<!-- 1,2 -->
+<g id="node16" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="185.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="187.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TCTAT</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge12" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge16" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 1,3 -->
+<g id="node17" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="309" y="-257.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="311.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 1,2->1,3 -->
+<g id="edge14" class="edge"><title>1,2->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 1,3->1,2 -->
+<g id="edge20" class="edge"><title>1,3->1,2</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+<!-- 1,4 -->
+<g id="node18" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-253 427,-267 472,-267 472,-253 427,-253"/>
+<text text-anchor="start" x="431.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="427,-239 427,-253 472,-253 472,-239 427,-239"/>
+<text text-anchor="start" x="435" y="-243.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge18" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-254.562C376.601,-254.258 386.531,-254.176 396.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-257.822 406.443,-254.554 396.527,-250.823 396.364,-257.822"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge22" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M406.443,-267.446C397.158,-267.745 387.225,-267.824 377.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-264.175 367.327,-267.438 377.242,-271.173 377.407,-264.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread/.part-0.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread/.part-0.crc
new file mode 100644
index 0000000..b972cba
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread/.part-1.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread/.part-1.crc
new file mode 100644
index 0000000..23505c8
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread/part-0 b/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread/part-0
new file mode 100755
index 0000000..39d79bc
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread/part-0
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread/part-1 b/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread/part-1
new file mode 100755
index 0000000..e49fcdd
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/pathmerge/singleread/part-1
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1.txt b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1.txt
new file mode 100644
index 0000000..bdeb586
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1.txt
@@ -0,0 +1,147 @@
+((2,1) [(2,2)] [] [] [] CCTCG) (null)
+((2,2) [(2,3)] [(1,4)] [] [(2,1)] CTCGC) (null)
+((2,3) [(2,4)] [] [] [(2,2)] TCGCA) (null)
+((2,4) [] [(1,2)] [] [(2,3)] CGCAC) (null)
+((4,1) [(4,2)] [] [] [] GAGGG) (null)
+((4,2) [(4,3)] [(5,4)] [] [(4,1)] AGGGT) (null)
+((4,3) [(4,4)] [] [] [(3,4),(4,2)] GGGTT) (null)
+((4,4) [] [(5,2)] [] [(4,3)] GGTTG) (null)
+((6,1) [(6,2)] [] [(5,3)] [] GTTGC) (null)
+((6,2) [(6,3)] [(7,4)] [] [(6,1)] TTGCT) (null)
+((6,3) [(6,4)] [] [(5,1)] [(6,2)] TGCTG) (null)
+((6,4) [] [(7,2)] [] [(6,3)] GCTGA) (null)
+((8,1) [(24,1),(8,2)] [] [(7,3)] [] CTGAA) (null)
+((8,2) [(8,3)] [(9,4)] [] [(8,1)] TGAAA) (null)
+((8,3) [(8,4)] [] [(7,1)] [(19,4),(8,2)] GAAAT) (null)
+((8,4) [] [(9,2)] [] [(8,3)] AAATC) (null)
+((10,1) [(10,2)] [] [] [(11,2)] GGCAG) (null)
+((10,2) [(9,1),(10,3)] [] [] [(10,1)] GCAGA) (null)
+((10,3) [(10,4)] [] [] [(11,4),(10,2)] CAGAT) (null)
+((10,4) [(9,3)] [] [] [(10,3)] AGATT) (null)
+((12,1) [(12,2)] [] [] [(13,2)] CTCTG) (null)
+((12,2) [(11,1),(12,3)] [] [] [(12,1)] TCTGG) (null)
+((12,3) [(12,4)] [] [] [(13,4),(12,2)] CTGGC) (null)
+((12,4) [(11,3)] [] [] [(12,3)] TGGCA) (null)
+((14,1) [(14,2)] [] [] [(15,2)] GCATC) (null)
+((14,2) [(13,1),(14,3)] [] [] [(14,1)] CATCT) (null)
+((14,3) [(14,4)] [] [] [(15,4),(14,2)] ATCTC) (null)
+((14,4) [(13,3)] [] [] [(14,3)] TCTCT) (null)
+((16,1) [(16,2)] [] [] [(17,2)] AACGG) (null)
+((16,2) [(15,1),(16,3)] [] [] [(16,1)] ACGGC) (null)
+((16,3) [(16,4)] [] [] [(17,4),(16,2)] CGGCA) (null)
+((16,4) [(15,3)] [] [] [(16,3)] GGCAT) (null)
+((18,1) [(23,1),(18,2)] [] [(17,3)] [] CGTTT) (null)
+((18,2) [(7,1),(18,3)] [(19,4)] [] [(18,1)] GTTTC) (null)
+((18,3) [(18,4)] [] [(17,1)] [(18,2)] TTTCA) (null)
+((18,4) [] [(19,2)] [(24,1)] [(18,3)] TTCAA) (null)
+((20,1) [(20,2)] [] [(19,3)] [] TCAAT) (null)
+((20,2) [(21,1),(20,3)] [] [] [(20,1)] CAATA) (null)
+((20,3) [(20,4)] [] [(19,1)] [(20,2)] AATAC) (null)
+((20,4) [(21,3)] [] [] [(20,3)] ATACG) (null)
+((22,1) [(22,2)] [] [] [(28,1),(27,3),(21,2)] TACGT) (null)
+((22,2) [(22,3)] [(23,4)] [] [(27,4),(22,1)] ACGTG) (null)
+((22,3) [(22,4)] [] [] [(21,4),(22,2)] CGTGA) (null)
+((22,4) [] [(23,2)] [] [(22,3)] GTGAA) (null)
+((24,1) [(17,1),(24,2)] [] [(23,3),(18,4),(7,2)] [(8,1),(19,3)] TGAAA) (null)
+((24,2) [(25,1),(24,3)] [] [(7,1)] [(19,4),(24,1)] GAAAC) (null)
+((24,3) [(24,4)] [] [(23,1)] [(24,2)] AAACT) (null)
+((24,4) [(25,3)] [] [] [(24,3)] AACTA) (null)
+((26,1) [(26,2)] [] [(27,3)] [] GTAAT) (null)
+((26,2) [(26,3)] [(25,4)] [] [(26,1)] TAATA) (null)
+((26,3) [(26,4)] [] [(27,1)] [(26,2)] AATAG) (null)
+((26,4) [] [(25,2)] [] [(26,3)] ATAGT) (null)
+((28,1) [(22,1),(28,2)] [] [] [(27,2)] TTACG) (null)
+((28,2) [(28,3)] [(29,4)] [] [(28,1)] TACGT) (null)
+((28,3) [(28,4)] [] [] [(27,4),(28,2)] ACGTC) (null)
+((28,4) [(31,3)] [(29,2)] [] [(28,3)] CGTCA) (null)
+((30,1) [(30,2)] [] [(29,3)] [(31,2)] GTCAT) (null)
+((30,2) [(29,1),(30,3)] [(31,4)] [] [(30,1)] TCATG) (null)
+((30,3) [(30,4)] [] [(29,1)] [(31,4),(30,2)] CATGA) (null)
+((30,4) [(29,3)] [(31,2)] [] [(30,3)] ATGAC) (null)
+((32,1) [(32,2)] [] [(33,3),(35,3)] [] AAGCG) (null)
+((32,2) [(31,1),(32,3)] [] [] [(32,1)] AGCGT) (null)
+((32,3) [(32,4)] [] [] [(32,2)] GCGTC) (null)
+((32,4) [(31,3)] [(29,2)] [] [(32,3)] CGTCA) (null)
+((34,1) [(34,2)] [] [] [(33,2),(35,2)] GCTTA) (null)
+((34,2) [(34,3)] [(35,4),(33,4)] [] [(34,1)] CTTAA) (null)
+((34,3) [(34,4)] [] [] [(35,4),(33,4),(34,2)] TTAAG) (null)
+((34,4) [] [(33,2),(35,2)] [] [(34,3)] TAAGC) (null)
+((36,1) [(36,2)] [] [(33,3),(35,3)] [] AAGCG) (null)
+((36,2) [(36,3)] [(37,4)] [] [(36,1)] AGCGT) (null)
+((36,3) [(36,4)] [] [] [(36,2)] GCGTG) (null)
+((36,4) [] [(37,2)] [] [(36,3)] CGTGT) (null)
+((1,1) [(1,2)] [] [] [] TAGTG) (null)
+((1,2) [(1,3)] [(2,4)] [] [(1,1)] AGTGC) (null)
+((1,3) [(1,4)] [] [] [(1,2)] GTGCG) (null)
+((1,4) [] [(2,2)] [] [(1,3)] TGCGA) (null)
+((3,1) [(3,3)] [] [] [] GCTAGG) (null)
+((3,3) [(3,4)] [] [] [(3,1)] TAGGG) (null)
+((3,4) [(4,3)] [] [] [(3,3)] AGGGT) (null)
+((5,1) [(5,2)] [] [(6,3)] [] AGCAA) (null)
+((5,2) [(5,3)] [(4,4)] [] [(5,1)] GCAAC) (null)
+((5,3) [(5,4)] [] [(6,1)] [(5,2)] CAACC) (null)
+((5,4) [] [(4,2)] [] [(5,3)] AACCC) (null)
+((7,1) [(7,2)] [] [(8,3),(24,2)] [(18,2),(23,1)] TTTCA) (null)
+((7,2) [(7,3)] [(6,4)] [(24,1)] [(7,1)] TTCAG) (null)
+((7,3) [(7,4)] [] [(8,1)] [(7,2)] TCAGC) (null)
+((7,4) [] [(6,2)] [] [(7,3)] CAGCA) (null)
+((9,1) [(9,2)] [] [] [(10,2)] CAGAT) (null)
+((9,2) [(9,3)] [(8,4)] [] [(9,1)] AGATT) (null)
+((9,3) [(9,4)] [] [] [(10,4),(9,2)] GATTT) (null)
+((9,4) [] [(8,2)] [] [(9,3)] ATTTC) (null)
+((11,1) [(11,2)] [] [] [(12,2)] CTGGC) (null)
+((11,2) [(10,1),(11,3)] [] [] [(11,1)] TGGCA) (null)
+((11,3) [(11,4)] [] [] [(12,4),(11,2)] GGCAG) (null)
+((11,4) [(10,3)] [] [] [(11,3)] GCAGA) (null)
+((13,1) [(13,2)] [] [] [(14,2)] ATCTC) (null)
+((13,2) [(12,1),(13,3)] [] [] [(13,1)] TCTCT) (null)
+((13,3) [(13,4)] [] [] [(14,4),(13,2)] CTCTG) (null)
+((13,4) [(12,3)] [] [] [(13,3)] TCTGG) (null)
+((15,1) [(15,2)] [] [] [(16,2)] CGGCA) (null)
+((15,2) [(14,1),(15,3)] [] [] [(15,1)] GGCAT) (null)
+((15,3) [(15,4)] [] [] [(16,4),(15,2)] GCATC) (null)
+((15,4) [(14,3)] [] [] [(15,3)] CATCT) (null)
+((17,1) [(17,2)] [] [(23,2),(18,3)] [(24,1)] GAAAC) (null)
+((17,2) [(16,1),(17,3)] [] [(23,1)] [(17,1)] AAACG) (null)
+((17,3) [(17,4)] [] [(18,1)] [(17,2)] AACGG) (null)
+((17,4) [(16,3)] [] [] [(17,3)] ACGGC) (null)
+((19,1) [(19,2)] [] [(20,3)] [] TATTG) (null)
+((19,2) [(19,3)] [(18,4)] [] [(19,1)] ATTGA) (null)
+((19,3) [(24,1),(19,4)] [] [(20,1)] [(19,2)] TTGAA) (null)
+((19,4) [(8,3),(24,2)] [(18,2),(23,1)] [] [(19,3)] TGAAA) (null)
+((21,1) [(21,2)] [] [] [(20,2)] AATAC) (null)
+((21,2) [(22,1),(21,3)] [] [] [(21,1)] ATACG) (null)
+((21,3) [(21,4)] [] [] [(20,4),(21,2)] TACGT) (null)
+((21,4) [(22,3)] [] [] [(27,4),(21,3)] ACGTG) (null)
+((23,1) [(7,1),(23,2)] [(19,4)] [(24,3),(17,2)] [(18,1)] GTTTC) (null)
+((23,2) [(23,3)] [(22,4)] [(17,1)] [(23,1)] TTTCA) (null)
+((23,3) [(23,4)] [] [(24,1)] [(23,2)] TTCAC) (null)
+((23,4) [] [(22,2)] [] [(23,3)] TCACG) (null)
+((25,1) [(25,2)] [] [] [(24,2)] AAACT) (null)
+((25,2) [(25,3)] [(26,4)] [] [(25,1)] AACTA) (null)
+((25,3) [(25,4)] [] [] [(24,4),(25,2)] ACTAT) (null)
+((25,4) [] [(26,2)] [] [(25,3)] CTATT) (null)
+((27,1) [(27,2)] [] [(26,3)] [] TATTA) (null)
+((27,2) [(28,1),(27,3)] [] [] [(27,1)] ATTAC) (null)
+((27,3) [(22,1),(27,4)] [] [(26,1)] [(27,2)] TTACG) (null)
+((27,4) [(28,3),(21,4),(22,2)] [] [] [(27,3)] TACGT) (null)
+((29,1) [(29,2)] [] [(30,3)] [(30,2)] CATGA) (null)
+((29,2) [(29,3)] [(32,4),(28,4)] [] [(29,1)] ATGAC) (null)
+((29,3) [(29,4)] [] [(30,1)] [(30,4),(29,2)] TGACG) (null)
+((29,4) [] [(28,2)] [] [(29,3)] GACGT) (null)
+((31,1) [(31,2)] [] [] [(32,2)] GCGTC) (null)
+((31,2) [(30,1),(31,3)] [(30,4)] [] [(31,1)] CGTCA) (null)
+((31,3) [(31,4)] [] [] [(32,4),(28,4),(31,2)] GTCAT) (null)
+((31,4) [(30,3)] [(30,2)] [] [(31,3)] TCATG) (null)
+((33,1) [(33,2)] [] [] [] TCGCT) (null)
+((33,2) [(34,1),(33,3)] [(34,4)] [] [(33,1)] CGCTT) (null)
+((33,3) [(33,4)] [] [(36,1),(32,1)] [(33,2)] GCTTA) (null)
+((33,4) [(34,3)] [(34,2)] [] [(33,3)] CTTAA) (null)
+((35,1) [(35,2)] [] [] [] TCGCT) (null)
+((35,2) [(34,1),(35,3)] [(34,4)] [] [(35,1)] CGCTT) (null)
+((35,3) [(35,4)] [] [(36,1),(32,1)] [(35,2)] GCTTA) (null)
+((35,4) [(34,3)] [(34,2)] [] [(35,3)] CTTAA) (null)
+((37,1) [(37,2)] [] [] [] CCACA) (null)
+((37,2) [(37,3)] [(36,4)] [] [(37,1)] CACAC) (null)
+((37,3) [(37,4)] [] [] [(37,2)] ACACG) (null)
+((37,4) [] [(36,2)] [] [(37,3)] CACGC) (null)
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1.txt.svg b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1.txt.svg
new file mode 100644
index 0000000..655df86
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1.txt.svg
@@ -0,0 +1,3577 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: walk_random_seq1_txt Pages: 1 -->
+<svg width="1494pt" height="5501pt"
+ viewBox="0.00 0.00 1494.00 5501.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 5497)">
+<title>walk_random_seq1_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-5497 1491,-5497 1491,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="54,-1935 54,-2136 226,-2136 226,-1935 54,-1935"/>
+<text text-anchor="middle" x="140" y="-2119.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_24</title>
+<polygon fill="none" stroke="black" points="428,-5076 428,-5164 942,-5164 942,-5076 428,-5076"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_25</title>
+<polygon fill="none" stroke="black" points="562,-1126 562,-1214 1076,-1214 1076,-1126 562,-1126"/>
+</g>
+<g id="graph5" class="cluster"><title>cluster_26</title>
+<polygon fill="none" stroke="black" points="428,-2358 428,-2446 942,-2446 942,-2358 428,-2358"/>
+</g>
+<g id="graph6" class="cluster"><title>cluster_27</title>
+<polygon fill="none" stroke="black" points="294,-2792 294,-2880 808,-2880 808,-2792 294,-2792"/>
+</g>
+<g id="graph7" class="cluster"><title>cluster_20</title>
+<polygon fill="none" stroke="black" points="160,-4070 160,-4158 674,-4158 674,-4070 160,-4070"/>
+</g>
+<g id="graph8" class="cluster"><title>cluster_21</title>
+<polygon fill="none" stroke="black" points="428,-3605 428,-3693 942,-3693 942,-3605 428,-3605"/>
+</g>
+<g id="graph9" class="cluster"><title>cluster_22</title>
+<polygon fill="none" stroke="black" points="428,-3190 428,-3278 942,-3278 942,-3190 428,-3190"/>
+</g>
+<g id="graph10" class="cluster"><title>cluster_23</title>
+<polygon fill="none" stroke="black" points="294,-4681 294,-4769 808,-4769 808,-4681 294,-4681"/>
+</g>
+<g id="graph11" class="cluster"><title>cluster_28</title>
+<polygon fill="none" stroke="black" points="562,-2696 562,-2784 1076,-2784 1076,-2696 562,-2696"/>
+</g>
+<g id="graph12" class="cluster"><title>cluster_29</title>
+<polygon fill="none" stroke="black" points="696,-2262 696,-2350 1210,-2350 1210,-2262 696,-2262"/>
+</g>
+<g id="graph13" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="13,-5371 13,-5459 535,-5459 535,-5371 13,-5371"/>
+</g>
+<g id="graph14" class="cluster"><title>cluster_3</title>
+<polygon fill="none" stroke="black" points="700,-8 700,-96 1071,-96 1071,-8 700,-8"/>
+</g>
+<g id="graph15" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="165,-5275 165,-5363 669,-5363 669,-5275 165,-5275"/>
+</g>
+<g id="graph16" class="cluster"><title>cluster_5</title>
+<polygon fill="none" stroke="black" points="701,-2454 701,-2542 1205,-2542 1205,-2454 701,-2454"/>
+</g>
+<g id="graph17" class="cluster"><title>cluster_4</title>
+<polygon fill="none" stroke="black" points="835,-1400 835,-1488 1339,-1488 1339,-1400 835,-1400"/>
+</g>
+<g id="graph18" class="cluster"><title>cluster_7</title>
+<polygon fill="none" stroke="black" points="433,-4585 433,-4673 937,-4673 937,-4585 433,-4585"/>
+</g>
+<g id="graph19" class="cluster"><title>cluster_6</title>
+<polygon fill="none" stroke="black" points="567,-3087 567,-3175 1071,-3175 1071,-3087 567,-3087"/>
+</g>
+<g id="graph20" class="cluster"><title>cluster_9</title>
+<polygon fill="none" stroke="black" points="433,-3439 433,-3527 937,-3527 937,-3439 433,-3439"/>
+</g>
+<g id="graph21" class="cluster"><title>cluster_8</title>
+<polygon fill="none" stroke="black" points="299,-4980 299,-5068 803,-5068 803,-4980 299,-4980"/>
+</g>
+<g id="graph22" class="cluster"><title>cluster_11</title>
+<polygon fill="none" stroke="black" points="294,-616 294,-704 808,-704 808,-616 294,-616"/>
+</g>
+<g id="graph23" class="cluster"><title>cluster_10</title>
+<polygon fill="none" stroke="black" points="428,-2991 428,-3079 942,-3079 942,-2991 428,-2991"/>
+</g>
+<g id="graph24" class="cluster"><title>cluster_13</title>
+<polygon fill="none" stroke="black" points="294,-424 294,-512 808,-512 808,-424 294,-424"/>
+</g>
+<g id="graph25" class="cluster"><title>cluster_12</title>
+<polygon fill="none" stroke="black" points="294,-520 294,-608 808,-608 808,-520 294,-520"/>
+</g>
+<g id="graph26" class="cluster"><title>cluster_15</title>
+<polygon fill="none" stroke="black" points="294,-232 294,-320 808,-320 808,-232 294,-232"/>
+</g>
+<g id="graph27" class="cluster"><title>cluster_14</title>
+<polygon fill="none" stroke="black" points="294,-328 294,-416 808,-416 808,-328 294,-328"/>
+</g>
+<g id="graph28" class="cluster"><title>cluster_17</title>
+<polygon fill="none" stroke="black" points="294,-3904 294,-3992 808,-3992 808,-3904 294,-3904"/>
+</g>
+<g id="graph29" class="cluster"><title>cluster_16</title>
+<polygon fill="none" stroke="black" points="294,-104 294,-192 808,-192 808,-104 294,-104"/>
+</g>
+<g id="graph30" class="cluster"><title>cluster_19</title>
+<polygon fill="none" stroke="black" points="8,-5172 8,-5260 540,-5260 540,-5172 8,-5172"/>
+</g>
+<g id="graph31" class="cluster"><title>cluster_18</title>
+<polygon fill="none" stroke="black" points="160,-4419 160,-4507 674,-4507 674,-4419 160,-4419"/>
+</g>
+<g id="graph32" class="cluster"><title>cluster_31</title>
+<polygon fill="none" stroke="black" points="562,-1688 562,-1776 1076,-1776 1076,-1688 562,-1688"/>
+</g>
+<g id="graph33" class="cluster"><title>cluster_30</title>
+<polygon fill="none" stroke="black" points="696,-2096 696,-2184 1210,-2184 1210,-2096 696,-2096"/>
+</g>
+<g id="graph34" class="cluster"><title>cluster_37</title>
+<polygon fill="none" stroke="black" points="964,-616 964,-704 1478,-704 1478,-616 964,-616"/>
+</g>
+<g id="graph35" class="cluster"><title>cluster_36</title>
+<polygon fill="none" stroke="black" points="830,-1030 830,-1118 1344,-1118 1344,-1030 830,-1030"/>
+</g>
+<g id="graph36" class="cluster"><title>cluster_35</title>
+<polygon fill="none" stroke="black" points="428,-1592 428,-1680 942,-1680 942,-1592 428,-1592"/>
+</g>
+<g id="graph37" class="cluster"><title>cluster_34</title>
+<polygon fill="none" stroke="black" points="562,-712 562,-800 1076,-800 1076,-712 562,-712"/>
+</g>
+<g id="graph38" class="cluster"><title>cluster_33</title>
+<polygon fill="none" stroke="black" points="428,-1496 428,-1584 942,-1584 942,-1496 428,-1496"/>
+</g>
+<g id="graph39" class="cluster"><title>cluster_32</title>
+<polygon fill="none" stroke="black" points="562,-1930 562,-2018 1076,-2018 1076,-1930 562,-1930"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="64" cy="-2084" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="216" cy="-2084" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M66.1487,-2084C81.1775,-2084 170.918,-2084 203.873,-2084"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="204.136,-2087.5 214.136,-2084 204.136,-2080.5 204.136,-2087.5"/>
+<text text-anchor="middle" x="140" y="-2089.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="64" cy="-2043" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="216" cy="-2043" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M66.1487,-2043C81.1775,-2043 170.918,-2043 203.873,-2043"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="204.136,-2046.5 214.136,-2043 204.136,-2039.5 204.136,-2046.5"/>
+<text text-anchor="middle" x="140" y="-2048.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="64" cy="-2002" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="216" cy="-2002" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M66.1487,-2002C81.1775,-2002 170.918,-2002 203.873,-2002"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="204.136,-2005.5 214.136,-2002 204.136,-1998.5 204.136,-2005.5"/>
+<text text-anchor="middle" x="140" y="-2007.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="64" cy="-1961" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="216" cy="-1961" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M66.1487,-1961C81.1775,-1961 170.918,-1961 203.873,-1961"/>
+<polygon fill="#218559" stroke="#218559" points="204.136,-1964.5 214.136,-1961 204.136,-1957.5 204.136,-1964.5"/>
+<text text-anchor="middle" x="140" y="-1966.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 24,1 -->
+<g id="node15" class="node"><title>24,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-5120" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-5131.17" font-family="Times Roman,serif" font-size="10.00">24,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-5112 459,-5126 510,-5126 510,-5112 459,-5112"/>
+<text text-anchor="start" x="467.5" y="-5116.67" font-family="Times Roman,serif" font-size="10.00">TGAAA</text>
+<polygon fill="#218559" stroke="#218559" points="459,-5098 459,-5112 510,-5112 510,-5098 459,-5098"/>
+<text text-anchor="start" x="470" y="-5102.67" font-family="Times Roman,serif" font-size="10.00">TTTCA</text>
+</g>
+<!-- 24,2 -->
+<g id="node16" class="node"><title>24,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-5120" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-5131.17" font-family="Times Roman,serif" font-size="10.00">24,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-5112 593,-5126 644,-5126 644,-5112 593,-5112"/>
+<text text-anchor="start" x="601" y="-5116.67" font-family="Times Roman,serif" font-size="10.00">GAAAC</text>
+<polygon fill="#218559" stroke="#218559" points="593,-5098 593,-5112 644,-5112 644,-5098 593,-5098"/>
+<text text-anchor="start" x="604" y="-5102.67" font-family="Times Roman,serif" font-size="10.00">GTTTC</text>
+</g>
+<!-- 24,1->24,2 -->
+<g id="edge280" class="edge"><title>24,1->24,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-5113.53C540.501,-5113.25 550.748,-5113.18 560.703,-5113.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-5116.82 570.933,-5113.52 561.007,-5109.82 560.864,-5116.82"/>
+</g>
+<!-- 23,3 -->
+<g id="node52" class="node"><title>23,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-4725" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-4736.17" font-family="Times Roman,serif" font-size="10.00">23,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-4717 593,-4731 644,-4731 644,-4717 593,-4717"/>
+<text text-anchor="start" x="603.5" y="-4721.67" font-family="Times Roman,serif" font-size="10.00">TTCAC</text>
+<polygon fill="#218559" stroke="#218559" points="593,-4703 593,-4717 644,-4717 644,-4703 593,-4703"/>
+<text text-anchor="start" x="601.5" y="-4707.67" font-family="Times Roman,serif" font-size="10.00">GTGAA</text>
+</g>
+<!-- 24,1->23,3 -->
+<g id="edge282" class="edge"><title>24,1->23,3</title>
+<path fill="none" stroke="#06a2cb" d="M515.6,-5092.91C528.303,-5082.03 540,-5072 540,-5072 540,-5072 586.676,-4864.35 607.814,-4770.32"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="611.241,-4771.03 610.019,-4760.5 604.411,-4769.49 611.241,-4771.03"/>
+</g>
+<!-- 7,2 -->
+<g id="node90" class="node"><title>7,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-4629" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="598.5" y="-4640.17" font-family="Times Roman,serif" font-size="10.00">7,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="596,-4621 596,-4635 641,-4635 641,-4621 596,-4621"/>
+<text text-anchor="start" x="603" y="-4625.67" font-family="Times Roman,serif" font-size="10.00">TTCAG</text>
+<polygon fill="#218559" stroke="#218559" points="596,-4607 596,-4621 641,-4621 641,-4607 596,-4607"/>
+<text text-anchor="start" x="601.5" y="-4611.67" font-family="Times Roman,serif" font-size="10.00">CTGAA</text>
+</g>
+<!-- 24,1->7,2 -->
+<g id="edge286" class="edge"><title>24,1->7,2</title>
+<path fill="none" stroke="#06a2cb" d="M515.6,-5092.91C528.303,-5082.03 540,-5072 540,-5072 540,-5072 552,-4898 552,-4898 552,-4898 562,-4677 562,-4677 562,-4677 569.944,-4670.19 579.849,-4661.7"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="582.391,-4664.13 587.705,-4654.97 577.835,-4658.82 582.391,-4664.13"/>
+</g>
+<!-- 8,1 -->
+<g id="node104" class="node"><title>8,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-5024" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="330.5" y="-5035.17" font-family="Times Roman,serif" font-size="10.00">8,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="328,-5016 328,-5030 373,-5030 373,-5016 328,-5016"/>
+<text text-anchor="start" x="333.5" y="-5020.67" font-family="Times Roman,serif" font-size="10.00">CTGAA</text>
+<polygon fill="#218559" stroke="#218559" points="328,-5002 328,-5016 373,-5016 373,-5002 328,-5002"/>
+<text text-anchor="start" x="335" y="-5006.67" font-family="Times Roman,serif" font-size="10.00">TTCAG</text>
+</g>
+<!-- 24,1->8,1 -->
+<g id="edge288" class="edge"><title>24,1->8,1</title>
+<path fill="none" stroke="#218559" d="M445.233,-5098.91C429.962,-5090.6 416,-5083 416,-5083 416,-5083 402.293,-5070.75 387.372,-5057.41"/>
+<polygon fill="#218559" stroke="#218559" points="389.313,-5054.45 379.525,-5050.39 384.648,-5059.67 389.313,-5054.45"/>
+</g>
+<!-- 17,1 -->
+<g id="node139" class="node"><title>17,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-3948" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-3959.17" font-family="Times Roman,serif" font-size="10.00">17,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-3940 325,-3954 376,-3954 376,-3940 325,-3940"/>
+<text text-anchor="start" x="333" y="-3944.67" font-family="Times Roman,serif" font-size="10.00">GAAAC</text>
+<polygon fill="#218559" stroke="#218559" points="325,-3926 325,-3940 376,-3940 376,-3926 325,-3926"/>
+<text text-anchor="start" x="336" y="-3930.67" font-family="Times Roman,serif" font-size="10.00">GTTTC</text>
+</g>
+<!-- 24,1->17,1 -->
+<g id="edge278" class="edge"><title>24,1->17,1</title>
+<path fill="none" stroke="#dd1e2f" d="M452.4,-5092.91C439.697,-5082.03 428,-5072 428,-5072 428,-5072 418,-3850 418,-3850 418,-3850 416,-3850 416,-3850 416,-3850 395.53,-3880.4 377.39,-3907.33"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="374.294,-3905.66 371.611,-3915.91 380.1,-3909.57 374.294,-3905.66"/>
+</g>
+<!-- 19,3 -->
+<g id="node151" class="node"><title>19,3</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-5216" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-5227.17" font-family="Times Roman,serif" font-size="10.00">19,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-5208 325,-5222 376,-5222 376,-5208 325,-5208"/>
+<text text-anchor="start" x="334.5" y="-5212.67" font-family="Times Roman,serif" font-size="10.00">TTGAA</text>
+<polygon fill="#218559" stroke="#218559" points="325,-5194 325,-5208 376,-5208 376,-5194 325,-5194"/>
+<text text-anchor="start" x="335" y="-5198.67" font-family="Times Roman,serif" font-size="10.00">TTCAA</text>
+</g>
+<!-- 24,1->19,3 -->
+<g id="edge290" class="edge"><title>24,1->19,3</title>
+<path fill="none" stroke="#218559" d="M449.317,-5144.85C432.127,-5157.16 411.205,-5172.15 393.005,-5185.19"/>
+<polygon fill="#218559" stroke="#218559" points="390.948,-5182.36 384.857,-5191.03 395.025,-5188.05 390.948,-5182.36"/>
+</g>
+<!-- 18,4 -->
+<g id="node157" class="node"><title>18,4</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-4463" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-4474.17" font-family="Times Roman,serif" font-size="10.00">18,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-4455 593,-4469 644,-4469 644,-4455 593,-4455"/>
+<text text-anchor="start" x="603" y="-4459.67" font-family="Times Roman,serif" font-size="10.00">TTCAA</text>
+<polygon fill="#218559" stroke="#218559" points="593,-4441 593,-4455 644,-4455 644,-4441 593,-4441"/>
+<text text-anchor="start" x="602.5" y="-4445.67" font-family="Times Roman,serif" font-size="10.00">TTGAA</text>
+</g>
+<!-- 24,1->18,4 -->
+<g id="edge284" class="edge"><title>24,1->18,4</title>
+<path fill="none" stroke="#06a2cb" d="M515.6,-5092.91C528.303,-5082.03 540,-5072 540,-5072 540,-5072 550,-4876 550,-4876 550,-4876 552,-4843 552,-4843 552,-4843 562,-4581 562,-4581 562,-4581 581.437,-4540.04 597.409,-4506.39"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="600.677,-4507.67 601.802,-4497.13 594.353,-4504.66 600.677,-4507.67"/>
+</g>
+<!-- 24,2->24,1 -->
+<g id="edge300" class="edge"><title>24,2->24,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-5126.48C561.29,-5126.75 551.041,-5126.82 541.091,-5126.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-5123.18 530.867,-5126.47 540.792,-5130.18 540.937,-5123.18"/>
+</g>
+<!-- 24,3 -->
+<g id="node17" class="node"><title>24,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-5120" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-5131.17" font-family="Times Roman,serif" font-size="10.00">24,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-5112 727,-5126 778,-5126 778,-5112 727,-5112"/>
+<text text-anchor="start" x="736" y="-5116.67" font-family="Times Roman,serif" font-size="10.00">AAACT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-5098 727,-5112 778,-5112 778,-5098 727,-5098"/>
+<text text-anchor="start" x="737.5" y="-5102.67" font-family="Times Roman,serif" font-size="10.00">AGTTT</text>
+</g>
+<!-- 24,2->24,3 -->
+<g id="edge294" class="edge"><title>24,2->24,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-5113.53C674.501,-5113.25 684.748,-5113.18 694.703,-5113.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-5116.82 704.933,-5113.52 695.007,-5109.82 694.864,-5116.82"/>
+</g>
+<!-- 25,1 -->
+<g id="node20" class="node"><title>25,1</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-1170" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-1181.17" font-family="Times Roman,serif" font-size="10.00">25,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-1162 593,-1176 644,-1176 644,-1162 593,-1162"/>
+<text text-anchor="start" x="602" y="-1166.67" font-family="Times Roman,serif" font-size="10.00">AAACT</text>
+<polygon fill="#218559" stroke="#218559" points="593,-1148 593,-1162 644,-1162 644,-1148 593,-1148"/>
+<text text-anchor="start" x="603.5" y="-1152.67" font-family="Times Roman,serif" font-size="10.00">AGTTT</text>
+</g>
+<!-- 24,2->25,1 -->
+<g id="edge292" class="edge"><title>24,2->25,1</title>
+<path fill="none" stroke="#dd1e2f" d="M586.07,-5092.84C568.813,-5078.15 551,-5063 551,-5063 551,-5063 551,-1501 551,-1501 551,-1501 590.35,-1306.6 608.737,-1215.76"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="612.213,-1216.23 610.766,-1205.74 605.352,-1214.84 612.213,-1216.23"/>
+</g>
+<!-- 7,1 -->
+<g id="node89" class="node"><title>7,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-4629" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="464.5" y="-4640.17" font-family="Times Roman,serif" font-size="10.00">7,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="462,-4621 462,-4635 507,-4635 507,-4621 462,-4621"/>
+<text text-anchor="start" x="470" y="-4625.67" font-family="Times Roman,serif" font-size="10.00">TTTCA</text>
+<polygon fill="#218559" stroke="#218559" points="462,-4607 462,-4621 507,-4621 507,-4607 462,-4607"/>
+<text text-anchor="start" x="467.5" y="-4611.67" font-family="Times Roman,serif" font-size="10.00">TGAAA</text>
+</g>
+<!-- 24,2->7,1 -->
+<g id="edge296" class="edge"><title>24,2->7,1</title>
+<path fill="none" stroke="#06a2cb" d="M586.4,-5092.91C573.697,-5082.03 562,-5072 562,-5072 562,-5072 552,-4365 552,-4365 552,-4365 550,-4365 550,-4365 550,-4365 540,-4579 540,-4579 540,-4579 531.629,-4586.47 521.36,-4595.64"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="518.768,-4593.26 513.64,-4602.54 523.43,-4598.49 518.768,-4593.26"/>
+</g>
+<!-- 19,4 -->
+<g id="node152" class="node"><title>19,4</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-5216" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-5227.17" font-family="Times Roman,serif" font-size="10.00">19,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-5208 459,-5222 510,-5222 510,-5208 459,-5208"/>
+<text text-anchor="start" x="467.5" y="-5212.67" font-family="Times Roman,serif" font-size="10.00">TGAAA</text>
+<polygon fill="#218559" stroke="#218559" points="459,-5194 459,-5208 510,-5208 510,-5194 459,-5194"/>
+<text text-anchor="start" x="470" y="-5198.67" font-family="Times Roman,serif" font-size="10.00">TTTCA</text>
+</g>
+<!-- 24,2->19,4 -->
+<g id="edge298" class="edge"><title>24,2->19,4</title>
+<path fill="none" stroke="#218559" d="M599.965,-5153.61C580.424,-5190.03 552,-5243 552,-5243 552,-5243 550,-5243 550,-5243 550,-5243 544,-5240.55 535.618,-5237.12"/>
+<polygon fill="#218559" stroke="#218559" points="536.753,-5233.8 526.172,-5233.25 534.103,-5240.28 536.753,-5233.8"/>
+</g>
+<!-- 24,3->24,2 -->
+<g id="edge306" class="edge"><title>24,3->24,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-5126.48C695.29,-5126.75 685.041,-5126.82 675.091,-5126.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-5123.18 664.867,-5126.47 674.792,-5130.18 674.937,-5123.18"/>
+</g>
+<!-- 24,4 -->
+<g id="node18" class="node"><title>24,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-5120" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-5131.17" font-family="Times Roman,serif" font-size="10.00">24,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-5112 861,-5126 912,-5126 912,-5112 861,-5112"/>
+<text text-anchor="start" x="870.5" y="-5116.67" font-family="Times Roman,serif" font-size="10.00">AACTA</text>
+<polygon fill="#218559" stroke="#218559" points="861,-5098 861,-5112 912,-5112 912,-5098 861,-5098"/>
+<text text-anchor="start" x="872" y="-5102.67" font-family="Times Roman,serif" font-size="10.00">TAGTT</text>
+</g>
+<!-- 24,3->24,4 -->
+<g id="edge302" class="edge"><title>24,3->24,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-5113.53C808.501,-5113.25 818.748,-5113.18 828.703,-5113.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-5116.82 838.933,-5113.52 829.007,-5109.82 828.864,-5116.82"/>
+</g>
+<!-- 23,1 -->
+<g id="node50" class="node"><title>23,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-4725" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-4736.17" font-family="Times Roman,serif" font-size="10.00">23,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-4717 325,-4731 376,-4731 376,-4717 325,-4717"/>
+<text text-anchor="start" x="336" y="-4721.67" font-family="Times Roman,serif" font-size="10.00">GTTTC</text>
+<polygon fill="#218559" stroke="#218559" points="325,-4703 325,-4717 376,-4717 376,-4703 325,-4703"/>
+<text text-anchor="start" x="333" y="-4707.67" font-family="Times Roman,serif" font-size="10.00">GAAAC</text>
+</g>
+<!-- 24,3->23,1 -->
+<g id="edge304" class="edge"><title>24,3->23,1</title>
+<path fill="none" stroke="#06a2cb" d="M720.4,-5092.91C707.697,-5082.03 696,-5072 696,-5072 696,-5072 674,-3900 674,-3900 674,-3900 562,-3900 562,-3900 562,-3900 540,-4026 540,-4026 540,-4026 428,-4066 428,-4066 428,-4066 418,-4240 418,-4240 418,-4240 398,-4511 398,-4511 398,-4511 374.399,-4616.22 360.296,-4679.1"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="356.808,-4678.66 358.034,-4689.18 363.638,-4680.19 356.808,-4678.66"/>
+</g>
+<!-- 24,4->24,3 -->
+<g id="edge310" class="edge"><title>24,4->24,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-5126.48C829.29,-5126.75 819.041,-5126.82 809.091,-5126.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-5123.18 798.867,-5126.47 808.792,-5130.18 808.937,-5123.18"/>
+</g>
+<!-- 25,3 -->
+<g id="node22" class="node"><title>25,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1170" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-1181.17" font-family="Times Roman,serif" font-size="10.00">25,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-1162 861,-1176 912,-1176 912,-1162 861,-1162"/>
+<text text-anchor="start" x="871.5" y="-1166.67" font-family="Times Roman,serif" font-size="10.00">ACTAT</text>
+<polygon fill="#218559" stroke="#218559" points="861,-1148 861,-1162 912,-1162 912,-1148 861,-1148"/>
+<text text-anchor="start" x="871.5" y="-1152.67" font-family="Times Roman,serif" font-size="10.00">ATAGT</text>
+</g>
+<!-- 24,4->25,3 -->
+<g id="edge308" class="edge"><title>24,4->25,3</title>
+<path fill="none" stroke="#dd1e2f" d="M880.687,-5084.15C864.84,-4977.25 819,-4668 819,-4668 819,-4668 819,-1405 819,-1405 819,-1405 853.684,-1283.35 873.142,-1215.1"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="876.521,-1216.01 875.897,-1205.44 869.789,-1214.09 876.521,-1216.01"/>
+</g>
+<!-- 25,1->24,2 -->
+<g id="edge716" class="edge"><title>25,1->24,2</title>
+<path fill="none" stroke="#218559" d="M610.766,-1205.74C593.441,-1291.33 551,-1501 551,-1501 551,-1501 551,-5063 551,-5063 551,-5063 563.917,-5073.99 578.409,-5086.32"/>
+<polygon fill="#218559" stroke="#218559" points="576.185,-5089.02 586.07,-5092.84 580.721,-5083.69 576.185,-5089.02"/>
+</g>
+<!-- 25,2 -->
+<g id="node21" class="node"><title>25,2</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-1170" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-1181.17" font-family="Times Roman,serif" font-size="10.00">25,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-1162 727,-1176 778,-1176 778,-1162 727,-1162"/>
+<text text-anchor="start" x="736.5" y="-1166.67" font-family="Times Roman,serif" font-size="10.00">AACTA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-1148 727,-1162 778,-1162 778,-1148 727,-1148"/>
+<text text-anchor="start" x="738" y="-1152.67" font-family="Times Roman,serif" font-size="10.00">TAGTT</text>
+</g>
+<!-- 25,1->25,2 -->
+<g id="edge714" class="edge"><title>25,1->25,2</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-1163.53C674.501,-1163.25 684.748,-1163.18 694.703,-1163.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-1166.82 704.933,-1163.52 695.007,-1159.82 694.864,-1166.82"/>
+</g>
+<!-- 25,2->25,1 -->
+<g id="edge722" class="edge"><title>25,2->25,1</title>
+<path fill="none" stroke="#218559" d="M704.933,-1176.48C695.29,-1176.75 685.041,-1176.82 675.091,-1176.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-1173.18 664.867,-1176.47 674.792,-1180.18 674.937,-1173.18"/>
+</g>
+<!-- 25,2->25,3 -->
+<g id="edge718" class="edge"><title>25,2->25,3</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-1163.53C808.501,-1163.25 818.748,-1163.18 828.703,-1163.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-1166.82 838.933,-1163.52 829.007,-1159.82 828.864,-1166.82"/>
+</g>
+<!-- 26,4 -->
+<g id="node28" class="node"><title>26,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-2402" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-2413.17" font-family="Times Roman,serif" font-size="10.00">26,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-2394 861,-2408 912,-2408 912,-2394 861,-2394"/>
+<text text-anchor="start" x="871.5" y="-2398.67" font-family="Times Roman,serif" font-size="10.00">ATAGT</text>
+<polygon fill="#218559" stroke="#218559" points="861,-2380 861,-2394 912,-2394 912,-2380 861,-2380"/>
+<text text-anchor="start" x="871.5" y="-2384.67" font-family="Times Roman,serif" font-size="10.00">ACTAT</text>
+</g>
+<!-- 25,2->26,4 -->
+<g id="edge720" class="edge"><title>25,2->26,4</title>
+<path fill="none" stroke="#ebb035" d="M758.261,-1206C772.874,-1290.02 808,-1492 808,-1492 808,-1492 820,-1894 820,-1894 820,-1894 830,-2354 830,-2354 830,-2354 837.283,-2360.24 846.596,-2368.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="844.529,-2371.06 854.4,-2374.91 849.085,-2365.75 844.529,-2371.06"/>
+</g>
+<!-- 25,3->24,4 -->
+<g id="edge726" class="edge"><title>25,3->24,4</title>
+<path fill="none" stroke="#218559" d="M875.897,-1205.44C857.18,-1271.09 819,-1405 819,-1405 819,-1405 819,-4668 819,-4668 819,-4668 862.193,-4959.39 879.219,-5074.25"/>
+<polygon fill="#218559" stroke="#218559" points="875.758,-5074.78 880.687,-5084.15 882.682,-5073.75 875.758,-5074.78"/>
+</g>
+<!-- 25,3->25,2 -->
+<g id="edge728" class="edge"><title>25,3->25,2</title>
+<path fill="none" stroke="#218559" d="M838.933,-1176.48C829.29,-1176.75 819.041,-1176.82 809.091,-1176.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-1173.18 798.867,-1176.47 808.792,-1180.18 808.937,-1173.18"/>
+</g>
+<!-- 25,4 -->
+<g id="node23" class="node"><title>25,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-1170" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-1181.17" font-family="Times Roman,serif" font-size="10.00">25,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-1162 995,-1176 1046,-1176 1046,-1162 995,-1162"/>
+<text text-anchor="start" x="1006.5" y="-1166.67" font-family="Times Roman,serif" font-size="10.00">CTATT</text>
+<polygon fill="#218559" stroke="#218559" points="995,-1148 995,-1162 1046,-1162 1046,-1148 995,-1148"/>
+<text text-anchor="start" x="1004.5" y="-1152.67" font-family="Times Roman,serif" font-size="10.00">AATAG</text>
+</g>
+<!-- 25,3->25,4 -->
+<g id="edge724" class="edge"><title>25,3->25,4</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-1163.53C942.501,-1163.25 952.748,-1163.18 962.703,-1163.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-1166.82 972.933,-1163.52 963.007,-1159.82 962.864,-1166.82"/>
+</g>
+<!-- 25,4->25,3 -->
+<g id="edge732" class="edge"><title>25,4->25,3</title>
+<path fill="none" stroke="#218559" d="M972.933,-1176.48C963.29,-1176.75 953.041,-1176.82 943.091,-1176.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-1173.18 932.867,-1176.47 942.792,-1180.18 942.937,-1173.18"/>
+</g>
+<!-- 26,2 -->
+<g id="node26" class="node"><title>26,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-2402" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-2413.17" font-family="Times Roman,serif" font-size="10.00">26,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-2394 593,-2408 644,-2408 644,-2394 593,-2394"/>
+<text text-anchor="start" x="604" y="-2398.67" font-family="Times Roman,serif" font-size="10.00">TAATA</text>
+<polygon fill="#218559" stroke="#218559" points="593,-2380 593,-2394 644,-2394 644,-2380 593,-2380"/>
+<text text-anchor="start" x="605" y="-2384.67" font-family="Times Roman,serif" font-size="10.00">TATTA</text>
+</g>
+<!-- 25,4->26,2 -->
+<g id="edge730" class="edge"><title>25,4->26,2</title>
+<path fill="none" stroke="#ebb035" d="M1007.69,-1205.13C988.09,-1261.04 952,-1364 952,-1364 952,-1364 942,-1814 942,-1814 942,-1814 830,-1848 830,-1848 830,-1848 818,-1892 818,-1892 818,-1892 808,-2054 808,-2054 808,-2054 696,-2092 696,-2092 696,-2092 651.064,-2270.59 629.406,-2356.67"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="625.994,-2355.89 626.948,-2366.44 632.782,-2357.6 625.994,-2355.89"/>
+</g>
+<!-- 26,1 -->
+<g id="node25" class="node"><title>26,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-2402" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-2413.17" font-family="Times Roman,serif" font-size="10.00">26,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-2394 459,-2408 510,-2408 510,-2394 459,-2394"/>
+<text text-anchor="start" x="469.5" y="-2398.67" font-family="Times Roman,serif" font-size="10.00">GTAAT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-2380 459,-2394 510,-2394 510,-2380 459,-2380"/>
+<text text-anchor="start" x="470" y="-2384.67" font-family="Times Roman,serif" font-size="10.00">ATTAC</text>
+</g>
+<!-- 26,1->26,2 -->
+<g id="edge312" class="edge"><title>26,1->26,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-2395.53C540.501,-2395.25 550.748,-2395.18 560.703,-2395.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-2398.82 570.933,-2395.52 561.007,-2391.82 560.864,-2398.82"/>
+</g>
+<!-- 27,3 -->
+<g id="node32" class="node"><title>27,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-2836" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-2847.17" font-family="Times Roman,serif" font-size="10.00">27,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-2828 593,-2842 644,-2842 644,-2828 593,-2828"/>
+<text text-anchor="start" x="603" y="-2832.67" font-family="Times Roman,serif" font-size="10.00">TTACG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-2814 593,-2828 644,-2828 644,-2814 593,-2814"/>
+<text text-anchor="start" x="602" y="-2818.67" font-family="Times Roman,serif" font-size="10.00">CGTAA</text>
+</g>
+<!-- 26,1->27,3 -->
+<g id="edge314" class="edge"><title>26,1->27,3</title>
+<path fill="none" stroke="#06a2cb" d="M491.198,-2437.62C510.508,-2533.18 562,-2788 562,-2788 562,-2788 569.283,-2794.24 578.596,-2802.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="576.529,-2805.06 586.4,-2808.91 581.085,-2799.75 576.529,-2805.06"/>
+</g>
+<!-- 26,2->25,4 -->
+<g id="edge318" class="edge"><title>26,2->25,4</title>
+<path fill="none" stroke="#ebb035" d="M624.938,-2366.26C641.884,-2278.96 684,-2062 684,-2062 684,-2062 808,-2022 808,-2022 808,-2022 818,-1826 818,-1826 818,-1826 830,-1820 830,-1820 830,-1820 942,-1780 942,-1780 942,-1780 952,-1268 952,-1268 952,-1268 973.274,-1237.34 992.023,-1210.32"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="994.909,-1212.3 997.734,-1202.09 989.158,-1208.31 994.909,-1212.3"/>
+</g>
+<!-- 26,2->26,1 -->
+<g id="edge320" class="edge"><title>26,2->26,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-2408.48C561.29,-2408.75 551.041,-2408.82 541.091,-2408.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-2405.18 530.867,-2408.47 540.792,-2412.18 540.937,-2405.18"/>
+</g>
+<!-- 26,3 -->
+<g id="node27" class="node"><title>26,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-2402" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-2413.17" font-family="Times Roman,serif" font-size="10.00">26,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-2394 727,-2408 778,-2408 778,-2394 727,-2394"/>
+<text text-anchor="start" x="736.5" y="-2398.67" font-family="Times Roman,serif" font-size="10.00">AATAG</text>
+<polygon fill="#218559" stroke="#218559" points="727,-2380 727,-2394 778,-2394 778,-2380 727,-2380"/>
+<text text-anchor="start" x="738.5" y="-2384.67" font-family="Times Roman,serif" font-size="10.00">CTATT</text>
+</g>
+<!-- 26,2->26,3 -->
+<g id="edge316" class="edge"><title>26,2->26,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-2395.53C674.501,-2395.25 684.748,-2395.18 694.703,-2395.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-2398.82 704.933,-2395.52 695.007,-2391.82 694.864,-2398.82"/>
+</g>
+<!-- 26,3->26,2 -->
+<g id="edge326" class="edge"><title>26,3->26,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-2408.48C695.29,-2408.75 685.041,-2408.82 675.091,-2408.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-2405.18 664.867,-2408.47 674.792,-2412.18 674.937,-2405.18"/>
+</g>
+<!-- 26,3->26,4 -->
+<g id="edge322" class="edge"><title>26,3->26,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-2395.53C808.501,-2395.25 818.748,-2395.18 828.703,-2395.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-2398.82 838.933,-2395.52 829.007,-2391.82 828.864,-2398.82"/>
+</g>
+<!-- 27,1 -->
+<g id="node30" class="node"><title>27,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-2836" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-2847.17" font-family="Times Roman,serif" font-size="10.00">27,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-2828 325,-2842 376,-2842 376,-2828 325,-2828"/>
+<text text-anchor="start" x="337" y="-2832.67" font-family="Times Roman,serif" font-size="10.00">TATTA</text>
+<polygon fill="#218559" stroke="#218559" points="325,-2814 325,-2828 376,-2828 376,-2814 325,-2814"/>
+<text text-anchor="start" x="336" y="-2818.67" font-family="Times Roman,serif" font-size="10.00">TAATA</text>
+</g>
+<!-- 26,3->27,1 -->
+<g id="edge324" class="edge"><title>26,3->27,1</title>
+<path fill="none" stroke="#06a2cb" d="M720.4,-2429.09C707.697,-2439.97 696,-2450 696,-2450 696,-2450 686,-2566 686,-2566 686,-2566 480.412,-2731.2 390.866,-2803.16"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="388.432,-2800.63 382.829,-2809.62 392.816,-2806.08 388.432,-2800.63"/>
+</g>
+<!-- 26,4->25,2 -->
+<g id="edge328" class="edge"><title>26,4->25,2</title>
+<path fill="none" stroke="#ebb035" d="M854.4,-2374.91C841.697,-2364.03 830,-2354 830,-2354 830,-2354 808,-1492 808,-1492 808,-1492 775.499,-1305.12 760.017,-1216.1"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="763.423,-1215.25 758.261,-1206 756.526,-1216.45 763.423,-1215.25"/>
+</g>
+<!-- 26,4->26,3 -->
+<g id="edge330" class="edge"><title>26,4->26,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-2408.48C829.29,-2408.75 819.041,-2408.82 809.091,-2408.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-2405.18 798.867,-2408.47 808.792,-2412.18 808.937,-2405.18"/>
+</g>
+<!-- 27,1->26,3 -->
+<g id="edge736" class="edge"><title>27,1->26,3</title>
+<path fill="none" stroke="#06a2cb" d="M366.925,-2802.15C389.865,-2756.27 428,-2680 428,-2680 428,-2680 696,-2450 696,-2450 696,-2450 703.283,-2443.76 712.596,-2435.78"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="715.085,-2438.25 720.4,-2429.09 710.529,-2432.94 715.085,-2438.25"/>
+</g>
+<!-- 27,2 -->
+<g id="node31" class="node"><title>27,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-2836" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-2847.17" font-family="Times Roman,serif" font-size="10.00">27,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-2828 459,-2842 510,-2842 510,-2828 459,-2828"/>
+<text text-anchor="start" x="470" y="-2832.67" font-family="Times Roman,serif" font-size="10.00">ATTAC</text>
+<polygon fill="#218559" stroke="#218559" points="459,-2814 459,-2828 510,-2828 510,-2814 459,-2814"/>
+<text text-anchor="start" x="469.5" y="-2818.67" font-family="Times Roman,serif" font-size="10.00">GTAAT</text>
+</g>
+<!-- 27,1->27,2 -->
+<g id="edge734" class="edge"><title>27,1->27,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-2829.53C406.501,-2829.25 416.748,-2829.18 426.703,-2829.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-2832.82 436.933,-2829.52 427.007,-2825.82 426.864,-2832.82"/>
+</g>
+<!-- 27,2->27,1 -->
+<g id="edge742" class="edge"><title>27,2->27,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-2842.48C427.29,-2842.75 417.041,-2842.82 407.091,-2842.68"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-2839.18 396.867,-2842.47 406.792,-2846.18 406.937,-2839.18"/>
+</g>
+<!-- 27,2->27,3 -->
+<g id="edge740" class="edge"><title>27,2->27,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-2829.53C540.501,-2829.25 550.748,-2829.18 560.703,-2829.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-2832.82 570.933,-2829.52 561.007,-2825.82 560.864,-2832.82"/>
+</g>
+<!-- 28,1 -->
+<g id="node55" class="node"><title>28,1</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-2740" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-2751.17" font-family="Times Roman,serif" font-size="10.00">28,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-2732 593,-2746 644,-2746 644,-2732 593,-2732"/>
+<text text-anchor="start" x="603" y="-2736.67" font-family="Times Roman,serif" font-size="10.00">TTACG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-2718 593,-2732 644,-2732 644,-2718 593,-2718"/>
+<text text-anchor="start" x="602" y="-2722.67" font-family="Times Roman,serif" font-size="10.00">CGTAA</text>
+</g>
+<!-- 27,2->28,1 -->
+<g id="edge738" class="edge"><title>27,2->28,1</title>
+<path fill="none" stroke="#dd1e2f" d="M523.949,-2816.03C538.751,-2808.62 552,-2802 552,-2802 552,-2802 562,-2788 562,-2788 562,-2788 569.283,-2781.76 578.596,-2773.78"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="581.085,-2776.25 586.4,-2767.09 576.529,-2770.94 581.085,-2776.25"/>
+</g>
+<!-- 27,3->26,1 -->
+<g id="edge748" class="edge"><title>27,3->26,1</title>
+<path fill="none" stroke="#06a2cb" d="M586.4,-2808.91C573.697,-2798.03 562,-2788 562,-2788 562,-2788 550,-2746 550,-2746 550,-2746 510.777,-2541.56 492.805,-2447.89"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="496.198,-2447 490.876,-2437.84 489.323,-2448.32 496.198,-2447"/>
+</g>
+<!-- 27,3->27,2 -->
+<g id="edge750" class="edge"><title>27,3->27,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-2842.48C561.29,-2842.75 551.041,-2842.82 541.091,-2842.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-2839.18 530.867,-2842.47 540.792,-2846.18 540.937,-2839.18"/>
+</g>
+<!-- 27,4 -->
+<g id="node33" class="node"><title>27,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-2836" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-2847.17" font-family="Times Roman,serif" font-size="10.00">27,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-2828 727,-2842 778,-2842 778,-2828 727,-2828"/>
+<text text-anchor="start" x="737" y="-2832.67" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-2814 727,-2828 778,-2828 778,-2814 727,-2814"/>
+<text text-anchor="start" x="736.5" y="-2818.67" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+</g>
+<!-- 27,3->27,4 -->
+<g id="edge746" class="edge"><title>27,3->27,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-2829.53C674.501,-2829.25 684.748,-2829.18 694.703,-2829.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-2832.82 704.933,-2829.52 695.007,-2825.82 694.864,-2832.82"/>
+</g>
+<!-- 22,1 -->
+<g id="node45" class="node"><title>22,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-3234" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-3245.17" font-family="Times Roman,serif" font-size="10.00">22,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-3226 459,-3240 510,-3240 510,-3226 459,-3226"/>
+<text text-anchor="start" x="469" y="-3230.67" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-3212 459,-3226 510,-3226 510,-3212 459,-3212"/>
+<text text-anchor="start" x="468.5" y="-3216.67" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+</g>
+<!-- 27,3->22,1 -->
+<g id="edge744" class="edge"><title>27,3->22,1</title>
+<path fill="none" stroke="#dd1e2f" d="M605.012,-2871.02C588.469,-2915.63 562,-2987 562,-2987 562,-2987 540,-3083 540,-3083 540,-3083 517.026,-3144.95 500.508,-3189.49"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="497.184,-3188.38 496.988,-3198.98 503.747,-3190.82 497.184,-3188.38"/>
+</g>
+<!-- 27,4->27,3 -->
+<g id="edge758" class="edge"><title>27,4->27,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-2842.48C695.29,-2842.75 685.041,-2842.82 675.091,-2842.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-2839.18 664.867,-2842.47 674.792,-2846.18 674.937,-2839.18"/>
+</g>
+<!-- 21,4 -->
+<g id="node43" class="node"><title>21,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-3649" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-3660.17" font-family="Times Roman,serif" font-size="10.00">21,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-3641 861,-3655 912,-3655 912,-3641 861,-3641"/>
+<text text-anchor="start" x="869.5" y="-3645.67" font-family="Times Roman,serif" font-size="10.00">ACGTG</text>
+<polygon fill="#218559" stroke="#218559" points="861,-3627 861,-3641 912,-3641 912,-3627 861,-3627"/>
+<text text-anchor="start" x="870" y="-3631.67" font-family="Times Roman,serif" font-size="10.00">CACGT</text>
+</g>
+<!-- 27,4->21,4 -->
+<g id="edge754" class="edge"><title>27,4->21,4</title>
+<path fill="none" stroke="#dd1e2f" d="M764.988,-2871.02C781.531,-2915.63 808,-2987 808,-2987 808,-2987 820,-3083 820,-3083 820,-3083 830,-3531 830,-3531 830,-3531 849.437,-3571.96 865.409,-3605.61"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="862.353,-3607.34 869.802,-3614.87 868.677,-3604.33 862.353,-3607.34"/>
+</g>
+<!-- 22,2 -->
+<g id="node46" class="node"><title>22,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-3234" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-3245.17" font-family="Times Roman,serif" font-size="10.00">22,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-3226 593,-3240 644,-3240 644,-3226 593,-3226"/>
+<text text-anchor="start" x="601.5" y="-3230.67" font-family="Times Roman,serif" font-size="10.00">ACGTG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-3212 593,-3226 644,-3226 644,-3212 593,-3212"/>
+<text text-anchor="start" x="602" y="-3216.67" font-family="Times Roman,serif" font-size="10.00">CACGT</text>
+</g>
+<!-- 27,4->22,2 -->
+<g id="edge756" class="edge"><title>27,4->22,2</title>
+<path fill="none" stroke="#dd1e2f" d="M739.012,-2871.02C722.469,-2915.63 696,-2987 696,-2987 696,-2987 674,-3179 674,-3179 674,-3179 665.159,-3187.68 654.502,-3198.15"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="651.994,-3195.71 647.312,-3205.21 656.899,-3200.7 651.994,-3195.71"/>
+</g>
+<!-- 28,3 -->
+<g id="node57" class="node"><title>28,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-2740" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-2751.17" font-family="Times Roman,serif" font-size="10.00">28,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-2732 861,-2746 912,-2746 912,-2732 861,-2732"/>
+<text text-anchor="start" x="870.5" y="-2736.67" font-family="Times Roman,serif" font-size="10.00">ACGTC</text>
+<polygon fill="#218559" stroke="#218559" points="861,-2718 861,-2732 912,-2732 912,-2718 861,-2718"/>
+<text text-anchor="start" x="869.5" y="-2722.67" font-family="Times Roman,serif" font-size="10.00">GACGT</text>
+</g>
+<!-- 27,4->28,3 -->
+<g id="edge752" class="edge"><title>27,4->28,3</title>
+<path fill="none" stroke="#dd1e2f" d="M790.767,-2814.91C806.038,-2806.6 820,-2799 820,-2799 820,-2799 832.918,-2787.45 847.324,-2774.57"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="849.81,-2777.05 854.933,-2767.77 845.145,-2771.83 849.81,-2777.05"/>
+</g>
+<!-- 20,1 -->
+<g id="node35" class="node"><title>20,1</title>
+<ellipse fill="none" stroke="black" cx="216" cy="-4114" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="193.5" y="-4125.17" font-family="Times Roman,serif" font-size="10.00">20,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="191,-4106 191,-4120 242,-4120 242,-4106 191,-4106"/>
+<text text-anchor="start" x="201.5" y="-4110.67" font-family="Times Roman,serif" font-size="10.00">TCAAT</text>
+<polygon fill="#218559" stroke="#218559" points="191,-4092 191,-4106 242,-4106 242,-4092 191,-4092"/>
+<text text-anchor="start" x="201" y="-4096.67" font-family="Times Roman,serif" font-size="10.00">ATTGA</text>
+</g>
+<!-- 20,2 -->
+<g id="node36" class="node"><title>20,2</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-4114" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-4125.17" font-family="Times Roman,serif" font-size="10.00">20,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-4106 325,-4120 376,-4120 376,-4106 325,-4106"/>
+<text text-anchor="start" x="335" y="-4110.67" font-family="Times Roman,serif" font-size="10.00">CAATA</text>
+<polygon fill="#218559" stroke="#218559" points="325,-4092 325,-4106 376,-4106 376,-4092 325,-4092"/>
+<text text-anchor="start" x="336" y="-4096.67" font-family="Times Roman,serif" font-size="10.00">TATTG</text>
+</g>
+<!-- 20,1->20,2 -->
+<g id="edge232" class="edge"><title>20,1->20,2</title>
+<path fill="none" stroke="#dd1e2f" d="M262.867,-4107.53C272.501,-4107.25 282.748,-4107.18 292.703,-4107.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="292.864,-4110.82 302.933,-4107.52 293.007,-4103.82 292.864,-4110.82"/>
+</g>
+<!-- 20,1->19,3 -->
+<g id="edge234" class="edge"><title>20,1->19,3</title>
+<path fill="none" stroke="#06a2cb" d="M237.703,-4146.23C257.603,-4175.79 284,-4215 284,-4215 284,-4215 294,-5072 294,-5072 294,-5072 316.267,-5129.26 332.719,-5171.56"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="329.557,-5173.09 336.444,-5181.14 336.081,-5170.55 329.557,-5173.09"/>
+</g>
+<!-- 20,2->20,1 -->
+<g id="edge240" class="edge"><title>20,2->20,1</title>
+<path fill="none" stroke="#218559" d="M302.933,-4120.48C293.29,-4120.75 283.041,-4120.82 273.091,-4120.68"/>
+<polygon fill="#218559" stroke="#218559" points="272.937,-4117.18 262.867,-4120.47 272.792,-4124.18 272.937,-4117.18"/>
+</g>
+<!-- 20,3 -->
+<g id="node37" class="node"><title>20,3</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-4114" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-4125.17" font-family="Times Roman,serif" font-size="10.00">20,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-4106 459,-4120 510,-4120 510,-4106 459,-4106"/>
+<text text-anchor="start" x="469" y="-4110.67" font-family="Times Roman,serif" font-size="10.00">AATAC</text>
+<polygon fill="#218559" stroke="#218559" points="459,-4092 459,-4106 510,-4106 510,-4092 459,-4092"/>
+<text text-anchor="start" x="470.5" y="-4096.67" font-family="Times Roman,serif" font-size="10.00">GTATT</text>
+</g>
+<!-- 20,2->20,3 -->
+<g id="edge238" class="edge"><title>20,2->20,3</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-4107.53C406.501,-4107.25 416.748,-4107.18 426.703,-4107.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-4110.82 436.933,-4107.52 427.007,-4103.82 426.864,-4110.82"/>
+</g>
+<!-- 21,1 -->
+<g id="node40" class="node"><title>21,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-3649" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-3660.17" font-family="Times Roman,serif" font-size="10.00">21,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-3641 459,-3655 510,-3655 510,-3641 459,-3641"/>
+<text text-anchor="start" x="469" y="-3645.67" font-family="Times Roman,serif" font-size="10.00">AATAC</text>
+<polygon fill="#218559" stroke="#218559" points="459,-3627 459,-3641 510,-3641 510,-3627 459,-3627"/>
+<text text-anchor="start" x="470.5" y="-3631.67" font-family="Times Roman,serif" font-size="10.00">GTATT</text>
+</g>
+<!-- 20,2->21,1 -->
+<g id="edge236" class="edge"><title>20,2->21,1</title>
+<path fill="none" stroke="#dd1e2f" d="M364.013,-4079.55C378.18,-4044.72 398,-3996 398,-3996 398,-3996 449.463,-3788.35 472.769,-3694.32"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="476.192,-3695.05 475.201,-3684.5 469.398,-3693.37 476.192,-3695.05"/>
+</g>
+<!-- 20,3->20,2 -->
+<g id="edge246" class="edge"><title>20,3->20,2</title>
+<path fill="none" stroke="#218559" d="M436.933,-4120.48C427.29,-4120.75 417.041,-4120.82 407.091,-4120.68"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-4117.18 396.867,-4120.47 406.792,-4124.18 406.937,-4117.18"/>
+</g>
+<!-- 20,4 -->
+<g id="node38" class="node"><title>20,4</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-4114" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-4125.17" font-family="Times Roman,serif" font-size="10.00">20,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-4106 593,-4120 644,-4120 644,-4106 593,-4106"/>
+<text text-anchor="start" x="602.5" y="-4110.67" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-4092 593,-4106 644,-4106 644,-4092 593,-4092"/>
+<text text-anchor="start" x="603.5" y="-4096.67" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+</g>
+<!-- 20,3->20,4 -->
+<g id="edge242" class="edge"><title>20,3->20,4</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-4107.53C540.501,-4107.25 550.748,-4107.18 560.703,-4107.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-4110.82 570.933,-4107.52 561.007,-4103.82 560.864,-4110.82"/>
+</g>
+<!-- 19,1 -->
+<g id="node149" class="node"><title>19,1</title>
+<ellipse fill="none" stroke="black" cx="64" cy="-5216" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="41.5" y="-5227.17" font-family="Times Roman,serif" font-size="10.00">19,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="39,-5208 39,-5222 90,-5222 90,-5208 39,-5208"/>
+<text text-anchor="start" x="50" y="-5212.67" font-family="Times Roman,serif" font-size="10.00">TATTG</text>
+<polygon fill="#218559" stroke="#218559" points="39,-5194 39,-5208 90,-5208 90,-5194 39,-5194"/>
+<text text-anchor="start" x="49" y="-5198.67" font-family="Times Roman,serif" font-size="10.00">CAATA</text>
+</g>
+<!-- 20,3->19,1 -->
+<g id="edge244" class="edge"><title>20,3->19,1</title>
+<path fill="none" stroke="#06a2cb" d="M476.816,-4149.67C456.146,-4252.3 398,-4541 398,-4541 398,-4541 294,-4677 294,-4677 294,-4677 137.748,-5043.17 82.8196,-5171.9"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="79.4431,-5170.89 78.7374,-5181.46 85.8814,-5173.64 79.4431,-5170.89"/>
+</g>
+<!-- 20,4->20,3 -->
+<g id="edge250" class="edge"><title>20,4->20,3</title>
+<path fill="none" stroke="#218559" d="M570.933,-4120.48C561.29,-4120.75 551.041,-4120.82 541.091,-4120.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-4117.18 530.867,-4120.47 540.792,-4124.18 540.937,-4117.18"/>
+</g>
+<!-- 21,3 -->
+<g id="node42" class="node"><title>21,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-3649" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-3660.17" font-family="Times Roman,serif" font-size="10.00">21,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-3641 727,-3655 778,-3655 778,-3641 727,-3641"/>
+<text text-anchor="start" x="737" y="-3645.67" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-3627 727,-3641 778,-3641 778,-3627 727,-3627"/>
+<text text-anchor="start" x="736.5" y="-3631.67" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+</g>
+<!-- 20,4->21,3 -->
+<g id="edge248" class="edge"><title>20,4->21,3</title>
+<path fill="none" stroke="#dd1e2f" d="M634.198,-4079.87C650.736,-4045.02 674,-3996 674,-3996 674,-3996 684,-3750 684,-3750 684,-3750 705.679,-3717.8 724.556,-3689.76"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727.616,-3691.48 730.297,-3681.23 721.809,-3687.58 727.616,-3691.48"/>
+</g>
+<!-- 21,1->20,2 -->
+<g id="edge664" class="edge"><title>21,1->20,2</title>
+<path fill="none" stroke="#218559" d="M475.04,-3684.44C456.575,-3757.49 416,-3918 416,-3918 416,-3918 398,-3996 398,-3996 398,-3996 381.582,-4036.36 367.95,-4069.87"/>
+<polygon fill="#218559" stroke="#218559" points="364.539,-4068.97 364.013,-4079.55 371.023,-4071.61 364.539,-4068.97"/>
+</g>
+<!-- 21,2 -->
+<g id="node41" class="node"><title>21,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-3649" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-3660.17" font-family="Times Roman,serif" font-size="10.00">21,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-3641 593,-3655 644,-3655 644,-3641 593,-3641"/>
+<text text-anchor="start" x="602.5" y="-3645.67" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-3627 593,-3641 644,-3641 644,-3627 593,-3627"/>
+<text text-anchor="start" x="603.5" y="-3631.67" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+</g>
+<!-- 21,1->21,2 -->
+<g id="edge662" class="edge"><title>21,1->21,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-3642.53C540.501,-3642.25 550.748,-3642.18 560.703,-3642.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-3645.82 570.933,-3642.52 561.007,-3638.82 560.864,-3645.82"/>
+</g>
+<!-- 21,2->21,1 -->
+<g id="edge670" class="edge"><title>21,2->21,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-3655.48C561.29,-3655.75 551.041,-3655.82 541.091,-3655.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-3652.18 530.867,-3655.47 540.792,-3659.18 540.937,-3652.18"/>
+</g>
+<!-- 21,2->21,3 -->
+<g id="edge668" class="edge"><title>21,2->21,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-3642.53C674.501,-3642.25 684.748,-3642.18 694.703,-3642.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-3645.82 704.933,-3642.52 695.007,-3638.82 694.864,-3645.82"/>
+</g>
+<!-- 21,2->22,1 -->
+<g id="edge666" class="edge"><title>21,2->22,1</title>
+<path fill="none" stroke="#dd1e2f" d="M601.802,-3614.87C585.264,-3580.02 562,-3531 562,-3531 562,-3531 552,-3362 552,-3362 552,-3362 526.516,-3314.03 506.749,-3276.82"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="509.649,-3274.82 501.866,-3267.63 503.467,-3278.1 509.649,-3274.82"/>
+</g>
+<!-- 21,3->20,4 -->
+<g id="edge674" class="edge"><title>21,3->20,4</title>
+<path fill="none" stroke="#218559" d="M733.956,-3682.43C713.81,-3719.76 684,-3775 684,-3775 684,-3775 674,-3996 674,-3996 674,-3996 654.563,-4036.96 638.591,-4070.61"/>
+<polygon fill="#218559" stroke="#218559" points="635.323,-4069.33 634.198,-4079.87 641.647,-4072.34 635.323,-4069.33"/>
+</g>
+<!-- 21,3->21,2 -->
+<g id="edge676" class="edge"><title>21,3->21,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-3655.48C695.29,-3655.75 685.041,-3655.82 675.091,-3655.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-3652.18 664.867,-3655.47 674.792,-3659.18 674.937,-3652.18"/>
+</g>
+<!-- 21,3->21,4 -->
+<g id="edge672" class="edge"><title>21,3->21,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-3642.53C808.501,-3642.25 818.748,-3642.18 828.703,-3642.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-3645.82 838.933,-3642.52 829.007,-3638.82 828.864,-3645.82"/>
+</g>
+<!-- 21,4->27,4 -->
+<g id="edge680" class="edge"><title>21,4->27,4</title>
+<path fill="none" stroke="#218559" d="M869.802,-3614.87C853.264,-3580.02 830,-3531 830,-3531 830,-3531 820,-2915 820,-2915 820,-2915 802.13,-2894.24 784.712,-2874"/>
+<polygon fill="#218559" stroke="#218559" points="787.272,-2871.61 778.096,-2866.32 781.967,-2876.18 787.272,-2871.61"/>
+</g>
+<!-- 21,4->21,3 -->
+<g id="edge682" class="edge"><title>21,4->21,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-3655.48C829.29,-3655.75 819.041,-3655.82 809.091,-3655.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-3652.18 798.867,-3655.47 808.792,-3659.18 808.937,-3652.18"/>
+</g>
+<!-- 22,3 -->
+<g id="node47" class="node"><title>22,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-3234" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-3245.17" font-family="Times Roman,serif" font-size="10.00">22,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-3226 727,-3240 778,-3240 778,-3226 727,-3226"/>
+<text text-anchor="start" x="735.5" y="-3230.67" font-family="Times Roman,serif" font-size="10.00">CGTGA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-3212 727,-3226 778,-3226 778,-3212 727,-3212"/>
+<text text-anchor="start" x="736" y="-3216.67" font-family="Times Roman,serif" font-size="10.00">TCACG</text>
+</g>
+<!-- 21,4->22,3 -->
+<g id="edge678" class="edge"><title>21,4->22,3</title>
+<path fill="none" stroke="#dd1e2f" d="M869.802,-3614.87C853.264,-3580.02 830,-3531 830,-3531 830,-3531 820,-3477 820,-3477 820,-3477 784.341,-3349.57 764.668,-3279.27"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="767.953,-3278.02 761.888,-3269.33 761.212,-3279.91 767.953,-3278.02"/>
+</g>
+<!-- 22,1->27,3 -->
+<g id="edge256" class="edge"><title>22,1->27,3</title>
+<path fill="none" stroke="#218559" d="M496.988,-3198.98C513.531,-3154.37 540,-3083 540,-3083 540,-3083 550,-2962 550,-2962 550,-2962 575.335,-2915.06 595.076,-2878.48"/>
+<polygon fill="#218559" stroke="#218559" points="598.286,-2879.9 599.956,-2869.43 592.126,-2876.57 598.286,-2879.9"/>
+</g>
+<!-- 22,1->21,2 -->
+<g id="edge258" class="edge"><title>22,1->21,2</title>
+<path fill="none" stroke="#218559" d="M500.984,-3267.72C521.125,-3307.7 552,-3369 552,-3369 552,-3369 562,-3531 562,-3531 562,-3531 581.437,-3571.96 597.409,-3605.61"/>
+<polygon fill="#218559" stroke="#218559" points="594.353,-3607.34 601.802,-3614.87 600.677,-3604.33 594.353,-3607.34"/>
+</g>
+<!-- 22,1->22,2 -->
+<g id="edge252" class="edge"><title>22,1->22,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-3227.53C540.501,-3227.25 550.748,-3227.18 560.703,-3227.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-3230.82 570.933,-3227.52 561.007,-3223.82 560.864,-3230.82"/>
+</g>
+<!-- 22,1->28,1 -->
+<g id="edge254" class="edge"><title>22,1->28,1</title>
+<path fill="none" stroke="#218559" d="M496.988,-3198.98C513.531,-3154.37 540,-3083 540,-3083 540,-3083 562,-2788 562,-2788 562,-2788 569.283,-2781.76 578.596,-2773.78"/>
+<polygon fill="#218559" stroke="#218559" points="581.085,-2776.25 586.4,-2767.09 576.529,-2770.94 581.085,-2776.25"/>
+</g>
+<!-- 22,2->27,4 -->
+<g id="edge264" class="edge"><title>22,2->27,4</title>
+<path fill="none" stroke="#218559" d="M647.312,-3205.21C660.861,-3191.9 674,-3179 674,-3179 674,-3179 684,-2962 684,-2962 684,-2962 709.335,-2915.06 729.076,-2878.48"/>
+<polygon fill="#218559" stroke="#218559" points="732.286,-2879.9 733.956,-2869.43 726.126,-2876.57 732.286,-2879.9"/>
+</g>
+<!-- 22,2->22,1 -->
+<g id="edge266" class="edge"><title>22,2->22,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-3240.48C561.29,-3240.75 551.041,-3240.82 541.091,-3240.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-3237.18 530.867,-3240.47 540.792,-3244.18 540.937,-3237.18"/>
+</g>
+<!-- 22,2->22,3 -->
+<g id="edge260" class="edge"><title>22,2->22,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-3227.53C674.501,-3227.25 684.748,-3227.18 694.703,-3227.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-3230.82 704.933,-3227.52 695.007,-3223.82 694.864,-3230.82"/>
+</g>
+<!-- 23,4 -->
+<g id="node53" class="node"><title>23,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-4725" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-4736.17" font-family="Times Roman,serif" font-size="10.00">23,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-4717 727,-4731 778,-4731 778,-4717 727,-4717"/>
+<text text-anchor="start" x="736" y="-4721.67" font-family="Times Roman,serif" font-size="10.00">TCACG</text>
+<polygon fill="#218559" stroke="#218559" points="727,-4703 727,-4717 778,-4717 778,-4703 727,-4703"/>
+<text text-anchor="start" x="735.5" y="-4707.67" font-family="Times Roman,serif" font-size="10.00">CGTGA</text>
+</g>
+<!-- 22,2->23,4 -->
+<g id="edge262" class="edge"><title>22,2->23,4</title>
+<path fill="none" stroke="#ebb035" d="M627.883,-3269.47C643.959,-3327.18 674,-3435 674,-3435 674,-3435 686,-3516 686,-3516 686,-3516 696,-4677 696,-4677 696,-4677 703.283,-4683.24 712.596,-4691.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="710.529,-4694.06 720.4,-4697.91 715.085,-4688.75 710.529,-4694.06"/>
+</g>
+<!-- 22,3->21,4 -->
+<g id="edge270" class="edge"><title>22,3->21,4</title>
+<path fill="none" stroke="#218559" d="M761.883,-3269.47C777.959,-3327.18 808,-3435 808,-3435 808,-3435 830,-3531 830,-3531 830,-3531 849.437,-3571.96 865.409,-3605.61"/>
+<polygon fill="#218559" stroke="#218559" points="862.353,-3607.34 869.802,-3614.87 868.677,-3604.33 862.353,-3607.34"/>
+</g>
+<!-- 22,3->22,2 -->
+<g id="edge272" class="edge"><title>22,3->22,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-3240.48C695.29,-3240.75 685.041,-3240.82 675.091,-3240.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-3237.18 664.867,-3240.47 674.792,-3244.18 674.937,-3237.18"/>
+</g>
+<!-- 22,4 -->
+<g id="node48" class="node"><title>22,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-3234" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-3245.17" font-family="Times Roman,serif" font-size="10.00">22,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-3226 861,-3240 912,-3240 912,-3226 861,-3226"/>
+<text text-anchor="start" x="869.5" y="-3230.67" font-family="Times Roman,serif" font-size="10.00">GTGAA</text>
+<polygon fill="#218559" stroke="#218559" points="861,-3212 861,-3226 912,-3226 912,-3212 861,-3212"/>
+<text text-anchor="start" x="871.5" y="-3216.67" font-family="Times Roman,serif" font-size="10.00">TTCAC</text>
+</g>
+<!-- 22,3->22,4 -->
+<g id="edge268" class="edge"><title>22,3->22,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-3227.53C808.501,-3227.25 818.748,-3227.18 828.703,-3227.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-3230.82 838.933,-3227.52 829.007,-3223.82 828.864,-3230.82"/>
+</g>
+<!-- 22,4->22,3 -->
+<g id="edge276" class="edge"><title>22,4->22,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-3240.48C829.29,-3240.75 819.041,-3240.82 809.091,-3240.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-3237.18 798.867,-3240.47 808.792,-3244.18 808.937,-3237.18"/>
+</g>
+<!-- 23,2 -->
+<g id="node51" class="node"><title>23,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-4725" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-4736.17" font-family="Times Roman,serif" font-size="10.00">23,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-4717 459,-4731 510,-4731 510,-4717 459,-4717"/>
+<text text-anchor="start" x="470" y="-4721.67" font-family="Times Roman,serif" font-size="10.00">TTTCA</text>
+<polygon fill="#218559" stroke="#218559" points="459,-4703 459,-4717 510,-4717 510,-4703 459,-4703"/>
+<text text-anchor="start" x="467.5" y="-4707.67" font-family="Times Roman,serif" font-size="10.00">TGAAA</text>
+</g>
+<!-- 22,4->23,2 -->
+<g id="edge274" class="edge"><title>22,4->23,2</title>
+<path fill="none" stroke="#ebb035" d="M876.117,-3269.47C860.041,-3327.18 830,-3435 830,-3435 830,-3435 808,-3561 808,-3561 808,-3561 696,-3601 696,-3601 696,-3601 674,-4192 674,-4192 674,-4192 562,-4415 562,-4415 562,-4415 550,-4546 550,-4546 550,-4546 540,-4677 540,-4677 540,-4677 532.717,-4683.24 523.404,-4691.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="520.915,-4688.75 515.6,-4697.91 525.471,-4694.06 520.915,-4688.75"/>
+</g>
+<!-- 23,1->24,3 -->
+<g id="edge690" class="edge"><title>23,1->24,3</title>
+<path fill="none" stroke="#06a2cb" d="M358.034,-4689.18C371.699,-4628.26 398,-4511 398,-4511 398,-4511 416,-4240 416,-4240 416,-4240 418,-4212 418,-4212 418,-4212 428,-4026 428,-4026 428,-4026 540,-3996 540,-3996 540,-3996 562,-3896 562,-3896 562,-3896 674,-3896 674,-3896 674,-3896 686,-4442 686,-4442 686,-4442 696,-5072 696,-5072 696,-5072 703.283,-5078.24 712.596,-5086.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="710.529,-5089.06 720.4,-5092.91 715.085,-5083.75 710.529,-5089.06"/>
+</g>
+<!-- 23,1->23,2 -->
+<g id="edge686" class="edge"><title>23,1->23,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-4718.53C406.501,-4718.25 416.748,-4718.18 426.703,-4718.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-4721.82 436.933,-4718.52 427.007,-4714.82 426.864,-4721.82"/>
+</g>
+<!-- 23,1->7,1 -->
+<g id="edge684" class="edge"><title>23,1->7,1</title>
+<path fill="none" stroke="#dd1e2f" d="M386.389,-4701.45C402.556,-4690.99 418,-4681 418,-4681 418,-4681 430.492,-4671.16 444.607,-4660.04"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="446.857,-4662.72 452.546,-4653.78 442.525,-4657.22 446.857,-4662.72"/>
+</g>
+<!-- 17,2 -->
+<g id="node140" class="node"><title>17,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-3948" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-3959.17" font-family="Times Roman,serif" font-size="10.00">17,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-3940 459,-3954 510,-3954 510,-3940 459,-3940"/>
+<text text-anchor="start" x="467" y="-3944.67" font-family="Times Roman,serif" font-size="10.00">AAACG</text>
+<polygon fill="#218559" stroke="#218559" points="459,-3926 459,-3940 510,-3940 510,-3926 459,-3926"/>
+<text text-anchor="start" x="469.5" y="-3930.67" font-family="Times Roman,serif" font-size="10.00">CGTTT</text>
+</g>
+<!-- 23,1->17,2 -->
+<g id="edge692" class="edge"><title>23,1->17,2</title>
+<path fill="none" stroke="#06a2cb" d="M358.034,-4689.18C371.699,-4628.26 398,-4511 398,-4511 398,-4511 416,-4215 416,-4215 416,-4215 418,-4187 418,-4187 418,-4187 428,-3998 428,-3998 428,-3998 435.684,-3991.14 445.359,-3982.5"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="447.923,-3984.9 453.052,-3975.63 443.261,-3979.68 447.923,-3984.9"/>
+</g>
+<!-- 23,1->19,4 -->
+<g id="edge688" class="edge"><title>23,1->19,4</title>
+<path fill="none" stroke="#ebb035" d="M356.826,-4760.7C370.078,-4829.99 398,-4976 398,-4976 398,-4976 418,-5091 418,-5091 418,-5091 428,-5168 428,-5168 428,-5168 435.283,-5174.24 444.596,-5182.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="442.529,-5185.06 452.4,-5188.91 447.085,-5179.75 442.529,-5185.06"/>
+</g>
+<!-- 18,1 -->
+<g id="node154" class="node"><title>18,1</title>
+<ellipse fill="none" stroke="black" cx="216" cy="-4463" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="193.5" y="-4474.17" font-family="Times Roman,serif" font-size="10.00">18,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="191,-4455 191,-4469 242,-4469 242,-4455 191,-4455"/>
+<text text-anchor="start" x="201.5" y="-4459.67" font-family="Times Roman,serif" font-size="10.00">CGTTT</text>
+<polygon fill="#218559" stroke="#218559" points="191,-4441 191,-4455 242,-4455 242,-4441 191,-4441"/>
+<text text-anchor="start" x="199" y="-4445.67" font-family="Times Roman,serif" font-size="10.00">AAACG</text>
+</g>
+<!-- 23,1->18,1 -->
+<g id="edge694" class="edge"><title>23,1->18,1</title>
+<path fill="none" stroke="#218559" d="M333.345,-4690.93C313.793,-4650.94 284,-4590 284,-4590 284,-4590 258.516,-4542.4 238.749,-4505.49"/>
+<polygon fill="#218559" stroke="#218559" points="241.672,-4503.53 233.866,-4496.37 235.501,-4506.84 241.672,-4503.53"/>
+</g>
+<!-- 23,2->22,4 -->
+<g id="edge698" class="edge"><title>23,2->22,4</title>
+<path fill="none" stroke="#ebb035" d="M515.6,-4697.91C528.303,-4687.03 540,-4677 540,-4677 540,-4677 562,-4192 562,-4192 562,-4192 674,-4162 674,-4162 674,-4162 684,-3796 684,-3796 684,-3796 696,-3561 696,-3561 696,-3561 808,-3531 808,-3531 808,-3531 830,-3435 830,-3435 830,-3435 856.845,-3338.65 873.362,-3279.36"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="876.805,-3280.04 876.117,-3269.47 870.062,-3278.17 876.805,-3280.04"/>
+</g>
+<!-- 23,2->23,1 -->
+<g id="edge702" class="edge"><title>23,2->23,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-4731.48C427.29,-4731.75 417.041,-4731.82 407.091,-4731.68"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-4728.18 396.867,-4731.47 406.792,-4735.18 406.937,-4728.18"/>
+</g>
+<!-- 23,2->23,3 -->
+<g id="edge696" class="edge"><title>23,2->23,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-4718.53C540.501,-4718.25 550.748,-4718.18 560.703,-4718.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-4721.82 570.933,-4718.52 561.007,-4714.82 560.864,-4721.82"/>
+</g>
+<!-- 23,2->17,1 -->
+<g id="edge700" class="edge"><title>23,2->17,1</title>
+<path fill="none" stroke="#06a2cb" d="M452.4,-4697.91C439.697,-4687.03 428,-4677 428,-4677 428,-4677 418,-3825 418,-3825 418,-3825 416,-3825 416,-3825 416,-3825 392.048,-3869.64 373.003,-3905.13"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="369.679,-3903.92 368.035,-3914.39 375.847,-3907.23 369.679,-3903.92"/>
+</g>
+<!-- 23,3->24,1 -->
+<g id="edge706" class="edge"><title>23,3->24,1</title>
+<path fill="none" stroke="#06a2cb" d="M609.73,-4760.63C591.608,-4838.72 550,-5018 550,-5018 550,-5018 540,-5072 540,-5072 540,-5072 532.717,-5078.24 523.404,-5086.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="520.915,-5083.75 515.6,-5092.91 525.471,-5089.06 520.915,-5083.75"/>
+</g>
+<!-- 23,3->23,2 -->
+<g id="edge708" class="edge"><title>23,3->23,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-4731.48C561.29,-4731.75 551.041,-4731.82 541.091,-4731.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-4728.18 530.867,-4731.47 540.792,-4735.18 540.937,-4728.18"/>
+</g>
+<!-- 23,3->23,4 -->
+<g id="edge704" class="edge"><title>23,3->23,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-4718.53C674.501,-4718.25 684.748,-4718.18 694.703,-4718.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-4721.82 704.933,-4718.52 695.007,-4714.82 694.864,-4721.82"/>
+</g>
+<!-- 23,4->22,2 -->
+<g id="edge710" class="edge"><title>23,4->22,2</title>
+<path fill="none" stroke="#ebb035" d="M720.4,-4697.91C707.697,-4687.03 696,-4677 696,-4677 696,-4677 686,-3501 686,-3501 686,-3501 674,-3435 674,-3435 674,-3435 647.155,-3338.65 630.638,-3279.36"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="633.938,-3278.17 627.883,-3269.47 627.195,-3280.04 633.938,-3278.17"/>
+</g>
+<!-- 23,4->23,3 -->
+<g id="edge712" class="edge"><title>23,4->23,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-4731.48C695.29,-4731.75 685.041,-4731.82 675.091,-4731.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-4728.18 664.867,-4731.47 674.792,-4735.18 674.937,-4728.18"/>
+</g>
+<!-- 28,1->27,2 -->
+<g id="edge336" class="edge"><title>28,1->27,2</title>
+<path fill="none" stroke="#218559" d="M583.317,-2764.85C566.127,-2777.16 545.205,-2792.15 527.005,-2805.19"/>
+<polygon fill="#218559" stroke="#218559" points="524.948,-2802.36 518.857,-2811.03 529.025,-2808.05 524.948,-2802.36"/>
+</g>
+<!-- 28,1->22,1 -->
+<g id="edge332" class="edge"><title>28,1->22,1</title>
+<path fill="none" stroke="#dd1e2f" d="M586.4,-2767.09C573.697,-2777.97 562,-2788 562,-2788 562,-2788 550,-2923 550,-2923 550,-2923 540,-3083 540,-3083 540,-3083 517.026,-3144.95 500.508,-3189.49"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="497.184,-3188.38 496.988,-3198.98 503.747,-3190.82 497.184,-3188.38"/>
+</g>
+<!-- 28,2 -->
+<g id="node56" class="node"><title>28,2</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-2740" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-2751.17" font-family="Times Roman,serif" font-size="10.00">28,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-2732 727,-2746 778,-2746 778,-2732 727,-2732"/>
+<text text-anchor="start" x="737" y="-2736.67" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-2718 727,-2732 778,-2732 778,-2718 727,-2718"/>
+<text text-anchor="start" x="736.5" y="-2722.67" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+</g>
+<!-- 28,1->28,2 -->
+<g id="edge334" class="edge"><title>28,1->28,2</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-2733.53C674.501,-2733.25 684.748,-2733.18 694.703,-2733.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-2736.82 704.933,-2733.52 695.007,-2729.82 694.864,-2736.82"/>
+</g>
+<!-- 28,2->28,1 -->
+<g id="edge342" class="edge"><title>28,2->28,1</title>
+<path fill="none" stroke="#218559" d="M704.933,-2746.48C695.29,-2746.75 685.041,-2746.82 675.091,-2746.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-2743.18 664.867,-2746.47 674.792,-2750.18 674.937,-2743.18"/>
+</g>
+<!-- 28,2->28,3 -->
+<g id="edge338" class="edge"><title>28,2->28,3</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-2733.53C808.501,-2733.25 818.748,-2733.18 828.703,-2733.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-2736.82 838.933,-2733.52 829.007,-2729.82 828.864,-2736.82"/>
+</g>
+<!-- 29,4 -->
+<g id="node63" class="node"><title>29,4</title>
+<ellipse fill="none" stroke="black" cx="1154" cy="-2306" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1131.5" y="-2317.17" font-family="Times Roman,serif" font-size="10.00">29,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1129,-2298 1129,-2312 1180,-2312 1180,-2298 1129,-2298"/>
+<text text-anchor="start" x="1137.5" y="-2302.67" font-family="Times Roman,serif" font-size="10.00">GACGT</text>
+<polygon fill="#218559" stroke="#218559" points="1129,-2284 1129,-2298 1180,-2298 1180,-2284 1129,-2284"/>
+<text text-anchor="start" x="1138.5" y="-2288.67" font-family="Times Roman,serif" font-size="10.00">ACGTC</text>
+</g>
+<!-- 28,2->29,4 -->
+<g id="edge340" class="edge"><title>28,2->29,4</title>
+<path fill="none" stroke="#ebb035" d="M790.086,-2718.03C809.886,-2706.6 830,-2695 830,-2695 830,-2695 1076,-2610 1076,-2610 1076,-2610 1106,-2450 1106,-2450 1106,-2450 1124.98,-2393.07 1139.07,-2350.8"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1142.42,-2351.82 1142.26,-2341.23 1135.78,-2349.61 1142.42,-2351.82"/>
+</g>
+<!-- 28,3->27,4 -->
+<g id="edge346" class="edge"><title>28,3->27,4</title>
+<path fill="none" stroke="#218559" d="M851.317,-2764.85C834.127,-2777.16 813.205,-2792.15 795.005,-2805.19"/>
+<polygon fill="#218559" stroke="#218559" points="792.948,-2802.36 786.857,-2811.03 797.025,-2808.05 792.948,-2802.36"/>
+</g>
+<!-- 28,3->28,2 -->
+<g id="edge348" class="edge"><title>28,3->28,2</title>
+<path fill="none" stroke="#218559" d="M838.933,-2746.48C829.29,-2746.75 819.041,-2746.82 809.091,-2746.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-2743.18 798.867,-2746.47 808.792,-2750.18 808.937,-2743.18"/>
+</g>
+<!-- 28,4 -->
+<g id="node58" class="node"><title>28,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-2740" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-2751.17" font-family="Times Roman,serif" font-size="10.00">28,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-2732 995,-2746 1046,-2746 1046,-2732 995,-2732"/>
+<text text-anchor="start" x="1004" y="-2736.67" font-family="Times Roman,serif" font-size="10.00">CGTCA</text>
+<polygon fill="#218559" stroke="#218559" points="995,-2718 995,-2732 1046,-2732 1046,-2718 995,-2718"/>
+<text text-anchor="start" x="1003.5" y="-2722.67" font-family="Times Roman,serif" font-size="10.00">TGACG</text>
+</g>
+<!-- 28,3->28,4 -->
+<g id="edge344" class="edge"><title>28,3->28,4</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-2733.53C942.501,-2733.25 952.748,-2733.18 962.703,-2733.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-2736.82 972.933,-2733.52 963.007,-2729.82 962.864,-2736.82"/>
+</g>
+<!-- 28,4->28,3 -->
+<g id="edge354" class="edge"><title>28,4->28,3</title>
+<path fill="none" stroke="#218559" d="M972.933,-2746.48C963.29,-2746.75 953.041,-2746.82 943.091,-2746.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-2743.18 932.867,-2746.47 942.792,-2750.18 942.937,-2743.18"/>
+</g>
+<!-- 29,2 -->
+<g id="node61" class="node"><title>29,2</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-2306" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-2317.17" font-family="Times Roman,serif" font-size="10.00">29,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-2298 861,-2312 912,-2312 912,-2298 861,-2298"/>
+<text text-anchor="start" x="870" y="-2302.67" font-family="Times Roman,serif" font-size="10.00">ATGAC</text>
+<polygon fill="#218559" stroke="#218559" points="861,-2284 861,-2298 912,-2298 912,-2284 861,-2284"/>
+<text text-anchor="start" x="871.5" y="-2288.67" font-family="Times Roman,serif" font-size="10.00">GTCAT</text>
+</g>
+<!-- 28,4->29,2 -->
+<g id="edge352" class="edge"><title>28,4->29,2</title>
+<path fill="none" stroke="#ebb035" d="M1009.73,-2704.42C993.562,-2648.41 964,-2546 964,-2546 964,-2546 954,-2407 954,-2407 954,-2407 942,-2354 942,-2354 942,-2354 934.717,-2347.76 925.404,-2339.78"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="927.471,-2336.94 917.6,-2333.09 922.915,-2342.25 927.471,-2336.94"/>
+</g>
+<!-- 31,3 -->
+<g id="node161" class="node"><title>31,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1732" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-1743.17" font-family="Times Roman,serif" font-size="10.00">31,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-1724 861,-1738 912,-1738 912,-1724 861,-1724"/>
+<text text-anchor="start" x="871.5" y="-1728.67" font-family="Times Roman,serif" font-size="10.00">GTCAT</text>
+<polygon fill="#218559" stroke="#218559" points="861,-1710 861,-1724 912,-1724 912,-1710 861,-1710"/>
+<text text-anchor="start" x="870" y="-1714.67" font-family="Times Roman,serif" font-size="10.00">ATGAC</text>
+</g>
+<!-- 28,4->31,3 -->
+<g id="edge350" class="edge"><title>28,4->31,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1009.73,-2704.42C993.562,-2648.41 964,-2546 964,-2546 964,-2546 954,-1346 954,-1346 954,-1346 952,-1346 952,-1346 952,-1346 942,-1684 942,-1684 942,-1684 934.717,-1690.24 925.404,-1698.22"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="922.915,-1695.75 917.6,-1704.91 927.471,-1701.06 922.915,-1695.75"/>
+</g>
+<!-- 29,1 -->
+<g id="node60" class="node"><title>29,1</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-2306" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-2317.17" font-family="Times Roman,serif" font-size="10.00">29,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-2298 727,-2312 778,-2312 778,-2298 727,-2298"/>
+<text text-anchor="start" x="736" y="-2302.67" font-family="Times Roman,serif" font-size="10.00">CATGA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-2284 727,-2298 778,-2298 778,-2284 727,-2284"/>
+<text text-anchor="start" x="737" y="-2288.67" font-family="Times Roman,serif" font-size="10.00">TCATG</text>
+</g>
+<!-- 29,1->29,2 -->
+<g id="edge760" class="edge"><title>29,1->29,2</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-2299.53C808.501,-2299.25 818.748,-2299.18 828.703,-2299.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-2302.82 838.933,-2299.52 829.007,-2295.82 828.864,-2302.82"/>
+</g>
+<!-- 30,2 -->
+<g id="node165" class="node"><title>30,2</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-2140" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-2151.17" font-family="Times Roman,serif" font-size="10.00">30,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-2132 861,-2146 912,-2146 912,-2132 861,-2132"/>
+<text text-anchor="start" x="871" y="-2136.67" font-family="Times Roman,serif" font-size="10.00">TCATG</text>
+<polygon fill="#218559" stroke="#218559" points="861,-2118 861,-2132 912,-2132 912,-2118 861,-2118"/>
+<text text-anchor="start" x="870" y="-2122.67" font-family="Times Roman,serif" font-size="10.00">CATGA</text>
+</g>
+<!-- 29,1->30,2 -->
+<g id="edge764" class="edge"><title>29,1->30,2</title>
+<path fill="none" stroke="#218559" d="M780.444,-2277.14C799.078,-2258.23 820,-2237 820,-2237 820,-2237 840.341,-2207.11 858.438,-2180.51"/>
+<polygon fill="#218559" stroke="#218559" points="861.474,-2182.27 864.206,-2172.03 855.687,-2178.33 861.474,-2182.27"/>
+</g>
+<!-- 30,3 -->
+<g id="node166" class="node"><title>30,3</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-2140" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-2151.17" font-family="Times Roman,serif" font-size="10.00">30,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-2132 995,-2146 1046,-2146 1046,-2132 995,-2132"/>
+<text text-anchor="start" x="1004" y="-2136.67" font-family="Times Roman,serif" font-size="10.00">CATGA</text>
+<polygon fill="#218559" stroke="#218559" points="995,-2118 995,-2132 1046,-2132 1046,-2118 995,-2118"/>
+<text text-anchor="start" x="1005" y="-2122.67" font-family="Times Roman,serif" font-size="10.00">TCATG</text>
+</g>
+<!-- 29,1->30,3 -->
+<g id="edge762" class="edge"><title>29,1->30,3</title>
+<path fill="none" stroke="#06a2cb" d="M768.198,-2271.87C784.736,-2237.02 808,-2188 808,-2188 808,-2188 820,-2139 820,-2139 820,-2139 830,-2095 830,-2095 830,-2095 942,-2095 942,-2095 942,-2095 956.586,-2103.41 973.143,-2112.97"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="971.503,-2116.06 981.914,-2118.03 975.001,-2110 971.503,-2116.06"/>
+</g>
+<!-- 29,2->28,4 -->
+<g id="edge770" class="edge"><title>29,2->28,4</title>
+<path fill="none" stroke="#ebb035" d="M917.6,-2333.09C930.303,-2343.97 942,-2354 942,-2354 942,-2354 964,-2546 964,-2546 964,-2546 990.308,-2637.14 1006.86,-2694.48"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1003.59,-2695.78 1009.73,-2704.42 1010.32,-2693.84 1003.59,-2695.78"/>
+</g>
+<!-- 29,2->29,1 -->
+<g id="edge772" class="edge"><title>29,2->29,1</title>
+<path fill="none" stroke="#218559" d="M838.933,-2312.48C829.29,-2312.75 819.041,-2312.82 809.091,-2312.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-2309.18 798.867,-2312.47 808.792,-2316.18 808.937,-2309.18"/>
+</g>
+<!-- 29,3 -->
+<g id="node62" class="node"><title>29,3</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-2306" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-2317.17" font-family="Times Roman,serif" font-size="10.00">29,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-2298 995,-2312 1046,-2312 1046,-2298 995,-2298"/>
+<text text-anchor="start" x="1003.5" y="-2302.67" font-family="Times Roman,serif" font-size="10.00">TGACG</text>
+<polygon fill="#218559" stroke="#218559" points="995,-2284 995,-2298 1046,-2298 1046,-2284 995,-2284"/>
+<text text-anchor="start" x="1004" y="-2288.67" font-family="Times Roman,serif" font-size="10.00">CGTCA</text>
+</g>
+<!-- 29,2->29,3 -->
+<g id="edge766" class="edge"><title>29,2->29,3</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-2299.53C942.501,-2299.25 952.748,-2299.18 962.703,-2299.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-2302.82 972.933,-2299.52 963.007,-2295.82 962.864,-2302.82"/>
+</g>
+<!-- 32,4 -->
+<g id="node197" class="node"><title>32,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-1974" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-1985.17" font-family="Times Roman,serif" font-size="10.00">32,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-1966 995,-1980 1046,-1980 1046,-1966 995,-1966"/>
+<text text-anchor="start" x="1004" y="-1970.67" font-family="Times Roman,serif" font-size="10.00">CGTCA</text>
+<polygon fill="#218559" stroke="#218559" points="995,-1952 995,-1966 1046,-1966 1046,-1952 995,-1952"/>
+<text text-anchor="start" x="1003.5" y="-1956.67" font-family="Times Roman,serif" font-size="10.00">TGACG</text>
+</g>
+<!-- 29,2->32,4 -->
+<g id="edge768" class="edge"><title>29,2->32,4</title>
+<path fill="none" stroke="#ebb035" d="M902.198,-2271.87C918.736,-2237.02 942,-2188 942,-2188 942,-2188 954,-2154 954,-2154 954,-2154 964,-2092 964,-2092 964,-2092 983.437,-2051.04 999.409,-2017.39"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1002.68,-2018.67 1003.8,-2008.13 996.353,-2015.66 1002.68,-2018.67"/>
+</g>
+<!-- 29,3->29,2 -->
+<g id="edge780" class="edge"><title>29,3->29,2</title>
+<path fill="none" stroke="#218559" d="M972.933,-2312.48C963.29,-2312.75 953.041,-2312.82 943.091,-2312.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-2309.18 932.867,-2312.47 942.792,-2316.18 942.937,-2309.18"/>
+</g>
+<!-- 29,3->29,4 -->
+<g id="edge774" class="edge"><title>29,3->29,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1066.87,-2299.53C1076.5,-2299.25 1086.75,-2299.18 1096.7,-2299.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1096.86,-2302.82 1106.93,-2299.52 1097.01,-2295.82 1096.86,-2302.82"/>
+</g>
+<!-- 30,1 -->
+<g id="node164" class="node"><title>30,1</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-2140" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-2151.17" font-family="Times Roman,serif" font-size="10.00">30,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-2132 727,-2146 778,-2146 778,-2132 727,-2132"/>
+<text text-anchor="start" x="737.5" y="-2136.67" font-family="Times Roman,serif" font-size="10.00">GTCAT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-2118 727,-2132 778,-2132 778,-2118 727,-2118"/>
+<text text-anchor="start" x="736" y="-2122.67" font-family="Times Roman,serif" font-size="10.00">ATGAC</text>
+</g>
+<!-- 29,3->30,1 -->
+<g id="edge776" class="edge"><title>29,3->30,1</title>
+<path fill="none" stroke="#06a2cb" d="M1003.8,-2271.87C987.264,-2237.02 964,-2188 964,-2188 964,-2188 942,-1926 942,-1926 942,-1926 830,-1926 830,-1926 830,-1926 808,-2090 808,-2090 808,-2090 800.316,-2096.86 790.641,-2105.5"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="788.077,-2103.1 782.948,-2112.37 792.739,-2108.32 788.077,-2103.1"/>
+</g>
+<!-- 30,4 -->
+<g id="node167" class="node"><title>30,4</title>
+<ellipse fill="none" stroke="black" cx="1154" cy="-2140" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1131.5" y="-2151.17" font-family="Times Roman,serif" font-size="10.00">30,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1129,-2132 1129,-2146 1180,-2146 1180,-2132 1129,-2132"/>
+<text text-anchor="start" x="1138" y="-2136.67" font-family="Times Roman,serif" font-size="10.00">ATGAC</text>
+<polygon fill="#218559" stroke="#218559" points="1129,-2118 1129,-2132 1180,-2132 1180,-2118 1129,-2118"/>
+<text text-anchor="start" x="1139.5" y="-2122.67" font-family="Times Roman,serif" font-size="10.00">GTCAT</text>
+</g>
+<!-- 29,3->30,4 -->
+<g id="edge778" class="edge"><title>29,3->30,4</title>
+<path fill="none" stroke="#218559" d="M1048.44,-2277.14C1067.08,-2258.23 1088,-2237 1088,-2237 1088,-2237 1108.34,-2207.11 1126.44,-2180.51"/>
+<polygon fill="#218559" stroke="#218559" points="1129.47,-2182.27 1132.21,-2172.03 1123.69,-2178.33 1129.47,-2182.27"/>
+</g>
+<!-- 29,4->28,2 -->
+<g id="edge782" class="edge"><title>29,4->28,2</title>
+<path fill="none" stroke="#ebb035" d="M1142.26,-2341.23C1128.05,-2383.86 1106,-2450 1106,-2450 1106,-2450 1076,-2546 1076,-2546 1076,-2546 954,-2614 954,-2614 954,-2614 830,-2653 830,-2653 830,-2653 808,-2678 808,-2678 808,-2678 797.753,-2689.34 786.026,-2702.33"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="783.121,-2700.32 779.016,-2710.09 788.316,-2705.01 783.121,-2700.32"/>
+</g>
+<!-- 29,4->29,3 -->
+<g id="edge784" class="edge"><title>29,4->29,3</title>
+<path fill="none" stroke="#218559" d="M1106.93,-2312.48C1097.29,-2312.75 1087.04,-2312.82 1077.09,-2312.68"/>
+<polygon fill="#218559" stroke="#218559" points="1076.94,-2309.18 1066.87,-2312.47 1076.79,-2316.18 1076.94,-2309.18"/>
+</g>
+<!-- 1,1 -->
+<g id="node65" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="64" cy="-5415" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="44.5" y="-5426.17" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="42,-5407 42,-5421 87,-5421 87,-5407 42,-5407"/>
+<text text-anchor="start" x="49" y="-5411.67" font-family="Times Roman,serif" font-size="10.00">TAGTG</text>
+<polygon fill="#218559" stroke="#218559" points="42,-5393 42,-5407 87,-5407 87,-5393 42,-5393"/>
+<text text-anchor="start" x="48.5" y="-5397.67" font-family="Times Roman,serif" font-size="10.00">CACTA</text>
+</g>
+<!-- 1,2 -->
+<g id="node66" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="216" cy="-5415" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="196.5" y="-5426.17" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="194,-5407 194,-5421 239,-5421 239,-5407 194,-5407"/>
+<text text-anchor="start" x="199.5" y="-5411.67" font-family="Times Roman,serif" font-size="10.00">AGTGC</text>
+<polygon fill="#218559" stroke="#218559" points="194,-5393 194,-5407 239,-5407 239,-5393 194,-5393"/>
+<text text-anchor="start" x="200" y="-5397.67" font-family="Times Roman,serif" font-size="10.00">GCACT</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge454" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M106.745,-5408.86C124.378,-5408.17 144.986,-5408.07 163.523,-5408.55"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="163.467,-5412.05 173.576,-5408.87 163.695,-5405.05 163.467,-5412.05"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge460" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M173.576,-5421.13C155.979,-5421.82 135.377,-5421.94 116.815,-5421.46"/>
+<polygon fill="#218559" stroke="#218559" points="116.852,-5417.96 106.745,-5421.14 116.627,-5424.96 116.852,-5417.96"/>
+</g>
+<!-- 1,3 -->
+<g id="node67" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-5415" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="330.5" y="-5426.17" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="328,-5407 328,-5421 373,-5421 373,-5407 328,-5407"/>
+<text text-anchor="start" x="333" y="-5411.67" font-family="Times Roman,serif" font-size="10.00">GTGCG</text>
+<polygon fill="#218559" stroke="#218559" points="328,-5393 328,-5407 373,-5407 373,-5393 328,-5393"/>
+<text text-anchor="start" x="333" y="-5397.67" font-family="Times Roman,serif" font-size="10.00">CGCAC</text>
+</g>
+<!-- 1,2->1,3 -->
+<g id="edge456" class="edge"><title>1,2->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M258.398,-5408.67C270.741,-5408.23 284.377,-5408.14 297.273,-5408.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="297.326,-5411.89 307.417,-5408.67 297.517,-5404.89 297.326,-5411.89"/>
+</g>
+<!-- 2,4 -->
+<g id="node77" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-5319" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="598.5" y="-5330.17" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="596,-5311 596,-5325 641,-5325 641,-5311 596,-5311"/>
+<text text-anchor="start" x="601" y="-5315.67" font-family="Times Roman,serif" font-size="10.00">CGCAC</text>
+<polygon fill="#218559" stroke="#218559" points="596,-5297 596,-5311 641,-5311 641,-5297 596,-5297"/>
+<text text-anchor="start" x="601" y="-5301.67" font-family="Times Roman,serif" font-size="10.00">GTGCG</text>
+</g>
+<!-- 1,2->2,4 -->
+<g id="edge458" class="edge"><title>1,2->2,4</title>
+<path fill="none" stroke="#ebb035" d="M251.804,-5435.66C272.309,-5447.49 294,-5460 294,-5460 294,-5460 540,-5460 540,-5460 540,-5460 571.999,-5402.16 595.006,-5360.57"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="598.131,-5362.15 599.909,-5351.7 592.006,-5358.76 598.131,-5362.15"/>
+</g>
+<!-- 1,3->1,2 -->
+<g id="edge464" class="edge"><title>1,3->1,2</title>
+<path fill="none" stroke="#218559" d="M307.417,-5421.33C295.062,-5421.77 281.424,-5421.86 268.535,-5421.61"/>
+<polygon fill="#218559" stroke="#218559" points="268.491,-5418.11 258.398,-5421.33 268.298,-5425.1 268.491,-5418.11"/>
+</g>
+<!-- 1,4 -->
+<g id="node68" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-5415" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="464.5" y="-5426.17" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="462,-5407 462,-5421 507,-5421 507,-5407 462,-5407"/>
+<text text-anchor="start" x="467.5" y="-5411.67" font-family="Times Roman,serif" font-size="10.00">TGCGA</text>
+<polygon fill="#218559" stroke="#218559" points="462,-5393 462,-5407 507,-5407 507,-5393 462,-5393"/>
+<text text-anchor="start" x="468" y="-5397.67" font-family="Times Roman,serif" font-size="10.00">TCGCA</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge462" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M392.398,-5408.67C404.741,-5408.23 418.377,-5408.14 431.273,-5408.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="431.326,-5411.89 441.417,-5408.67 431.517,-5404.89 431.326,-5411.89"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge468" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M441.417,-5421.33C429.062,-5421.77 415.424,-5421.86 402.535,-5421.61"/>
+<polygon fill="#218559" stroke="#218559" points="402.491,-5418.11 392.398,-5421.33 402.298,-5425.1 402.491,-5418.11"/>
+</g>
+<!-- 2,2 -->
+<g id="node75" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-5319" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="330.5" y="-5330.17" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="328,-5311 328,-5325 373,-5325 373,-5311 328,-5311"/>
+<text text-anchor="start" x="334" y="-5315.67" font-family="Times Roman,serif" font-size="10.00">CTCGC</text>
+<polygon fill="#218559" stroke="#218559" points="328,-5297 328,-5311 373,-5311 373,-5297 328,-5297"/>
+<text text-anchor="start" x="332.5" y="-5301.67" font-family="Times Roman,serif" font-size="10.00">GCGAG</text>
+</g>
+<!-- 1,4->2,2 -->
+<g id="edge466" class="edge"><title>1,4->2,2</title>
+<path fill="none" stroke="#ebb035" d="M448.805,-5394.3C432.212,-5384.54 416,-5375 416,-5375 416,-5375 402.69,-5363.71 388.025,-5351.26"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="390.185,-5348.51 380.296,-5344.71 385.656,-5353.84 390.185,-5348.51"/>
+</g>
+<!-- 3,1 -->
+<g id="node70" class="node"><title>3,1</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-52" rx="43.8406" ry="36.0624"/>
+<text text-anchor="start" x="732" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">3,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="729,-44 729,-58 775,-58 775,-44 729,-44"/>
+<text text-anchor="start" x="731.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">GCTAGG</text>
+<polygon fill="#218559" stroke="#218559" points="729,-30 729,-44 775,-44 775,-30 729,-30"/>
+<text text-anchor="start" x="732.5" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">CCTAGC</text>
+</g>
+<!-- 3,3 -->
+<g id="node71" class="node"><title>3,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-52" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="866.5" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">3,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="864,-44 864,-58 909,-58 909,-44 864,-44"/>
+<text text-anchor="start" x="869.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">TAGGG</text>
+<polygon fill="#218559" stroke="#218559" points="864,-30 864,-44 909,-44 909,-30 864,-30"/>
+<text text-anchor="start" x="870.5" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">CCCTA</text>
+</g>
+<!-- 3,1->3,3 -->
+<g id="edge470" class="edge"><title>3,1->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M795.506,-45.6333C807.566,-45.224 820.789,-45.1425 833.312,-45.3888"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="833.533,-48.8961 843.626,-45.6728 833.726,-41.8988 833.533,-48.8961"/>
+</g>
+<!-- 3,3->3,1 -->
+<g id="edge474" class="edge"><title>3,3->3,1</title>
+<path fill="none" stroke="#218559" d="M843.626,-58.3272C831.641,-58.7571 818.434,-58.8591 805.869,-58.6334"/>
+<polygon fill="#218559" stroke="#218559" points="805.593,-55.1253 795.506,-58.3667 805.413,-62.1229 805.593,-55.1253"/>
+</g>
+<!-- 3,4 -->
+<g id="node72" class="node"><title>3,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-52" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1000.5" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">3,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="998,-44 998,-58 1043,-58 1043,-44 998,-44"/>
+<text text-anchor="start" x="1003.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">AGGGT</text>
+<polygon fill="#218559" stroke="#218559" points="998,-30 998,-44 1043,-44 1043,-30 998,-30"/>
+<text text-anchor="start" x="1004" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">ACCCT</text>
+</g>
+<!-- 3,3->3,4 -->
+<g id="edge472" class="edge"><title>3,3->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M928.398,-45.6719C940.741,-45.2298 954.377,-45.1351 967.273,-45.388"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="967.326,-48.8907 977.417,-45.6653 967.517,-41.8933 967.326,-48.8907"/>
+</g>
+<!-- 3,4->3,3 -->
+<g id="edge478" class="edge"><title>3,4->3,3</title>
+<path fill="none" stroke="#218559" d="M977.417,-58.3347C965.062,-58.7734 951.424,-58.8645 938.535,-58.6082"/>
+<polygon fill="#218559" stroke="#218559" points="938.491,-55.1057 928.398,-58.3281 938.298,-62.1031 938.491,-55.1057"/>
+</g>
+<!-- 4,3 -->
+<g id="node86" class="node"><title>4,3</title>
+<ellipse fill="none" stroke="black" cx="1154" cy="-1444" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1134.5" y="-1455.17" font-family="Times Roman,serif" font-size="10.00">4,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1132,-1436 1132,-1450 1177,-1450 1177,-1436 1132,-1436"/>
+<text text-anchor="start" x="1138" y="-1440.67" font-family="Times Roman,serif" font-size="10.00">GGGTT</text>
+<polygon fill="#218559" stroke="#218559" points="1132,-1422 1132,-1436 1177,-1436 1177,-1422 1132,-1422"/>
+<text text-anchor="start" x="1137.5" y="-1426.67" font-family="Times Roman,serif" font-size="10.00">AACCC</text>
+</g>
+<!-- 3,4->4,3 -->
+<g id="edge476" class="edge"><title>3,4->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1023.59,-87.8824C1035.95,-211.48 1076,-612 1076,-612 1076,-612 1106,-1122 1106,-1122 1106,-1122 1133.86,-1308.88 1147.13,-1397.9"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1143.7,-1398.63 1148.63,-1408 1150.62,-1397.59 1143.7,-1398.63"/>
+</g>
+<!-- 2,1 -->
+<g id="node74" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="216" cy="-5319" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="196.5" y="-5330.17" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="194,-5311 194,-5325 239,-5325 239,-5311 194,-5311"/>
+<text text-anchor="start" x="200" y="-5315.67" font-family="Times Roman,serif" font-size="10.00">CCTCG</text>
+<polygon fill="#218559" stroke="#218559" points="194,-5297 194,-5311 239,-5311 239,-5297 194,-5297"/>
+<text text-anchor="start" x="198.5" y="-5301.67" font-family="Times Roman,serif" font-size="10.00">CGAGG</text>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge48" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M258.398,-5312.67C270.741,-5312.23 284.377,-5312.14 297.273,-5312.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="297.326,-5315.89 307.417,-5312.67 297.517,-5308.89 297.326,-5315.89"/>
+</g>
+<!-- 2,2->1,4 -->
+<g id="edge52" class="edge"><title>2,2->1,4</title>
+<path fill="none" stroke="#ebb035" d="M382.78,-5342.48C400.868,-5355.44 423.505,-5371.66 442.782,-5385.47"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="440.926,-5388.45 451.094,-5391.43 445.003,-5382.76 440.926,-5388.45"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge54" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M307.417,-5325.33C295.062,-5325.77 281.424,-5325.86 268.535,-5325.61"/>
+<polygon fill="#218559" stroke="#218559" points="268.491,-5322.11 258.398,-5325.33 268.298,-5329.1 268.491,-5322.11"/>
+</g>
+<!-- 2,3 -->
+<g id="node76" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-5319" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="464.5" y="-5330.17" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="462,-5311 462,-5325 507,-5325 507,-5311 462,-5311"/>
+<text text-anchor="start" x="468" y="-5315.67" font-family="Times Roman,serif" font-size="10.00">TCGCA</text>
+<polygon fill="#218559" stroke="#218559" points="462,-5297 462,-5311 507,-5311 507,-5297 462,-5297"/>
+<text text-anchor="start" x="467.5" y="-5301.67" font-family="Times Roman,serif" font-size="10.00">TGCGA</text>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge50" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M392.398,-5312.67C404.741,-5312.23 418.377,-5312.14 431.273,-5312.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="431.326,-5315.89 441.417,-5312.67 431.517,-5308.89 431.326,-5315.89"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge58" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M441.417,-5325.33C429.062,-5325.77 415.424,-5325.86 402.535,-5325.61"/>
+<polygon fill="#218559" stroke="#218559" points="402.491,-5322.11 392.398,-5325.33 402.298,-5329.1 402.491,-5322.11"/>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge56" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M526.398,-5312.67C538.741,-5312.23 552.377,-5312.14 565.273,-5312.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="565.326,-5315.89 575.417,-5312.67 565.517,-5308.89 565.326,-5315.89"/>
+</g>
+<!-- 2,4->1,2 -->
+<g id="edge60" class="edge"><title>2,4->1,2</title>
+<path fill="none" stroke="#ebb035" d="M604.961,-5353.38C585.726,-5404.09 552,-5493 552,-5493 552,-5493 282,-5493 282,-5493 282,-5493 264.575,-5472.41 247.632,-5452.38"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="250.083,-5449.86 240.952,-5444.49 244.739,-5454.38 250.083,-5449.86"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge62" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M575.417,-5325.33C563.062,-5325.77 549.424,-5325.86 536.535,-5325.61"/>
+<polygon fill="#218559" stroke="#218559" points="536.491,-5322.11 526.398,-5325.33 536.298,-5329.1 536.491,-5322.11"/>
+</g>
+<!-- 5,1 -->
+<g id="node79" class="node"><title>5,1</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-2498" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="732.5" y="-2509.17" font-family="Times Roman,serif" font-size="10.00">5,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="730,-2490 730,-2504 775,-2504 775,-2490 730,-2490"/>
+<text text-anchor="start" x="735" y="-2494.67" font-family="Times Roman,serif" font-size="10.00">AGCAA</text>
+<polygon fill="#218559" stroke="#218559" points="730,-2476 730,-2490 775,-2490 775,-2476 730,-2476"/>
+<text text-anchor="start" x="737.5" y="-2480.67" font-family="Times Roman,serif" font-size="10.00">TTGCT</text>
+</g>
+<!-- 5,2 -->
+<g id="node80" class="node"><title>5,2</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-2498" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="866.5" y="-2509.17" font-family="Times Roman,serif" font-size="10.00">5,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="864,-2490 864,-2504 909,-2504 909,-2490 864,-2490"/>
+<text text-anchor="start" x="869" y="-2494.67" font-family="Times Roman,serif" font-size="10.00">GCAAC</text>
+<polygon fill="#218559" stroke="#218559" points="864,-2476 864,-2490 909,-2490 909,-2476 864,-2476"/>
+<text text-anchor="start" x="870.5" y="-2480.67" font-family="Times Roman,serif" font-size="10.00">GTTGC</text>
+</g>
+<!-- 5,1->5,2 -->
+<g id="edge480" class="edge"><title>5,1->5,2</title>
+<path fill="none" stroke="#dd1e2f" d="M794.398,-2491.67C806.741,-2491.23 820.377,-2491.14 833.273,-2491.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="833.326,-2494.89 843.417,-2491.67 833.517,-2487.89 833.326,-2494.89"/>
+</g>
+<!-- 6,3 -->
+<g id="node96" class="node"><title>6,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-3131" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="866.5" y="-3142.17" font-family="Times Roman,serif" font-size="10.00">6,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="864,-3123 864,-3137 909,-3137 909,-3123 864,-3123"/>
+<text text-anchor="start" x="870" y="-3127.67" font-family="Times Roman,serif" font-size="10.00">TGCTG</text>
+<polygon fill="#218559" stroke="#218559" points="864,-3109 864,-3123 909,-3123 909,-3109 864,-3109"/>
+<text text-anchor="start" x="869" y="-3113.67" font-family="Times Roman,serif" font-size="10.00">CAGCA</text>
+</g>
+<!-- 5,1->6,3 -->
+<g id="edge482" class="edge"><title>5,1->6,3</title>
+<path fill="none" stroke="#06a2cb" d="M762.141,-2533.13C778.279,-2589.04 808,-2692 808,-2692 808,-2692 820,-2810 820,-2810 820,-2810 830,-3083 830,-3083 830,-3083 837.944,-3089.81 847.849,-3098.3"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="845.835,-3101.18 855.705,-3105.03 850.391,-3095.87 845.835,-3101.18"/>
+</g>
+<!-- 5,2->5,1 -->
+<g id="edge488" class="edge"><title>5,2->5,1</title>
+<path fill="none" stroke="#218559" d="M843.417,-2504.33C831.062,-2504.77 817.424,-2504.86 804.535,-2504.61"/>
+<polygon fill="#218559" stroke="#218559" points="804.491,-2501.11 794.398,-2504.33 804.298,-2508.1 804.491,-2501.11"/>
+</g>
+<!-- 5,3 -->
+<g id="node81" class="node"><title>5,3</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-2498" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1000.5" y="-2509.17" font-family="Times Roman,serif" font-size="10.00">5,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="998,-2490 998,-2504 1043,-2504 1043,-2490 998,-2490"/>
+<text text-anchor="start" x="1003.5" y="-2494.67" font-family="Times Roman,serif" font-size="10.00">CAACC</text>
+<polygon fill="#218559" stroke="#218559" points="998,-2476 998,-2490 1043,-2490 1043,-2476 998,-2476"/>
+<text text-anchor="start" x="1004" y="-2480.67" font-family="Times Roman,serif" font-size="10.00">GGTTG</text>
+</g>
+<!-- 5,2->5,3 -->
+<g id="edge484" class="edge"><title>5,2->5,3</title>
+<path fill="none" stroke="#dd1e2f" d="M928.398,-2491.67C940.741,-2491.23 954.377,-2491.14 967.273,-2491.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="967.326,-2494.89 977.417,-2491.67 967.517,-2487.89 967.326,-2494.89"/>
+</g>
+<!-- 4,4 -->
+<g id="node87" class="node"><title>4,4</title>
+<ellipse fill="none" stroke="black" cx="1288" cy="-1444" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1268.5" y="-1455.17" font-family="Times Roman,serif" font-size="10.00">4,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1266,-1436 1266,-1450 1311,-1450 1311,-1436 1266,-1436"/>
+<text text-anchor="start" x="1272" y="-1440.67" font-family="Times Roman,serif" font-size="10.00">GGTTG</text>
+<polygon fill="#218559" stroke="#218559" points="1266,-1422 1266,-1436 1311,-1436 1311,-1422 1266,-1422"/>
+<text text-anchor="start" x="1271.5" y="-1426.67" font-family="Times Roman,serif" font-size="10.00">CAACC</text>
+</g>
+<!-- 5,2->4,4 -->
+<g id="edge486" class="edge"><title>5,2->4,4</title>
+<path fill="none" stroke="#ebb035" d="M916.295,-2472.03C929.489,-2460.72 942,-2450 942,-2450 942,-2450 954,-2353 954,-2353 954,-2353 964,-2258 964,-2258 964,-2258 1210,-2211 1210,-2211 1210,-2211 1222,-2096 1222,-2096 1222,-2096 1268.27,-1638.87 1283.33,-1490.15"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1286.83,-1490.28 1284.36,-1479.98 1279.87,-1489.58 1286.83,-1490.28"/>
+</g>
+<!-- 5,3->5,2 -->
+<g id="edge494" class="edge"><title>5,3->5,2</title>
+<path fill="none" stroke="#218559" d="M977.417,-2504.33C965.062,-2504.77 951.424,-2504.86 938.535,-2504.61"/>
+<polygon fill="#218559" stroke="#218559" points="938.491,-2501.11 928.398,-2504.33 938.298,-2508.1 938.491,-2501.11"/>
+</g>
+<!-- 5,4 -->
+<g id="node82" class="node"><title>5,4</title>
+<ellipse fill="none" stroke="black" cx="1154" cy="-2498" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1134.5" y="-2509.17" font-family="Times Roman,serif" font-size="10.00">5,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1132,-2490 1132,-2504 1177,-2504 1177,-2490 1132,-2490"/>
+<text text-anchor="start" x="1137.5" y="-2494.67" font-family="Times Roman,serif" font-size="10.00">AACCC</text>
+<polygon fill="#218559" stroke="#218559" points="1132,-2476 1132,-2490 1177,-2490 1177,-2476 1132,-2476"/>
+<text text-anchor="start" x="1138" y="-2480.67" font-family="Times Roman,serif" font-size="10.00">GGGTT</text>
+</g>
+<!-- 5,3->5,4 -->
+<g id="edge490" class="edge"><title>5,3->5,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1062.4,-2491.67C1074.74,-2491.23 1088.38,-2491.14 1101.27,-2491.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1101.33,-2494.89 1111.42,-2491.67 1101.52,-2487.89 1101.33,-2494.89"/>
+</g>
+<!-- 6,1 -->
+<g id="node94" class="node"><title>6,1</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-3131" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="598.5" y="-3142.17" font-family="Times Roman,serif" font-size="10.00">6,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="596,-3123 596,-3137 641,-3137 641,-3123 596,-3123"/>
+<text text-anchor="start" x="602.5" y="-3127.67" font-family="Times Roman,serif" font-size="10.00">GTTGC</text>
+<polygon fill="#218559" stroke="#218559" points="596,-3109 596,-3123 641,-3123 641,-3109 596,-3109"/>
+<text text-anchor="start" x="601" y="-3113.67" font-family="Times Roman,serif" font-size="10.00">GCAAC</text>
+</g>
+<!-- 5,3->6,1 -->
+<g id="edge492" class="edge"><title>5,3->6,1</title>
+<path fill="none" stroke="#06a2cb" d="M988.4,-2522.83C975.697,-2532.81 964,-2542 964,-2542 964,-2542 942,-2577 942,-2577 942,-2577 830,-2615 830,-2615 830,-2615 808,-2636 808,-2636 808,-2636 696,-2692 696,-2692 696,-2692 686,-2881 686,-2881 686,-2881 674,-3083 674,-3083 674,-3083 666.056,-3089.81 656.151,-3098.3"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="653.609,-3095.87 648.295,-3105.03 658.165,-3101.18 653.609,-3095.87"/>
+</g>
+<!-- 5,4->5,3 -->
+<g id="edge498" class="edge"><title>5,4->5,3</title>
+<path fill="none" stroke="#218559" d="M1111.42,-2504.33C1099.06,-2504.77 1085.42,-2504.86 1072.54,-2504.61"/>
+<polygon fill="#218559" stroke="#218559" points="1072.49,-2501.11 1062.4,-2504.33 1072.3,-2508.1 1072.49,-2501.11"/>
+</g>
+<!-- 4,2 -->
+<g id="node85" class="node"><title>4,2</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-1444" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1000.5" y="-1455.17" font-family="Times Roman,serif" font-size="10.00">4,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="998,-1436 998,-1450 1043,-1450 1043,-1436 998,-1436"/>
+<text text-anchor="start" x="1003.5" y="-1440.67" font-family="Times Roman,serif" font-size="10.00">AGGGT</text>
+<polygon fill="#218559" stroke="#218559" points="998,-1422 998,-1436 1043,-1436 1043,-1422 998,-1422"/>
+<text text-anchor="start" x="1004" y="-1426.67" font-family="Times Roman,serif" font-size="10.00">ACCCT</text>
+</g>
+<!-- 5,4->4,2 -->
+<g id="edge496" class="edge"><title>5,4->4,2</title>
+<path fill="none" stroke="#ebb035" d="M1142.38,-2463.14C1128.17,-2420.52 1106,-2354 1106,-2354 1106,-2354 1086,-1913 1086,-1913 1086,-1913 1076,-1684 1076,-1684 1076,-1684 1046.78,-1558.79 1030.56,-1489.25"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1033.94,-1488.36 1028.26,-1479.41 1027.13,-1489.95 1033.94,-1488.36"/>
+</g>
+<!-- 4,1 -->
+<g id="node84" class="node"><title>4,1</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1444" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="866.5" y="-1455.17" font-family="Times Roman,serif" font-size="10.00">4,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="864,-1436 864,-1450 909,-1450 909,-1436 864,-1436"/>
+<text text-anchor="start" x="868" y="-1440.67" font-family="Times Roman,serif" font-size="10.00">GAGGG</text>
+<polygon fill="#218559" stroke="#218559" points="864,-1422 864,-1436 909,-1436 909,-1422 864,-1422"/>
+<text text-anchor="start" x="870.5" y="-1426.67" font-family="Times Roman,serif" font-size="10.00">CCCTC</text>
+</g>
+<!-- 4,1->4,2 -->
+<g id="edge64" class="edge"><title>4,1->4,2</title>
+<path fill="none" stroke="#dd1e2f" d="M928.398,-1437.67C940.741,-1437.23 954.377,-1437.14 967.273,-1437.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="967.326,-1440.89 977.417,-1437.67 967.517,-1433.89 967.326,-1440.89"/>
+</g>
+<!-- 4,2->5,4 -->
+<g id="edge68" class="edge"><title>4,2->5,4</title>
+<path fill="none" stroke="#ebb035" d="M1028.26,-1479.41C1043.84,-1546.17 1076,-1684 1076,-1684 1076,-1684 1088,-1915 1088,-1915 1088,-1915 1106,-2354 1106,-2354 1106,-2354 1125.09,-2411.26 1139.19,-2453.56"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1135.9,-2454.76 1142.38,-2463.14 1142.54,-2452.55 1135.9,-2454.76"/>
+</g>
+<!-- 4,2->4,1 -->
+<g id="edge70" class="edge"><title>4,2->4,1</title>
+<path fill="none" stroke="#218559" d="M977.417,-1450.33C965.062,-1450.77 951.424,-1450.86 938.535,-1450.61"/>
+<polygon fill="#218559" stroke="#218559" points="938.491,-1447.11 928.398,-1450.33 938.298,-1454.1 938.491,-1447.11"/>
+</g>
+<!-- 4,2->4,3 -->
+<g id="edge66" class="edge"><title>4,2->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1062.4,-1437.67C1074.74,-1437.23 1088.38,-1437.14 1101.27,-1437.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1101.33,-1440.89 1111.42,-1437.67 1101.52,-1433.89 1101.33,-1440.89"/>
+</g>
+<!-- 4,3->3,4 -->
+<g id="edge74" class="edge"><title>4,3->3,4</title>
+<path fill="none" stroke="#218559" d="M1148.63,-1408C1136.11,-1323.98 1106,-1122 1106,-1122 1106,-1122 1088,-453 1088,-453 1088,-453 1045.65,-203.273 1027.79,-97.9643"/>
+<polygon fill="#218559" stroke="#218559" points="1031.22,-97.213 1026.09,-87.939 1024.32,-98.3834 1031.22,-97.213"/>
+</g>
+<!-- 4,3->4,2 -->
+<g id="edge76" class="edge"><title>4,3->4,2</title>
+<path fill="none" stroke="#218559" d="M1111.42,-1450.33C1099.06,-1450.77 1085.42,-1450.86 1072.54,-1450.61"/>
+<polygon fill="#218559" stroke="#218559" points="1072.49,-1447.11 1062.4,-1450.33 1072.3,-1454.1 1072.49,-1447.11"/>
+</g>
+<!-- 4,3->4,4 -->
+<g id="edge72" class="edge"><title>4,3->4,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1196.4,-1437.67C1208.74,-1437.23 1222.38,-1437.14 1235.27,-1437.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1235.33,-1440.89 1245.42,-1437.67 1235.52,-1433.89 1235.33,-1440.89"/>
+</g>
+<!-- 4,4->5,2 -->
+<g id="edge78" class="edge"><title>4,4->5,2</title>
+<path fill="none" stroke="#ebb035" d="M1284.24,-1479.88C1268.66,-1628.5 1210,-2188 1210,-2188 1210,-2188 964,-2228 964,-2228 964,-2228 942,-2450 942,-2450 942,-2450 934.056,-2456.81 924.151,-2465.3"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="921.609,-2462.87 916.295,-2472.03 926.165,-2468.18 921.609,-2462.87"/>
+</g>
+<!-- 4,4->4,3 -->
+<g id="edge80" class="edge"><title>4,4->4,3</title>
+<path fill="none" stroke="#218559" d="M1245.42,-1450.33C1233.06,-1450.77 1219.42,-1450.86 1206.54,-1450.61"/>
+<polygon fill="#218559" stroke="#218559" points="1206.49,-1447.11 1196.4,-1450.33 1206.3,-1454.1 1206.49,-1447.11"/>
+</g>
+<!-- 7,1->24,2 -->
+<g id="edge504" class="edge"><title>7,1->24,2</title>
+<path fill="none" stroke="#06a2cb" d="M514.295,-4654.97C527.489,-4666.28 540,-4677 540,-4677 540,-4677 562,-5072 562,-5072 562,-5072 569.283,-5078.24 578.596,-5086.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="576.529,-5089.06 586.4,-5092.91 581.085,-5083.75 576.529,-5089.06"/>
+</g>
+<!-- 7,1->23,1 -->
+<g id="edge508" class="edge"><title>7,1->23,1</title>
+<path fill="none" stroke="#218559" d="M451.161,-4652.53C433.641,-4665.08 411.867,-4680.68 393.012,-4694.19"/>
+<polygon fill="#218559" stroke="#218559" points="390.948,-4691.36 384.857,-4700.03 395.025,-4697.05 390.948,-4691.36"/>
+</g>
+<!-- 7,1->7,2 -->
+<g id="edge500" class="edge"><title>7,1->7,2</title>
+<path fill="none" stroke="#dd1e2f" d="M526.398,-4622.67C538.741,-4622.23 552.377,-4622.14 565.273,-4622.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="565.326,-4625.89 575.417,-4622.67 565.517,-4618.89 565.326,-4625.89"/>
+</g>
+<!-- 8,3 -->
+<g id="node106" class="node"><title>8,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-5024" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="598.5" y="-5035.17" font-family="Times Roman,serif" font-size="10.00">8,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="596,-5016 596,-5030 641,-5030 641,-5016 596,-5016"/>
+<text text-anchor="start" x="602" y="-5020.67" font-family="Times Roman,serif" font-size="10.00">GAAAT</text>
+<polygon fill="#218559" stroke="#218559" points="596,-5002 596,-5016 641,-5016 641,-5002 596,-5002"/>
+<text text-anchor="start" x="604.5" y="-5006.67" font-family="Times Roman,serif" font-size="10.00">ATTTC</text>
+</g>
+<!-- 7,1->8,3 -->
+<g id="edge502" class="edge"><title>7,1->8,3</title>
+<path fill="none" stroke="#06a2cb" d="M513.64,-4602.54C527.072,-4590.54 540,-4579 540,-4579 540,-4579 550,-4340 550,-4340 550,-4340 552,-4340 552,-4340 552,-4340 562,-4773 562,-4773 562,-4773 591.791,-4906.53 607.915,-4978.8"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="604.502,-4979.57 610.095,-4988.57 611.334,-4978.05 604.502,-4979.57"/>
+</g>
+<!-- 18,2 -->
+<g id="node155" class="node"><title>18,2</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-4463" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-4474.17" font-family="Times Roman,serif" font-size="10.00">18,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-4455 325,-4469 376,-4469 376,-4455 325,-4455"/>
+<text text-anchor="start" x="336" y="-4459.67" font-family="Times Roman,serif" font-size="10.00">GTTTC</text>
+<polygon fill="#218559" stroke="#218559" points="325,-4441 325,-4455 376,-4455 376,-4441 325,-4441"/>
+<text text-anchor="start" x="333" y="-4445.67" font-family="Times Roman,serif" font-size="10.00">GAAAC</text>
+</g>
+<!-- 7,1->18,2 -->
+<g id="edge506" class="edge"><title>7,1->18,2</title>
+<path fill="none" stroke="#218559" d="M468.102,-4595.5C451.546,-4560.62 428,-4511 428,-4511 428,-4511 418,-4336 418,-4336 418,-4336 416,-4336 416,-4336 416,-4336 391.411,-4383.32 372.25,-4420.19"/>
+<polygon fill="#218559" stroke="#218559" points="369.019,-4418.81 367.514,-4429.3 375.231,-4422.04 369.019,-4418.81"/>
+</g>
+<!-- 7,2->24,1 -->
+<g id="edge514" class="edge"><title>7,2->24,1</title>
+<path fill="none" stroke="#06a2cb" d="M587.705,-4654.97C574.511,-4666.28 562,-4677 562,-4677 562,-4677 552,-4873 552,-4873 552,-4873 550,-4901 550,-4901 550,-4901 540,-5072 540,-5072 540,-5072 532.717,-5078.24 523.404,-5086.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="520.915,-5083.75 515.6,-5092.91 525.471,-5089.06 520.915,-5083.75"/>
+</g>
+<!-- 7,2->7,1 -->
+<g id="edge516" class="edge"><title>7,2->7,1</title>
+<path fill="none" stroke="#218559" d="M575.417,-4635.33C563.062,-4635.77 549.424,-4635.86 536.535,-4635.61"/>
+<polygon fill="#218559" stroke="#218559" points="536.491,-4632.11 526.398,-4635.33 536.298,-4639.1 536.491,-4632.11"/>
+</g>
+<!-- 7,3 -->
+<g id="node91" class="node"><title>7,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-4629" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="732.5" y="-4640.17" font-family="Times Roman,serif" font-size="10.00">7,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="730,-4621 730,-4635 775,-4635 775,-4621 730,-4621"/>
+<text text-anchor="start" x="736" y="-4625.67" font-family="Times Roman,serif" font-size="10.00">TCAGC</text>
+<polygon fill="#218559" stroke="#218559" points="730,-4607 730,-4621 775,-4621 775,-4607 730,-4607"/>
+<text text-anchor="start" x="735.5" y="-4611.67" font-family="Times Roman,serif" font-size="10.00">GCTGA</text>
+</g>
+<!-- 7,2->7,3 -->
+<g id="edge510" class="edge"><title>7,2->7,3</title>
+<path fill="none" stroke="#dd1e2f" d="M660.398,-4622.67C672.741,-4622.23 686.377,-4622.14 699.273,-4622.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="699.326,-4625.89 709.417,-4622.67 699.517,-4618.89 699.326,-4625.89"/>
+</g>
+<!-- 6,4 -->
+<g id="node97" class="node"><title>6,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-3131" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1000.5" y="-3142.17" font-family="Times Roman,serif" font-size="10.00">6,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="998,-3123 998,-3137 1043,-3137 1043,-3123 998,-3123"/>
+<text text-anchor="start" x="1003.5" y="-3127.67" font-family="Times Roman,serif" font-size="10.00">GCTGA</text>
+<polygon fill="#218559" stroke="#218559" points="998,-3109 998,-3123 1043,-3123 1043,-3109 998,-3109"/>
+<text text-anchor="start" x="1004" y="-3113.67" font-family="Times Roman,serif" font-size="10.00">TCAGC</text>
+</g>
+<!-- 7,2->6,4 -->
+<g id="edge512" class="edge"><title>7,2->6,4</title>
+<path fill="none" stroke="#ebb035" d="M653.804,-4608.34C674.309,-4596.51 696,-4584 696,-4584 696,-4584 808,-4541 808,-4541 808,-4541 820,-4518 820,-4518 820,-4518 942,-4081 942,-4081 942,-4081 1000.55,-3367.83 1016.2,-3177.32"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1019.71,-3177.33 1017.04,-3167.08 1012.73,-3176.76 1019.71,-3177.33"/>
+</g>
+<!-- 7,3->7,2 -->
+<g id="edge522" class="edge"><title>7,3->7,2</title>
+<path fill="none" stroke="#218559" d="M709.417,-4635.33C697.062,-4635.77 683.424,-4635.86 670.535,-4635.61"/>
+<polygon fill="#218559" stroke="#218559" points="670.491,-4632.11 660.398,-4635.33 670.298,-4639.1 670.491,-4632.11"/>
+</g>
+<!-- 7,4 -->
+<g id="node92" class="node"><title>7,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-4629" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="866.5" y="-4640.17" font-family="Times Roman,serif" font-size="10.00">7,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="864,-4621 864,-4635 909,-4635 909,-4621 864,-4621"/>
+<text text-anchor="start" x="869" y="-4625.67" font-family="Times Roman,serif" font-size="10.00">CAGCA</text>
+<polygon fill="#218559" stroke="#218559" points="864,-4607 864,-4621 909,-4621 909,-4607 864,-4607"/>
+<text text-anchor="start" x="870" y="-4611.67" font-family="Times Roman,serif" font-size="10.00">TGCTG</text>
+</g>
+<!-- 7,3->7,4 -->
+<g id="edge518" class="edge"><title>7,3->7,4</title>
+<path fill="none" stroke="#dd1e2f" d="M794.398,-4622.67C806.741,-4622.23 820.377,-4622.14 833.273,-4622.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="833.326,-4625.89 843.417,-4622.67 833.517,-4618.89 833.326,-4625.89"/>
+</g>
+<!-- 7,3->8,1 -->
+<g id="edge520" class="edge"><title>7,3->8,1</title>
+<path fill="none" stroke="#06a2cb" d="M739.294,-4594.14C717.192,-4533.5 674,-4415 674,-4415 674,-4415 552,-4311 552,-4311 552,-4311 550,-4311 550,-4311 550,-4311 540,-4545 540,-4545 540,-4545 428,-4584 428,-4584 428,-4584 398,-4773 398,-4773 398,-4773 372.635,-4905.64 358.782,-4978.08"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="355.267,-4977.82 356.826,-4988.3 362.142,-4979.14 355.267,-4977.82"/>
+</g>
+<!-- 7,4->7,3 -->
+<g id="edge526" class="edge"><title>7,4->7,3</title>
+<path fill="none" stroke="#218559" d="M843.417,-4635.33C831.062,-4635.77 817.424,-4635.86 804.535,-4635.61"/>
+<polygon fill="#218559" stroke="#218559" points="804.491,-4632.11 794.398,-4635.33 804.298,-4639.1 804.491,-4632.11"/>
+</g>
+<!-- 6,2 -->
+<g id="node95" class="node"><title>6,2</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-3131" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="732.5" y="-3142.17" font-family="Times Roman,serif" font-size="10.00">6,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="730,-3123 730,-3137 775,-3137 775,-3123 730,-3123"/>
+<text text-anchor="start" x="737.5" y="-3127.67" font-family="Times Roman,serif" font-size="10.00">TTGCT</text>
+<polygon fill="#218559" stroke="#218559" points="730,-3109 730,-3123 775,-3123 775,-3109 730,-3109"/>
+<text text-anchor="start" x="735" y="-3113.67" font-family="Times Roman,serif" font-size="10.00">AGCAA</text>
+</g>
+<!-- 7,4->6,2 -->
+<g id="edge524" class="edge"><title>7,4->6,2</title>
+<path fill="none" stroke="#ebb035" d="M883.839,-4593.04C873.548,-4421.76 830,-3697 830,-3697 830,-3697 808,-3186 808,-3186 808,-3186 798.399,-3176.57 787.148,-3165.52"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="789.587,-3163.01 780,-3158.5 784.682,-3168 789.587,-3163.01"/>
+</g>
+<!-- 6,1->5,3 -->
+<g id="edge84" class="edge"><title>6,1->5,3</title>
+<path fill="none" stroke="#06a2cb" d="M648.295,-3105.03C661.489,-3093.72 674,-3083 674,-3083 674,-3083 696,-2650 696,-2650 696,-2650 818,-2587 818,-2587 818,-2587 942,-2543 942,-2543 942,-2543 957.73,-2533.93 975.061,-2523.93"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="977.283,-2526.69 984.196,-2518.66 973.785,-2520.62 977.283,-2526.69"/>
+</g>
+<!-- 6,1->6,2 -->
+<g id="edge82" class="edge"><title>6,1->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M660.398,-3124.67C672.741,-3124.23 686.377,-3124.14 699.273,-3124.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="699.326,-3127.89 709.417,-3124.67 699.517,-3120.89 699.326,-3127.89"/>
+</g>
+<!-- 6,2->7,4 -->
+<g id="edge88" class="edge"><title>6,2->7,4</title>
+<path fill="none" stroke="#ebb035" d="M790.372,-3147.93C805.796,-3154.73 820,-3161 820,-3161 820,-3161 830,-3697 830,-3697 830,-3697 871.947,-4395.11 883.232,-4582.94"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="879.745,-4583.26 883.839,-4593.04 886.733,-4582.84 879.745,-4583.26"/>
+</g>
+<!-- 6,2->6,1 -->
+<g id="edge90" class="edge"><title>6,2->6,1</title>
+<path fill="none" stroke="#218559" d="M709.417,-3137.33C697.062,-3137.77 683.424,-3137.86 670.535,-3137.61"/>
+<polygon fill="#218559" stroke="#218559" points="670.491,-3134.11 660.398,-3137.33 670.298,-3141.1 670.491,-3134.11"/>
+</g>
+<!-- 6,2->6,3 -->
+<g id="edge86" class="edge"><title>6,2->6,3</title>
+<path fill="none" stroke="#dd1e2f" d="M794.398,-3124.67C806.741,-3124.23 820.377,-3124.14 833.273,-3124.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="833.326,-3127.89 843.417,-3124.67 833.517,-3120.89 833.326,-3127.89"/>
+</g>
+<!-- 6,3->5,1 -->
+<g id="edge94" class="edge"><title>6,3->5,1</title>
+<path fill="none" stroke="#06a2cb" d="M855.705,-3105.03C842.511,-3093.72 830,-3083 830,-3083 830,-3083 820,-2834 820,-2834 820,-2834 808,-2692 808,-2692 808,-2692 781.55,-2600.37 765.006,-2543.06"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="768.277,-2541.77 762.141,-2533.13 761.552,-2543.71 768.277,-2541.77"/>
+</g>
+<!-- 6,3->6,2 -->
+<g id="edge96" class="edge"><title>6,3->6,2</title>
+<path fill="none" stroke="#218559" d="M843.417,-3137.33C831.062,-3137.77 817.424,-3137.86 804.535,-3137.61"/>
+<polygon fill="#218559" stroke="#218559" points="804.491,-3134.11 794.398,-3137.33 804.298,-3141.1 804.491,-3134.11"/>
+</g>
+<!-- 6,3->6,4 -->
+<g id="edge92" class="edge"><title>6,3->6,4</title>
+<path fill="none" stroke="#dd1e2f" d="M928.398,-3124.67C940.741,-3124.23 954.377,-3124.14 967.273,-3124.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="967.326,-3127.89 977.417,-3124.67 967.517,-3120.89 967.326,-3127.89"/>
+</g>
+<!-- 6,4->7,2 -->
+<g id="edge98" class="edge"><title>6,4->7,2</title>
+<path fill="none" stroke="#ebb035" d="M1015.06,-3166.84C997.916,-3291.25 942,-3697 942,-3697 942,-3697 820,-4503 820,-4503 820,-4503 684,-4551 684,-4551 684,-4551 674,-4579 674,-4579 674,-4579 665.629,-4586.47 655.36,-4595.64"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="652.768,-4593.26 647.64,-4602.54 657.43,-4598.49 652.768,-4593.26"/>
+</g>
+<!-- 6,4->6,3 -->
+<g id="edge100" class="edge"><title>6,4->6,3</title>
+<path fill="none" stroke="#218559" d="M977.417,-3137.33C965.062,-3137.77 951.424,-3137.86 938.535,-3137.61"/>
+<polygon fill="#218559" stroke="#218559" points="938.491,-3134.11 928.398,-3137.33 938.298,-3141.1 938.491,-3134.11"/>
+</g>
+<!-- 9,1 -->
+<g id="node99" class="node"><title>9,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-3483" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="464.5" y="-3494.17" font-family="Times Roman,serif" font-size="10.00">9,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="462,-3475 462,-3489 507,-3489 507,-3475 462,-3475"/>
+<text text-anchor="start" x="468" y="-3479.67" font-family="Times Roman,serif" font-size="10.00">CAGAT</text>
+<polygon fill="#218559" stroke="#218559" points="462,-3461 462,-3475 507,-3475 507,-3461 462,-3461"/>
+<text text-anchor="start" x="469" y="-3465.67" font-family="Times Roman,serif" font-size="10.00">ATCTG</text>
+</g>
+<!-- 9,2 -->
+<g id="node100" class="node"><title>9,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-3483" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="598.5" y="-3494.17" font-family="Times Roman,serif" font-size="10.00">9,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="596,-3475 596,-3489 641,-3489 641,-3475 596,-3475"/>
+<text text-anchor="start" x="603" y="-3479.67" font-family="Times Roman,serif" font-size="10.00">AGATT</text>
+<polygon fill="#218559" stroke="#218559" points="596,-3461 596,-3475 641,-3475 641,-3461 596,-3461"/>
+<text text-anchor="start" x="603.5" y="-3465.67" font-family="Times Roman,serif" font-size="10.00">AATCT</text>
+</g>
+<!-- 9,1->9,2 -->
+<g id="edge528" class="edge"><title>9,1->9,2</title>
+<path fill="none" stroke="#dd1e2f" d="M526.398,-3476.67C538.741,-3476.23 552.377,-3476.14 565.273,-3476.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="565.326,-3479.89 575.417,-3476.67 565.517,-3472.89 565.326,-3479.89"/>
+</g>
+<!-- 10,2 -->
+<g id="node115" class="node"><title>10,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-3035" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-3046.17" font-family="Times Roman,serif" font-size="10.00">10,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-3027 593,-3041 644,-3041 644,-3027 593,-3027"/>
+<text text-anchor="start" x="600.5" y="-3031.67" font-family="Times Roman,serif" font-size="10.00">GCAGA</text>
+<polygon fill="#218559" stroke="#218559" points="593,-3013 593,-3027 644,-3027 644,-3013 593,-3013"/>
+<text text-anchor="start" x="603" y="-3017.67" font-family="Times Roman,serif" font-size="10.00">TCTGC</text>
+</g>
+<!-- 9,1->10,2 -->
+<g id="edge530" class="edge"><title>9,1->10,2</title>
+<path fill="none" stroke="#218559" d="M493.883,-3447.53C509.959,-3389.82 540,-3282 540,-3282 540,-3282 552,-3197 552,-3197 552,-3197 562,-3083 562,-3083 562,-3083 569.283,-3076.76 578.596,-3068.78"/>
+<polygon fill="#218559" stroke="#218559" points="581.085,-3071.25 586.4,-3062.09 576.529,-3065.94 581.085,-3071.25"/>
+</g>
+<!-- 9,2->9,1 -->
+<g id="edge536" class="edge"><title>9,2->9,1</title>
+<path fill="none" stroke="#218559" d="M575.417,-3489.33C563.062,-3489.77 549.424,-3489.86 536.535,-3489.61"/>
+<polygon fill="#218559" stroke="#218559" points="536.491,-3486.11 526.398,-3489.33 536.298,-3493.1 536.491,-3486.11"/>
+</g>
+<!-- 9,3 -->
+<g id="node101" class="node"><title>9,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-3483" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="732.5" y="-3494.17" font-family="Times Roman,serif" font-size="10.00">9,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="730,-3475 730,-3489 775,-3489 775,-3475 730,-3475"/>
+<text text-anchor="start" x="738" y="-3479.67" font-family="Times Roman,serif" font-size="10.00">GATTT</text>
+<polygon fill="#218559" stroke="#218559" points="730,-3461 730,-3475 775,-3475 775,-3461 730,-3461"/>
+<text text-anchor="start" x="736.5" y="-3465.67" font-family="Times Roman,serif" font-size="10.00">AAATC</text>
+</g>
+<!-- 9,2->9,3 -->
+<g id="edge532" class="edge"><title>9,2->9,3</title>
+<path fill="none" stroke="#dd1e2f" d="M660.398,-3476.67C672.741,-3476.23 686.377,-3476.14 699.273,-3476.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="699.326,-3479.89 709.417,-3476.67 699.517,-3472.89 699.326,-3479.89"/>
+</g>
+<!-- 8,4 -->
+<g id="node107" class="node"><title>8,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-5024" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="732.5" y="-5035.17" font-family="Times Roman,serif" font-size="10.00">8,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="730,-5016 730,-5030 775,-5030 775,-5016 730,-5016"/>
+<text text-anchor="start" x="736.5" y="-5020.67" font-family="Times Roman,serif" font-size="10.00">AAATC</text>
+<polygon fill="#218559" stroke="#218559" points="730,-5002 730,-5016 775,-5016 775,-5002 730,-5002"/>
+<text text-anchor="start" x="738" y="-5006.67" font-family="Times Roman,serif" font-size="10.00">GATTT</text>
+</g>
+<!-- 9,2->8,4 -->
+<g id="edge534" class="edge"><title>9,2->8,4</title>
+<path fill="none" stroke="#ebb035" d="M633.898,-3516.5C650.454,-3551.38 674,-3601 674,-3601 674,-3601 686,-3822 686,-3822 686,-3822 696,-4773 696,-4773 696,-4773 725.791,-4906.53 741.915,-4978.8"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="738.502,-4979.57 744.095,-4988.57 745.334,-4978.05 738.502,-4979.57"/>
+</g>
+<!-- 9,3->9,2 -->
+<g id="edge542" class="edge"><title>9,3->9,2</title>
+<path fill="none" stroke="#218559" d="M709.417,-3489.33C697.062,-3489.77 683.424,-3489.86 670.535,-3489.61"/>
+<polygon fill="#218559" stroke="#218559" points="670.491,-3486.11 660.398,-3489.33 670.298,-3493.1 670.491,-3486.11"/>
+</g>
+<!-- 9,4 -->
+<g id="node102" class="node"><title>9,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-3483" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="866.5" y="-3494.17" font-family="Times Roman,serif" font-size="10.00">9,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="864,-3475 864,-3489 909,-3489 909,-3475 864,-3475"/>
+<text text-anchor="start" x="872.5" y="-3479.67" font-family="Times Roman,serif" font-size="10.00">ATTTC</text>
+<polygon fill="#218559" stroke="#218559" points="864,-3461 864,-3475 909,-3475 909,-3461 864,-3461"/>
+<text text-anchor="start" x="870" y="-3465.67" font-family="Times Roman,serif" font-size="10.00">GAAAT</text>
+</g>
+<!-- 9,3->9,4 -->
+<g id="edge538" class="edge"><title>9,3->9,4</title>
+<path fill="none" stroke="#dd1e2f" d="M794.398,-3476.67C806.741,-3476.23 820.377,-3476.14 833.273,-3476.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="833.326,-3479.89 843.417,-3476.67 833.517,-3472.89 833.326,-3479.89"/>
+</g>
+<!-- 10,4 -->
+<g id="node117" class="node"><title>10,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-3035" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-3046.17" font-family="Times Roman,serif" font-size="10.00">10,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-3027 861,-3041 912,-3041 912,-3027 861,-3027"/>
+<text text-anchor="start" x="871" y="-3031.67" font-family="Times Roman,serif" font-size="10.00">AGATT</text>
+<polygon fill="#218559" stroke="#218559" points="861,-3013 861,-3027 912,-3027 912,-3013 861,-3013"/>
+<text text-anchor="start" x="871.5" y="-3017.67" font-family="Times Roman,serif" font-size="10.00">AATCT</text>
+</g>
+<!-- 9,3->10,4 -->
+<g id="edge540" class="edge"><title>9,3->10,4</title>
+<path fill="none" stroke="#218559" d="M761.883,-3447.53C777.959,-3389.82 808,-3282 808,-3282 808,-3282 820,-3197 820,-3197 820,-3197 830,-3083 830,-3083 830,-3083 837.283,-3076.76 846.596,-3068.78"/>
+<polygon fill="#218559" stroke="#218559" points="849.085,-3071.25 854.4,-3062.09 844.529,-3065.94 849.085,-3071.25"/>
+</g>
+<!-- 9,4->9,3 -->
+<g id="edge546" class="edge"><title>9,4->9,3</title>
+<path fill="none" stroke="#218559" d="M843.417,-3489.33C831.062,-3489.77 817.424,-3489.86 804.535,-3489.61"/>
+<polygon fill="#218559" stroke="#218559" points="804.491,-3486.11 794.398,-3489.33 804.298,-3493.1 804.491,-3486.11"/>
+</g>
+<!-- 8,2 -->
+<g id="node105" class="node"><title>8,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-5024" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="464.5" y="-5035.17" font-family="Times Roman,serif" font-size="10.00">8,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="462,-5016 462,-5030 507,-5030 507,-5016 462,-5016"/>
+<text text-anchor="start" x="467.5" y="-5020.67" font-family="Times Roman,serif" font-size="10.00">TGAAA</text>
+<polygon fill="#218559" stroke="#218559" points="462,-5002 462,-5016 507,-5016 507,-5002 462,-5002"/>
+<text text-anchor="start" x="470" y="-5006.67" font-family="Times Roman,serif" font-size="10.00">TTTCA</text>
+</g>
+<!-- 9,4->8,2 -->
+<g id="edge544" class="edge"><title>9,4->8,2</title>
+<path fill="none" stroke="#ebb035" d="M870.102,-3516.5C853.546,-3551.38 830,-3601 830,-3601 830,-3601 808,-3996 808,-3996 808,-3996 674,-4511 674,-4511 674,-4511 562,-4541 562,-4541 562,-4541 540,-4855 540,-4855 540,-4855 515.396,-4929.25 498.805,-4979.32"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="495.426,-4978.39 495.603,-4988.98 502.071,-4980.59 495.426,-4978.39"/>
+</g>
+<!-- 8,1->24,1 -->
+<g id="edge102" class="edge"><title>8,1->24,1</title>
+<path fill="none" stroke="#dd1e2f" d="M382.78,-5047.48C400.328,-5060.06 422.158,-5075.7 441.047,-5089.23"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="439.049,-5092.1 449.217,-5095.08 443.126,-5086.41 439.049,-5092.1"/>
+</g>
+<!-- 8,1->7,3 -->
+<g id="edge106" class="edge"><title>8,1->7,3</title>
+<path fill="none" stroke="#06a2cb" d="M356.826,-4988.3C370.078,-4919.01 398,-4773 398,-4773 398,-4773 428,-4551 428,-4551 428,-4551 540,-4511 540,-4511 540,-4511 550,-4259 550,-4259 550,-4259 562,-4248 562,-4248 562,-4248 674,-4162 674,-4162 674,-4162 684,-3800 684,-3800 684,-3800 686,-3800 686,-3800 686,-3800 696,-3996 696,-3996 696,-3996 734.995,-4436.78 747.905,-4582.71"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="744.42,-4583.04 748.788,-4592.69 751.393,-4582.42 744.42,-4583.04"/>
+</g>
+<!-- 8,1->8,2 -->
+<g id="edge104" class="edge"><title>8,1->8,2</title>
+<path fill="none" stroke="#dd1e2f" d="M392.398,-5017.67C404.741,-5017.23 418.377,-5017.14 431.273,-5017.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="431.326,-5020.89 441.417,-5017.67 431.517,-5013.89 431.326,-5020.89"/>
+</g>
+<!-- 8,2->9,4 -->
+<g id="edge110" class="edge"><title>8,2->9,4</title>
+<path fill="none" stroke="#ebb035" d="M495.845,-4988.99C515.327,-4931.4 552,-4823 552,-4823 552,-4823 562,-4581 562,-4581 562,-4581 674,-4541 674,-4541 674,-4541 696,-4504 696,-4504 696,-4504 820,-4465 820,-4465 820,-4465 830,-3601 830,-3601 830,-3601 849.673,-3559.55 865.699,-3525.78"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="868.977,-3527.03 870.102,-3516.5 862.653,-3524.03 868.977,-3527.03"/>
+</g>
+<!-- 8,2->8,1 -->
+<g id="edge112" class="edge"><title>8,2->8,1</title>
+<path fill="none" stroke="#218559" d="M441.417,-5030.33C429.062,-5030.77 415.424,-5030.86 402.535,-5030.61"/>
+<polygon fill="#218559" stroke="#218559" points="402.491,-5027.11 392.398,-5030.33 402.298,-5034.1 402.491,-5027.11"/>
+</g>
+<!-- 8,2->8,3 -->
+<g id="edge108" class="edge"><title>8,2->8,3</title>
+<path fill="none" stroke="#dd1e2f" d="M526.398,-5017.67C538.741,-5017.23 552.377,-5017.14 565.273,-5017.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="565.326,-5020.89 575.417,-5017.67 565.517,-5013.89 565.326,-5020.89"/>
+</g>
+<!-- 8,3->7,1 -->
+<g id="edge116" class="edge"><title>8,3->7,1</title>
+<path fill="none" stroke="#06a2cb" d="M610.095,-4988.57C594.659,-4919.38 562,-4773 562,-4773 562,-4773 552,-4315 552,-4315 552,-4315 550,-4315 550,-4315 550,-4315 540,-4579 540,-4579 540,-4579 531.629,-4586.47 521.36,-4595.64"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="518.768,-4593.26 513.64,-4602.54 523.43,-4598.49 518.768,-4593.26"/>
+</g>
+<!-- 8,3->8,2 -->
+<g id="edge120" class="edge"><title>8,3->8,2</title>
+<path fill="none" stroke="#218559" d="M575.417,-5030.33C563.062,-5030.77 549.424,-5030.86 536.535,-5030.61"/>
+<polygon fill="#218559" stroke="#218559" points="536.491,-5027.11 526.398,-5030.33 536.298,-5034.1 536.491,-5027.11"/>
+</g>
+<!-- 8,3->8,4 -->
+<g id="edge114" class="edge"><title>8,3->8,4</title>
+<path fill="none" stroke="#dd1e2f" d="M660.398,-5017.67C672.741,-5017.23 686.377,-5017.14 699.273,-5017.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="699.326,-5020.89 709.417,-5017.67 699.517,-5013.89 699.326,-5020.89"/>
+</g>
+<!-- 8,3->19,4 -->
+<g id="edge118" class="edge"><title>8,3->19,4</title>
+<path fill="none" stroke="#218559" d="M587.705,-5049.97C574.511,-5061.28 562,-5072 562,-5072 562,-5072 552,-5193 552,-5193 552,-5193 545.818,-5195.09 537.182,-5198.01"/>
+<polygon fill="#218559" stroke="#218559" points="535.802,-5194.78 527.45,-5201.3 538.045,-5201.41 535.802,-5194.78"/>
+</g>
+<!-- 8,4->9,2 -->
+<g id="edge122" class="edge"><title>8,4->9,2</title>
+<path fill="none" stroke="#ebb035" d="M744.095,-4988.57C728.659,-4919.38 696,-4773 696,-4773 696,-4773 686,-3557 686,-3557 686,-3557 668.526,-3537.98 651.293,-3519.23"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="653.829,-3516.82 644.485,-3511.82 648.674,-3521.55 653.829,-3516.82"/>
+</g>
+<!-- 8,4->8,3 -->
+<g id="edge124" class="edge"><title>8,4->8,3</title>
+<path fill="none" stroke="#218559" d="M709.417,-5030.33C697.062,-5030.77 683.424,-5030.86 670.535,-5030.61"/>
+<polygon fill="#218559" stroke="#218559" points="670.491,-5027.11 660.398,-5030.33 670.298,-5034.1 670.491,-5027.11"/>
+</g>
+<!-- 11,1 -->
+<g id="node109" class="node"><title>11,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">11,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-652 325,-666 376,-666 376,-652 325,-652"/>
+<text text-anchor="start" x="333.5" y="-656.667" font-family="Times Roman,serif" font-size="10.00">CTGGC</text>
+<polygon fill="#218559" stroke="#218559" points="325,-638 325,-652 376,-652 376,-638 325,-638"/>
+<text text-anchor="start" x="332.5" y="-642.667" font-family="Times Roman,serif" font-size="10.00">GCCAG</text>
+</g>
+<!-- 11,2 -->
+<g id="node110" class="node"><title>11,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">11,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-652 459,-666 510,-666 510,-652 459,-652"/>
+<text text-anchor="start" x="467.5" y="-656.667" font-family="Times Roman,serif" font-size="10.00">TGGCA</text>
+<polygon fill="#218559" stroke="#218559" points="459,-638 459,-652 510,-652 510,-638 459,-638"/>
+<text text-anchor="start" x="467.5" y="-642.667" font-family="Times Roman,serif" font-size="10.00">TGCCA</text>
+</g>
+<!-- 11,1->11,2 -->
+<g id="edge548" class="edge"><title>11,1->11,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-653.529C406.501,-653.254 416.748,-653.183 426.703,-653.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-656.817 436.933,-653.523 427.007,-649.819 426.864,-656.817"/>
+</g>
+<!-- 12,2 -->
+<g id="node125" class="node"><title>12,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-564" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-575.167" font-family="Times Roman,serif" font-size="10.00">12,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-556 459,-570 510,-570 510,-556 459,-556"/>
+<text text-anchor="start" x="468.5" y="-560.667" font-family="Times Roman,serif" font-size="10.00">TCTGG</text>
+<polygon fill="#218559" stroke="#218559" points="459,-542 459,-556 510,-556 510,-542 459,-542"/>
+<text text-anchor="start" x="467" y="-546.667" font-family="Times Roman,serif" font-size="10.00">CCAGA</text>
+</g>
+<!-- 11,1->12,2 -->
+<g id="edge550" class="edge"><title>11,1->12,2</title>
+<path fill="none" stroke="#218559" d="M387.58,-637.894C403.307,-628.643 418,-620 418,-620 418,-620 430.304,-609.561 444.286,-597.696"/>
+<polygon fill="#218559" stroke="#218559" points="446.799,-600.154 452.16,-591.016 442.27,-594.817 446.799,-600.154"/>
+</g>
+<!-- 11,2->11,1 -->
+<g id="edge556" class="edge"><title>11,2->11,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-666.477C427.29,-666.749 417.041,-666.817 407.091,-666.684"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-663.18 396.867,-666.471 406.792,-670.179 406.937,-663.18"/>
+</g>
+<!-- 11,3 -->
+<g id="node111" class="node"><title>11,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">11,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-652 593,-666 644,-666 644,-652 593,-652"/>
+<text text-anchor="start" x="600.5" y="-656.667" font-family="Times Roman,serif" font-size="10.00">GGCAG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-638 593,-652 644,-652 644,-638 593,-638"/>
+<text text-anchor="start" x="601.5" y="-642.667" font-family="Times Roman,serif" font-size="10.00">CTGCC</text>
+</g>
+<!-- 11,2->11,3 -->
+<g id="edge554" class="edge"><title>11,2->11,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-653.529C540.501,-653.254 550.748,-653.183 560.703,-653.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-656.817 570.933,-653.523 561.007,-649.819 560.864,-656.817"/>
+</g>
+<!-- 10,1 -->
+<g id="node114" class="node"><title>10,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-3035" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-3046.17" font-family="Times Roman,serif" font-size="10.00">10,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-3027 459,-3041 510,-3041 510,-3027 459,-3027"/>
+<text text-anchor="start" x="466.5" y="-3031.67" font-family="Times Roman,serif" font-size="10.00">GGCAG</text>
+<polygon fill="#218559" stroke="#218559" points="459,-3013 459,-3027 510,-3027 510,-3013 459,-3013"/>
+<text text-anchor="start" x="467.5" y="-3017.67" font-family="Times Roman,serif" font-size="10.00">CTGCC</text>
+</g>
+<!-- 11,2->10,1 -->
+<g id="edge552" class="edge"><title>11,2->10,1</title>
+<path fill="none" stroke="#dd1e2f" d="M480.949,-696.137C467.359,-857.109 413,-1501 413,-1501 413,-1501 413,-2875 413,-2875 413,-2875 443.68,-2944.14 464.704,-2991.52"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="461.513,-2992.96 468.769,-3000.68 467.912,-2990.12 461.513,-2992.96"/>
+</g>
+<!-- 11,3->11,2 -->
+<g id="edge562" class="edge"><title>11,3->11,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-666.477C561.29,-666.749 551.041,-666.817 541.091,-666.684"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-663.18 530.867,-666.471 540.792,-670.179 540.937,-663.18"/>
+</g>
+<!-- 11,4 -->
+<g id="node112" class="node"><title>11,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">11,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-652 727,-666 778,-666 778,-652 727,-652"/>
+<text text-anchor="start" x="734.5" y="-656.667" font-family="Times Roman,serif" font-size="10.00">GCAGA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-638 727,-652 778,-652 778,-638 727,-638"/>
+<text text-anchor="start" x="737" y="-642.667" font-family="Times Roman,serif" font-size="10.00">TCTGC</text>
+</g>
+<!-- 11,3->11,4 -->
+<g id="edge558" class="edge"><title>11,3->11,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-653.529C674.501,-653.254 684.748,-653.183 694.703,-653.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-656.817 704.933,-653.523 695.007,-649.819 694.864,-656.817"/>
+</g>
+<!-- 12,4 -->
+<g id="node127" class="node"><title>12,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-564" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-575.167" font-family="Times Roman,serif" font-size="10.00">12,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-556 727,-570 778,-570 778,-556 727,-556"/>
+<text text-anchor="start" x="735.5" y="-560.667" font-family="Times Roman,serif" font-size="10.00">TGGCA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-542 727,-556 778,-556 778,-542 727,-542"/>
+<text text-anchor="start" x="735.5" y="-546.667" font-family="Times Roman,serif" font-size="10.00">TGCCA</text>
+</g>
+<!-- 11,3->12,4 -->
+<g id="edge560" class="edge"><title>11,3->12,4</title>
+<path fill="none" stroke="#218559" d="M655.58,-637.894C671.307,-628.643 686,-620 686,-620 686,-620 698.304,-609.561 712.286,-597.696"/>
+<polygon fill="#218559" stroke="#218559" points="714.799,-600.154 720.16,-591.016 710.27,-594.817 714.799,-600.154"/>
+</g>
+<!-- 11,4->11,3 -->
+<g id="edge566" class="edge"><title>11,4->11,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-666.477C695.29,-666.749 685.041,-666.817 675.091,-666.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-663.18 664.867,-666.471 674.792,-670.179 674.937,-663.18"/>
+</g>
+<!-- 10,3 -->
+<g id="node116" class="node"><title>10,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-3035" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-3046.17" font-family="Times Roman,serif" font-size="10.00">10,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-3027 727,-3041 778,-3041 778,-3027 727,-3027"/>
+<text text-anchor="start" x="736" y="-3031.67" font-family="Times Roman,serif" font-size="10.00">CAGAT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-3013 727,-3027 778,-3027 778,-3013 727,-3013"/>
+<text text-anchor="start" x="737" y="-3017.67" font-family="Times Roman,serif" font-size="10.00">ATCTG</text>
+</g>
+<!-- 11,4->10,3 -->
+<g id="edge564" class="edge"><title>11,4->10,3</title>
+<path fill="none" stroke="#dd1e2f" d="M720.07,-687.164C702.813,-701.845 685,-717 685,-717 685,-717 685,-2875 685,-2875 685,-2875 713.831,-2943.85 733.667,-2991.22"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="730.536,-2992.8 737.627,-3000.68 736.993,-2990.1 730.536,-2992.8"/>
+</g>
+<!-- 10,1->11,2 -->
+<g id="edge128" class="edge"><title>10,1->11,2</title>
+<path fill="none" stroke="#218559" d="M468.769,-3000.68C447.907,-2953.66 413,-2875 413,-2875 413,-2875 413,-1501 413,-1501 413,-1501 465.257,-882.016 480.107,-706.113"/>
+<polygon fill="#218559" stroke="#218559" points="483.595,-706.396 480.949,-696.137 476.62,-705.807 483.595,-706.396"/>
+</g>
+<!-- 10,1->10,2 -->
+<g id="edge126" class="edge"><title>10,1->10,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-3028.53C540.501,-3028.25 550.748,-3028.18 560.703,-3028.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-3031.82 570.933,-3028.52 561.007,-3024.82 560.864,-3031.82"/>
+</g>
+<!-- 10,2->9,1 -->
+<g id="edge130" class="edge"><title>10,2->9,1</title>
+<path fill="none" stroke="#dd1e2f" d="M586.4,-3062.09C573.697,-3072.97 562,-3083 562,-3083 562,-3083 540,-3282 540,-3282 540,-3282 513.155,-3378.35 496.638,-3437.64"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="493.195,-3436.96 493.883,-3447.53 499.938,-3438.83 493.195,-3436.96"/>
+</g>
+<!-- 10,2->10,1 -->
+<g id="edge134" class="edge"><title>10,2->10,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-3041.48C561.29,-3041.75 551.041,-3041.82 541.091,-3041.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-3038.18 530.867,-3041.47 540.792,-3045.18 540.937,-3038.18"/>
+</g>
+<!-- 10,2->10,3 -->
+<g id="edge132" class="edge"><title>10,2->10,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-3028.53C674.501,-3028.25 684.748,-3028.18 694.703,-3028.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-3031.82 704.933,-3028.52 695.007,-3024.82 694.864,-3031.82"/>
+</g>
+<!-- 10,3->11,4 -->
+<g id="edge138" class="edge"><title>10,3->11,4</title>
+<path fill="none" stroke="#218559" d="M737.627,-3000.68C717.94,-2953.66 685,-2875 685,-2875 685,-2875 685,-717 685,-717 685,-717 697.917,-706.011 712.409,-693.682"/>
+<polygon fill="#218559" stroke="#218559" points="714.721,-696.31 720.07,-687.164 710.185,-690.979 714.721,-696.31"/>
+</g>
+<!-- 10,3->10,2 -->
+<g id="edge140" class="edge"><title>10,3->10,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-3041.48C695.29,-3041.75 685.041,-3041.82 675.091,-3041.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-3038.18 664.867,-3041.47 674.792,-3045.18 674.937,-3038.18"/>
+</g>
+<!-- 10,3->10,4 -->
+<g id="edge136" class="edge"><title>10,3->10,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-3028.53C808.501,-3028.25 818.748,-3028.18 828.703,-3028.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-3031.82 838.933,-3028.52 829.007,-3024.82 828.864,-3031.82"/>
+</g>
+<!-- 10,4->9,3 -->
+<g id="edge142" class="edge"><title>10,4->9,3</title>
+<path fill="none" stroke="#dd1e2f" d="M854.4,-3062.09C841.697,-3072.97 830,-3083 830,-3083 830,-3083 808,-3282 808,-3282 808,-3282 781.155,-3378.35 764.638,-3437.64"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="761.195,-3436.96 761.883,-3447.53 767.938,-3438.83 761.195,-3436.96"/>
+</g>
+<!-- 10,4->10,3 -->
+<g id="edge144" class="edge"><title>10,4->10,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-3041.48C829.29,-3041.75 819.041,-3041.82 809.091,-3041.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-3038.18 798.867,-3041.47 808.792,-3045.18 808.937,-3038.18"/>
+</g>
+<!-- 13,1 -->
+<g id="node119" class="node"><title>13,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-468" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-479.167" font-family="Times Roman,serif" font-size="10.00">13,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-460 325,-474 376,-474 376,-460 325,-460"/>
+<text text-anchor="start" x="336" y="-464.667" font-family="Times Roman,serif" font-size="10.00">ATCTC</text>
+<polygon fill="#218559" stroke="#218559" points="325,-446 325,-460 376,-460 376,-446 325,-446"/>
+<text text-anchor="start" x="334" y="-450.667" font-family="Times Roman,serif" font-size="10.00">GAGAT</text>
+</g>
+<!-- 13,2 -->
+<g id="node120" class="node"><title>13,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-468" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-479.167" font-family="Times Roman,serif" font-size="10.00">13,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-460 459,-474 510,-474 510,-460 459,-460"/>
+<text text-anchor="start" x="470" y="-464.667" font-family="Times Roman,serif" font-size="10.00">TCTCT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-446 459,-460 510,-460 510,-446 459,-446"/>
+<text text-anchor="start" x="467" y="-450.667" font-family="Times Roman,serif" font-size="10.00">AGAGA</text>
+</g>
+<!-- 13,1->13,2 -->
+<g id="edge568" class="edge"><title>13,1->13,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-461.529C406.501,-461.254 416.748,-461.183 426.703,-461.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-464.817 436.933,-461.523 427.007,-457.819 426.864,-464.817"/>
+</g>
+<!-- 14,2 -->
+<g id="node135" class="node"><title>14,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-372" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-383.167" font-family="Times Roman,serif" font-size="10.00">14,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-364 459,-378 510,-378 510,-364 459,-364"/>
+<text text-anchor="start" x="469.5" y="-368.667" font-family="Times Roman,serif" font-size="10.00">CATCT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-350 459,-364 510,-364 510,-350 459,-350"/>
+<text text-anchor="start" x="468" y="-354.667" font-family="Times Roman,serif" font-size="10.00">AGATG</text>
+</g>
+<!-- 13,1->14,2 -->
+<g id="edge570" class="edge"><title>13,1->14,2</title>
+<path fill="none" stroke="#218559" d="M387.58,-445.894C403.307,-436.643 418,-428 418,-428 418,-428 430.304,-417.561 444.286,-405.696"/>
+<polygon fill="#218559" stroke="#218559" points="446.799,-408.154 452.16,-399.016 442.27,-402.817 446.799,-408.154"/>
+</g>
+<!-- 13,2->13,1 -->
+<g id="edge576" class="edge"><title>13,2->13,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-474.477C427.29,-474.749 417.041,-474.817 407.091,-474.684"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-471.18 396.867,-474.471 406.792,-478.179 406.937,-471.18"/>
+</g>
+<!-- 13,3 -->
+<g id="node121" class="node"><title>13,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-468" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-479.167" font-family="Times Roman,serif" font-size="10.00">13,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-460 593,-474 644,-474 644,-460 593,-460"/>
+<text text-anchor="start" x="603" y="-464.667" font-family="Times Roman,serif" font-size="10.00">CTCTG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-446 593,-460 644,-460 644,-446 593,-446"/>
+<text text-anchor="start" x="601" y="-450.667" font-family="Times Roman,serif" font-size="10.00">CAGAG</text>
+</g>
+<!-- 13,2->13,3 -->
+<g id="edge574" class="edge"><title>13,2->13,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-461.529C540.501,-461.254 550.748,-461.183 560.703,-461.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-464.817 570.933,-461.523 561.007,-457.819 560.864,-464.817"/>
+</g>
+<!-- 12,1 -->
+<g id="node124" class="node"><title>12,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-564" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-575.167" font-family="Times Roman,serif" font-size="10.00">12,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-556 325,-570 376,-570 376,-556 325,-556"/>
+<text text-anchor="start" x="335" y="-560.667" font-family="Times Roman,serif" font-size="10.00">CTCTG</text>
+<polygon fill="#218559" stroke="#218559" points="325,-542 325,-556 376,-556 376,-542 325,-542"/>
+<text text-anchor="start" x="333" y="-546.667" font-family="Times Roman,serif" font-size="10.00">CAGAG</text>
+</g>
+<!-- 13,2->12,1 -->
+<g id="edge572" class="edge"><title>13,2->12,1</title>
+<path fill="none" stroke="#dd1e2f" d="M452.16,-495.016C435.283,-509.336 418,-524 418,-524 418,-524 408.3,-529.706 396.227,-536.808"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="394.425,-533.807 387.58,-541.894 397.974,-539.841 394.425,-533.807"/>
+</g>
+<!-- 13,3->13,2 -->
+<g id="edge582" class="edge"><title>13,3->13,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-474.477C561.29,-474.749 551.041,-474.817 541.091,-474.684"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-471.18 530.867,-474.471 540.792,-478.179 540.937,-471.18"/>
+</g>
+<!-- 13,4 -->
+<g id="node122" class="node"><title>13,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-468" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-479.167" font-family="Times Roman,serif" font-size="10.00">13,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-460 727,-474 778,-474 778,-460 727,-460"/>
+<text text-anchor="start" x="736.5" y="-464.667" font-family="Times Roman,serif" font-size="10.00">TCTGG</text>
+<polygon fill="#218559" stroke="#218559" points="727,-446 727,-460 778,-460 778,-446 727,-446"/>
+<text text-anchor="start" x="735" y="-450.667" font-family="Times Roman,serif" font-size="10.00">CCAGA</text>
+</g>
+<!-- 13,3->13,4 -->
+<g id="edge578" class="edge"><title>13,3->13,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-461.529C674.501,-461.254 684.748,-461.183 694.703,-461.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-464.817 704.933,-461.523 695.007,-457.819 694.864,-464.817"/>
+</g>
+<!-- 14,4 -->
+<g id="node137" class="node"><title>14,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-372" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-383.167" font-family="Times Roman,serif" font-size="10.00">14,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-364 727,-378 778,-378 778,-364 727,-364"/>
+<text text-anchor="start" x="738" y="-368.667" font-family="Times Roman,serif" font-size="10.00">TCTCT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-350 727,-364 778,-364 778,-350 727,-350"/>
+<text text-anchor="start" x="735" y="-354.667" font-family="Times Roman,serif" font-size="10.00">AGAGA</text>
+</g>
+<!-- 13,3->14,4 -->
+<g id="edge580" class="edge"><title>13,3->14,4</title>
+<path fill="none" stroke="#218559" d="M655.58,-445.894C671.307,-436.643 686,-428 686,-428 686,-428 698.304,-417.561 712.286,-405.696"/>
+<polygon fill="#218559" stroke="#218559" points="714.799,-408.154 720.16,-399.016 710.27,-402.817 714.799,-408.154"/>
+</g>
+<!-- 13,4->13,3 -->
+<g id="edge586" class="edge"><title>13,4->13,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-474.477C695.29,-474.749 685.041,-474.817 675.091,-474.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-471.18 664.867,-474.471 674.792,-478.179 674.937,-471.18"/>
+</g>
+<!-- 12,3 -->
+<g id="node126" class="node"><title>12,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-564" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-575.167" font-family="Times Roman,serif" font-size="10.00">12,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-556 593,-570 644,-570 644,-556 593,-556"/>
+<text text-anchor="start" x="601.5" y="-560.667" font-family="Times Roman,serif" font-size="10.00">CTGGC</text>
+<polygon fill="#218559" stroke="#218559" points="593,-542 593,-556 644,-556 644,-542 593,-542"/>
+<text text-anchor="start" x="600.5" y="-546.667" font-family="Times Roman,serif" font-size="10.00">GCCAG</text>
+</g>
+<!-- 13,4->12,3 -->
+<g id="edge584" class="edge"><title>13,4->12,3</title>
+<path fill="none" stroke="#dd1e2f" d="M720.16,-495.016C703.283,-509.336 686,-524 686,-524 686,-524 676.3,-529.706 664.227,-536.808"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="662.425,-533.807 655.58,-541.894 665.974,-539.841 662.425,-533.807"/>
+</g>
+<!-- 12,1->13,2 -->
+<g id="edge148" class="edge"><title>12,1->13,2</title>
+<path fill="none" stroke="#218559" d="M384.857,-539.028C402.072,-526.694 422.998,-511.703 441.18,-498.677"/>
+<polygon fill="#218559" stroke="#218559" points="443.227,-501.516 449.317,-492.847 439.15,-495.826 443.227,-501.516"/>
+</g>
+<!-- 12,1->12,2 -->
+<g id="edge146" class="edge"><title>12,1->12,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-557.529C406.501,-557.254 416.748,-557.183 426.703,-557.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-560.817 436.933,-557.523 427.007,-553.819 426.864,-560.817"/>
+</g>
+<!-- 12,2->11,1 -->
+<g id="edge150" class="edge"><title>12,2->11,1</title>
+<path fill="none" stroke="#dd1e2f" d="M449.317,-588.847C432.127,-601.162 411.205,-616.152 393.005,-629.191"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="390.948,-626.359 384.857,-635.028 395.025,-632.049 390.948,-626.359"/>
+</g>
+<!-- 12,2->12,1 -->
+<g id="edge154" class="edge"><title>12,2->12,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-570.477C427.29,-570.749 417.041,-570.817 407.091,-570.684"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-567.18 396.867,-570.471 406.792,-574.179 406.937,-567.18"/>
+</g>
+<!-- 12,2->12,3 -->
+<g id="edge152" class="edge"><title>12,2->12,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-557.529C540.501,-557.254 550.748,-557.183 560.703,-557.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-560.817 570.933,-557.523 561.007,-553.819 560.864,-560.817"/>
+</g>
+<!-- 12,3->13,4 -->
+<g id="edge158" class="edge"><title>12,3->13,4</title>
+<path fill="none" stroke="#218559" d="M652.857,-539.028C670.072,-526.694 690.998,-511.703 709.18,-498.677"/>
+<polygon fill="#218559" stroke="#218559" points="711.227,-501.516 717.317,-492.847 707.15,-495.826 711.227,-501.516"/>
+</g>
+<!-- 12,3->12,2 -->
+<g id="edge160" class="edge"><title>12,3->12,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-570.477C561.29,-570.749 551.041,-570.817 541.091,-570.684"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-567.18 530.867,-570.471 540.792,-574.179 540.937,-567.18"/>
+</g>
+<!-- 12,3->12,4 -->
+<g id="edge156" class="edge"><title>12,3->12,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-557.529C674.501,-557.254 684.748,-557.183 694.703,-557.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-560.817 704.933,-557.523 695.007,-553.819 694.864,-560.817"/>
+</g>
+<!-- 12,4->11,3 -->
+<g id="edge162" class="edge"><title>12,4->11,3</title>
+<path fill="none" stroke="#dd1e2f" d="M717.317,-588.847C700.127,-601.162 679.205,-616.152 661.005,-629.191"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="658.948,-626.359 652.857,-635.028 663.025,-632.049 658.948,-626.359"/>
+</g>
+<!-- 12,4->12,3 -->
+<g id="edge164" class="edge"><title>12,4->12,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-570.477C695.29,-570.749 685.041,-570.817 675.091,-570.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-567.18 664.867,-570.471 674.792,-574.179 674.937,-567.18"/>
+</g>
+<!-- 15,1 -->
+<g id="node129" class="node"><title>15,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-276" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-287.167" font-family="Times Roman,serif" font-size="10.00">15,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-268 325,-282 376,-282 376,-268 325,-268"/>
+<text text-anchor="start" x="332.5" y="-272.667" font-family="Times Roman,serif" font-size="10.00">CGGCA</text>
+<polygon fill="#218559" stroke="#218559" points="325,-254 325,-268 376,-268 376,-254 325,-254"/>
+<text text-anchor="start" x="333.5" y="-258.667" font-family="Times Roman,serif" font-size="10.00">TGCCG</text>
+</g>
+<!-- 15,2 -->
+<g id="node130" class="node"><title>15,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-276" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-287.167" font-family="Times Roman,serif" font-size="10.00">15,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-268 459,-282 510,-282 510,-268 459,-268"/>
+<text text-anchor="start" x="468" y="-272.667" font-family="Times Roman,serif" font-size="10.00">GGCAT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-254 459,-268 510,-268 510,-254 459,-254"/>
+<text text-anchor="start" x="468" y="-258.667" font-family="Times Roman,serif" font-size="10.00">ATGCC</text>
+</g>
+<!-- 15,1->15,2 -->
+<g id="edge588" class="edge"><title>15,1->15,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-269.529C406.501,-269.254 416.748,-269.183 426.703,-269.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-272.817 436.933,-269.523 427.007,-265.819 426.864,-272.817"/>
+</g>
+<!-- 16,2 -->
+<g id="node145" class="node"><title>16,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-148" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">16,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-140 459,-154 510,-154 510,-140 459,-140"/>
+<text text-anchor="start" x="466.5" y="-144.667" font-family="Times Roman,serif" font-size="10.00">ACGGC</text>
+<polygon fill="#218559" stroke="#218559" points="459,-126 459,-140 510,-140 510,-126 459,-126"/>
+<text text-anchor="start" x="467.5" y="-130.667" font-family="Times Roman,serif" font-size="10.00">GCCGT</text>
+</g>
+<!-- 15,1->16,2 -->
+<g id="edge590" class="edge"><title>15,1->16,2</title>
+<path fill="none" stroke="#218559" d="M384.398,-250.707C401.265,-238.305 418,-226 418,-226 418,-226 434.96,-205.956 451.686,-186.19"/>
+<polygon fill="#218559" stroke="#218559" points="454.506,-188.275 458.294,-178.38 449.162,-183.753 454.506,-188.275"/>
+</g>
+<!-- 15,2->15,1 -->
+<g id="edge596" class="edge"><title>15,2->15,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-282.477C427.29,-282.749 417.041,-282.817 407.091,-282.684"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-279.18 396.867,-282.471 406.792,-286.179 406.937,-279.18"/>
+</g>
+<!-- 15,3 -->
+<g id="node131" class="node"><title>15,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-276" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-287.167" font-family="Times Roman,serif" font-size="10.00">15,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-268 593,-282 644,-282 644,-268 593,-268"/>
+<text text-anchor="start" x="602.5" y="-272.667" font-family="Times Roman,serif" font-size="10.00">GCATC</text>
+<polygon fill="#218559" stroke="#218559" points="593,-254 593,-268 644,-268 644,-254 593,-254"/>
+<text text-anchor="start" x="602" y="-258.667" font-family="Times Roman,serif" font-size="10.00">GATGC</text>
+</g>
+<!-- 15,2->15,3 -->
+<g id="edge594" class="edge"><title>15,2->15,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-269.529C540.501,-269.254 550.748,-269.183 560.703,-269.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-272.817 570.933,-269.523 561.007,-265.819 560.864,-272.817"/>
+</g>
+<!-- 14,1 -->
+<g id="node134" class="node"><title>14,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-372" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-383.167" font-family="Times Roman,serif" font-size="10.00">14,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-364 325,-378 376,-378 376,-364 325,-364"/>
+<text text-anchor="start" x="334.5" y="-368.667" font-family="Times Roman,serif" font-size="10.00">GCATC</text>
+<polygon fill="#218559" stroke="#218559" points="325,-350 325,-364 376,-364 376,-350 325,-350"/>
+<text text-anchor="start" x="334" y="-354.667" font-family="Times Roman,serif" font-size="10.00">GATGC</text>
+</g>
+<!-- 15,2->14,1 -->
+<g id="edge592" class="edge"><title>15,2->14,1</title>
+<path fill="none" stroke="#dd1e2f" d="M452.16,-303.016C435.283,-317.336 418,-332 418,-332 418,-332 408.3,-337.706 396.227,-344.808"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="394.425,-341.807 387.58,-349.894 397.974,-347.841 394.425,-341.807"/>
+</g>
+<!-- 15,3->15,2 -->
+<g id="edge602" class="edge"><title>15,3->15,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-282.477C561.29,-282.749 551.041,-282.817 541.091,-282.684"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-279.18 530.867,-282.471 540.792,-286.179 540.937,-279.18"/>
+</g>
+<!-- 15,4 -->
+<g id="node132" class="node"><title>15,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-276" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-287.167" font-family="Times Roman,serif" font-size="10.00">15,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-268 727,-282 778,-282 778,-268 727,-268"/>
+<text text-anchor="start" x="737.5" y="-272.667" font-family="Times Roman,serif" font-size="10.00">CATCT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-254 727,-268 778,-268 778,-254 727,-254"/>
+<text text-anchor="start" x="736" y="-258.667" font-family="Times Roman,serif" font-size="10.00">AGATG</text>
+</g>
+<!-- 15,3->15,4 -->
+<g id="edge598" class="edge"><title>15,3->15,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-269.529C674.501,-269.254 684.748,-269.183 694.703,-269.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-272.817 704.933,-269.523 695.007,-265.819 694.864,-272.817"/>
+</g>
+<!-- 16,4 -->
+<g id="node147" class="node"><title>16,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-148" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">16,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-140 727,-154 778,-154 778,-140 727,-140"/>
+<text text-anchor="start" x="736" y="-144.667" font-family="Times Roman,serif" font-size="10.00">GGCAT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-126 727,-140 778,-140 778,-126 727,-126"/>
+<text text-anchor="start" x="736" y="-130.667" font-family="Times Roman,serif" font-size="10.00">ATGCC</text>
+</g>
+<!-- 15,3->16,4 -->
+<g id="edge600" class="edge"><title>15,3->16,4</title>
+<path fill="none" stroke="#218559" d="M652.398,-250.707C669.265,-238.305 686,-226 686,-226 686,-226 702.96,-205.956 719.686,-186.19"/>
+<polygon fill="#218559" stroke="#218559" points="722.506,-188.275 726.294,-178.38 717.162,-183.753 722.506,-188.275"/>
+</g>
+<!-- 15,4->15,3 -->
+<g id="edge606" class="edge"><title>15,4->15,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-282.477C695.29,-282.749 685.041,-282.817 675.091,-282.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-279.18 664.867,-282.471 674.792,-286.179 674.937,-279.18"/>
+</g>
+<!-- 14,3 -->
+<g id="node136" class="node"><title>14,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-372" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-383.167" font-family="Times Roman,serif" font-size="10.00">14,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-364 593,-378 644,-378 644,-364 593,-364"/>
+<text text-anchor="start" x="604" y="-368.667" font-family="Times Roman,serif" font-size="10.00">ATCTC</text>
+<polygon fill="#218559" stroke="#218559" points="593,-350 593,-364 644,-364 644,-350 593,-350"/>
+<text text-anchor="start" x="602" y="-354.667" font-family="Times Roman,serif" font-size="10.00">GAGAT</text>
+</g>
+<!-- 15,4->14,3 -->
+<g id="edge604" class="edge"><title>15,4->14,3</title>
+<path fill="none" stroke="#dd1e2f" d="M720.16,-303.016C703.283,-317.336 686,-332 686,-332 686,-332 676.3,-337.706 664.227,-344.808"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="662.425,-341.807 655.58,-349.894 665.974,-347.841 662.425,-341.807"/>
+</g>
+<!-- 14,1->15,2 -->
+<g id="edge168" class="edge"><title>14,1->15,2</title>
+<path fill="none" stroke="#218559" d="M384.857,-347.028C402.072,-334.694 422.998,-319.703 441.18,-306.677"/>
+<polygon fill="#218559" stroke="#218559" points="443.227,-309.516 449.317,-300.847 439.15,-303.826 443.227,-309.516"/>
+</g>
+<!-- 14,1->14,2 -->
+<g id="edge166" class="edge"><title>14,1->14,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-365.529C406.501,-365.254 416.748,-365.183 426.703,-365.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-368.817 436.933,-365.523 427.007,-361.819 426.864,-368.817"/>
+</g>
+<!-- 14,2->13,1 -->
+<g id="edge170" class="edge"><title>14,2->13,1</title>
+<path fill="none" stroke="#dd1e2f" d="M449.317,-396.847C432.127,-409.162 411.205,-424.152 393.005,-437.191"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="390.948,-434.359 384.857,-443.028 395.025,-440.049 390.948,-434.359"/>
+</g>
+<!-- 14,2->14,1 -->
+<g id="edge174" class="edge"><title>14,2->14,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-378.477C427.29,-378.749 417.041,-378.817 407.091,-378.684"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-375.18 396.867,-378.471 406.792,-382.179 406.937,-375.18"/>
+</g>
+<!-- 14,2->14,3 -->
+<g id="edge172" class="edge"><title>14,2->14,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-365.529C540.501,-365.254 550.748,-365.183 560.703,-365.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-368.817 570.933,-365.523 561.007,-361.819 560.864,-368.817"/>
+</g>
+<!-- 14,3->15,4 -->
+<g id="edge178" class="edge"><title>14,3->15,4</title>
+<path fill="none" stroke="#218559" d="M652.857,-347.028C670.072,-334.694 690.998,-319.703 709.18,-306.677"/>
+<polygon fill="#218559" stroke="#218559" points="711.227,-309.516 717.317,-300.847 707.15,-303.826 711.227,-309.516"/>
+</g>
+<!-- 14,3->14,2 -->
+<g id="edge180" class="edge"><title>14,3->14,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-378.477C561.29,-378.749 551.041,-378.817 541.091,-378.684"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-375.18 530.867,-378.471 540.792,-382.179 540.937,-375.18"/>
+</g>
+<!-- 14,3->14,4 -->
+<g id="edge176" class="edge"><title>14,3->14,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-365.529C674.501,-365.254 684.748,-365.183 694.703,-365.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-368.817 704.933,-365.523 695.007,-361.819 694.864,-368.817"/>
+</g>
+<!-- 14,4->13,3 -->
+<g id="edge182" class="edge"><title>14,4->13,3</title>
+<path fill="none" stroke="#dd1e2f" d="M717.317,-396.847C700.127,-409.162 679.205,-424.152 661.005,-437.191"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="658.948,-434.359 652.857,-443.028 663.025,-440.049 658.948,-434.359"/>
+</g>
+<!-- 14,4->14,3 -->
+<g id="edge184" class="edge"><title>14,4->14,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-378.477C695.29,-378.749 685.041,-378.817 675.091,-378.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-375.18 664.867,-378.471 674.792,-382.179 674.937,-375.18"/>
+</g>
+<!-- 17,1->24,1 -->
+<g id="edge614" class="edge"><title>17,1->24,1</title>
+<path fill="none" stroke="#218559" d="M364.013,-3982.45C378.18,-4017.28 398,-4066 398,-4066 398,-4066 418,-4361 418,-4361 418,-4361 428,-5072 428,-5072 428,-5072 435.283,-5078.24 444.596,-5086.22"/>
+<polygon fill="#218559" stroke="#218559" points="442.529,-5089.06 452.4,-5092.91 447.085,-5083.75 442.529,-5089.06"/>
+</g>
+<!-- 17,1->23,2 -->
+<g id="edge610" class="edge"><title>17,1->23,2</title>
+<path fill="none" stroke="#06a2cb" d="M365.308,-3913.67C384.805,-3869.95 416,-3800 416,-3800 416,-3800 418,-3800 418,-3800 418,-3800 428,-4677 428,-4677 428,-4677 435.283,-4683.24 444.596,-4691.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="442.529,-4694.06 452.4,-4697.91 447.085,-4688.75 442.529,-4694.06"/>
+</g>
+<!-- 17,1->17,2 -->
+<g id="edge608" class="edge"><title>17,1->17,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-3941.53C406.501,-3941.25 416.748,-3941.18 426.703,-3941.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-3944.82 436.933,-3941.52 427.007,-3937.82 426.864,-3944.82"/>
+</g>
+<!-- 18,3 -->
+<g id="node156" class="node"><title>18,3</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-4463" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-4474.17" font-family="Times Roman,serif" font-size="10.00">18,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-4455 459,-4469 510,-4469 510,-4455 459,-4455"/>
+<text text-anchor="start" x="470" y="-4459.67" font-family="Times Roman,serif" font-size="10.00">TTTCA</text>
+<polygon fill="#218559" stroke="#218559" points="459,-4441 459,-4455 510,-4455 510,-4441 459,-4441"/>
+<text text-anchor="start" x="467.5" y="-4445.67" font-family="Times Roman,serif" font-size="10.00">TGAAA</text>
+</g>
+<!-- 17,1->18,3 -->
+<g id="edge612" class="edge"><title>17,1->18,3</title>
+<path fill="none" stroke="#06a2cb" d="M365.188,-3913.83C379.25,-3882.19 398,-3840 398,-3840 398,-3840 416,-3775 416,-3775 416,-3775 418,-3775 418,-3775 418,-3775 428,-4162 428,-4162 428,-4162 459.813,-4333 475.483,-4417.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="472.086,-4418.1 477.356,-4427.29 478.968,-4416.82 472.086,-4418.1"/>
+</g>
+<!-- 17,2->23,1 -->
+<g id="edge620" class="edge"><title>17,2->23,1</title>
+<path fill="none" stroke="#06a2cb" d="M453.052,-3975.63C440.1,-3987.2 428,-3998 428,-3998 428,-3998 416,-4190 416,-4190 416,-4190 398,-4511 398,-4511 398,-4511 374.399,-4616.22 360.296,-4679.1"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="356.808,-4678.66 358.034,-4689.18 363.638,-4680.19 356.808,-4678.66"/>
+</g>
+<!-- 17,2->17,1 -->
+<g id="edge622" class="edge"><title>17,2->17,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-3954.48C427.29,-3954.75 417.041,-3954.82 407.091,-3954.68"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-3951.18 396.867,-3954.47 406.792,-3958.18 406.937,-3951.18"/>
+</g>
+<!-- 17,3 -->
+<g id="node141" class="node"><title>17,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-3948" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-3959.17" font-family="Times Roman,serif" font-size="10.00">17,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-3940 593,-3954 644,-3954 644,-3940 593,-3940"/>
+<text text-anchor="start" x="600.5" y="-3944.67" font-family="Times Roman,serif" font-size="10.00">AACGG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-3926 593,-3940 644,-3940 644,-3926 593,-3926"/>
+<text text-anchor="start" x="603" y="-3930.67" font-family="Times Roman,serif" font-size="10.00">CCGTT</text>
+</g>
+<!-- 17,2->17,3 -->
+<g id="edge618" class="edge"><title>17,2->17,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-3941.53C540.501,-3941.25 550.748,-3941.18 560.703,-3941.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-3944.82 570.933,-3941.52 561.007,-3937.82 560.864,-3944.82"/>
+</g>
+<!-- 16,1 -->
+<g id="node144" class="node"><title>16,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-148" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">16,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-140 325,-154 376,-154 376,-140 325,-140"/>
+<text text-anchor="start" x="332.5" y="-144.667" font-family="Times Roman,serif" font-size="10.00">AACGG</text>
+<polygon fill="#218559" stroke="#218559" points="325,-126 325,-140 376,-140 376,-126 325,-126"/>
+<text text-anchor="start" x="335" y="-130.667" font-family="Times Roman,serif" font-size="10.00">CCGTT</text>
+</g>
+<!-- 17,2->16,1 -->
+<g id="edge616" class="edge"><title>17,2->16,1</title>
+<path fill="none" stroke="#dd1e2f" d="M476.036,-3912.3C460.575,-3843.01 428,-3697 428,-3697 428,-3697 418,-2372 418,-2372 418,-2372 398,-228 398,-228 398,-228 386.635,-209.059 374.965,-189.609"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="377.947,-187.775 369.801,-181.001 371.944,-191.377 377.947,-187.775"/>
+</g>
+<!-- 17,3->17,2 -->
+<g id="edge628" class="edge"><title>17,3->17,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-3954.48C561.29,-3954.75 551.041,-3954.82 541.091,-3954.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-3951.18 530.867,-3954.47 540.792,-3958.18 540.937,-3951.18"/>
+</g>
+<!-- 17,4 -->
+<g id="node142" class="node"><title>17,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-3948" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-3959.17" font-family="Times Roman,serif" font-size="10.00">17,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-3940 727,-3954 778,-3954 778,-3940 727,-3940"/>
+<text text-anchor="start" x="734.5" y="-3944.67" font-family="Times Roman,serif" font-size="10.00">ACGGC</text>
+<polygon fill="#218559" stroke="#218559" points="727,-3926 727,-3940 778,-3940 778,-3926 727,-3926"/>
+<text text-anchor="start" x="735.5" y="-3930.67" font-family="Times Roman,serif" font-size="10.00">GCCGT</text>
+</g>
+<!-- 17,3->17,4 -->
+<g id="edge624" class="edge"><title>17,3->17,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-3941.53C674.501,-3941.25 684.748,-3941.18 694.703,-3941.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-3944.82 704.933,-3941.52 695.007,-3937.82 694.864,-3944.82"/>
+</g>
+<!-- 17,3->18,1 -->
+<g id="edge626" class="edge"><title>17,3->18,1</title>
+<path fill="none" stroke="#06a2cb" d="M610.036,-3912.3C594.575,-3843.01 562,-3697 562,-3697 562,-3697 540,-3435 540,-3435 540,-3435 428,-3435 428,-3435 428,-3435 398,-3669 398,-3669 398,-3669 294,-3903 294,-3903 294,-3903 264,-4162 264,-4162 264,-4162 236.799,-4332.57 223.35,-4416.91"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="219.861,-4416.56 221.742,-4426.99 226.773,-4417.67 219.861,-4416.56"/>
+</g>
+<!-- 17,4->17,3 -->
+<g id="edge632" class="edge"><title>17,4->17,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-3954.48C695.29,-3954.75 685.041,-3954.82 675.091,-3954.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-3951.18 664.867,-3954.47 674.792,-3958.18 674.937,-3951.18"/>
+</g>
+<!-- 16,3 -->
+<g id="node146" class="node"><title>16,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-148" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">16,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-140 593,-154 644,-154 644,-140 593,-140"/>
+<text text-anchor="start" x="600.5" y="-144.667" font-family="Times Roman,serif" font-size="10.00">CGGCA</text>
+<polygon fill="#218559" stroke="#218559" points="593,-126 593,-140 644,-140 644,-126 593,-126"/>
+<text text-anchor="start" x="601.5" y="-130.667" font-family="Times Roman,serif" font-size="10.00">TGCCG</text>
+</g>
+<!-- 17,4->16,3 -->
+<g id="edge630" class="edge"><title>17,4->16,3</title>
+<path fill="none" stroke="#dd1e2f" d="M744.036,-3912.3C728.575,-3843.01 696,-3697 696,-3697 696,-3697 686,-1855 686,-1855 686,-1855 674,-228 674,-228 674,-228 660.245,-208.35 646.369,-188.527"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="649.057,-186.263 640.455,-180.078 643.322,-190.278 649.057,-186.263"/>
+</g>
+<!-- 16,1->17,2 -->
+<g id="edge188" class="edge"><title>16,1->17,2</title>
+<path fill="none" stroke="#218559" d="M369.801,-181.001C383.023,-203.038 398,-228 398,-228 398,-228 428,-3697 428,-3697 428,-3697 457.714,-3830.18 473.853,-3902.52"/>
+<polygon fill="#218559" stroke="#218559" points="470.442,-3903.31 476.036,-3912.3 477.274,-3901.78 470.442,-3903.31"/>
+</g>
+<!-- 16,1->16,2 -->
+<g id="edge186" class="edge"><title>16,1->16,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-141.529C406.501,-141.254 416.748,-141.183 426.703,-141.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-144.817 436.933,-141.523 427.007,-137.819 426.864,-144.817"/>
+</g>
+<!-- 16,2->15,1 -->
+<g id="edge190" class="edge"><title>16,2->15,1</title>
+<path fill="none" stroke="#dd1e2f" d="M454.344,-176.328C434.498,-195.285 408.229,-220.378 387.013,-240.644"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="384.553,-238.154 379.739,-247.593 389.388,-243.216 384.553,-238.154"/>
+</g>
+<!-- 16,2->16,1 -->
+<g id="edge194" class="edge"><title>16,2->16,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-154.477C427.29,-154.749 417.041,-154.817 407.091,-154.684"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-151.18 396.867,-154.471 406.792,-158.179 406.937,-151.18"/>
+</g>
+<!-- 16,2->16,3 -->
+<g id="edge192" class="edge"><title>16,2->16,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-141.529C540.501,-141.254 550.748,-141.183 560.703,-141.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-144.817 570.933,-141.523 561.007,-137.819 560.864,-144.817"/>
+</g>
+<!-- 16,3->17,4 -->
+<g id="edge198" class="edge"><title>16,3->17,4</title>
+<path fill="none" stroke="#218559" d="M640.455,-180.078C656.034,-202.335 674,-228 674,-228 674,-228 696,-3697 696,-3697 696,-3697 725.714,-3830.18 741.853,-3902.52"/>
+<polygon fill="#218559" stroke="#218559" points="738.442,-3903.31 744.036,-3912.3 745.274,-3901.78 738.442,-3903.31"/>
+</g>
+<!-- 16,3->16,2 -->
+<g id="edge200" class="edge"><title>16,3->16,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-154.477C561.29,-154.749 551.041,-154.817 541.091,-154.684"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-151.18 530.867,-154.471 540.792,-158.179 540.937,-151.18"/>
+</g>
+<!-- 16,3->16,4 -->
+<g id="edge196" class="edge"><title>16,3->16,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-141.529C674.501,-141.254 684.748,-141.183 694.703,-141.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-144.817 704.933,-141.523 695.007,-137.819 694.864,-144.817"/>
+</g>
+<!-- 16,4->15,3 -->
+<g id="edge202" class="edge"><title>16,4->15,3</title>
+<path fill="none" stroke="#dd1e2f" d="M722.344,-176.328C702.498,-195.285 676.229,-220.378 655.013,-240.644"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="652.553,-238.154 647.739,-247.593 657.388,-243.216 652.553,-238.154"/>
+</g>
+<!-- 16,4->16,3 -->
+<g id="edge204" class="edge"><title>16,4->16,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-154.477C695.29,-154.749 685.041,-154.817 675.091,-154.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-151.18 664.867,-154.471 674.792,-158.179 674.937,-151.18"/>
+</g>
+<!-- 19,1->20,3 -->
+<g id="edge636" class="edge"><title>19,1->20,3</title>
+<path fill="none" stroke="#06a2cb" d="M71.2944,-5180.29C93.6974,-5070.61 160,-4746 160,-4746 160,-4746 294,-4541 294,-4541 294,-4541 398,-4511 398,-4511 398,-4511 451.557,-4263.76 474.142,-4159.51"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="477.596,-4160.09 476.292,-4149.58 470.755,-4158.61 477.596,-4160.09"/>
+</g>
+<!-- 19,2 -->
+<g id="node150" class="node"><title>19,2</title>
+<ellipse fill="none" stroke="black" cx="216" cy="-5216" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="193.5" y="-5227.17" font-family="Times Roman,serif" font-size="10.00">19,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="191,-5208 191,-5222 242,-5222 242,-5208 191,-5208"/>
+<text text-anchor="start" x="201" y="-5212.67" font-family="Times Roman,serif" font-size="10.00">ATTGA</text>
+<polygon fill="#218559" stroke="#218559" points="191,-5194 191,-5208 242,-5208 242,-5194 191,-5194"/>
+<text text-anchor="start" x="201.5" y="-5198.67" font-family="Times Roman,serif" font-size="10.00">TCAAT</text>
+</g>
+<!-- 19,1->19,2 -->
+<g id="edge634" class="edge"><title>19,1->19,2</title>
+<path fill="none" stroke="#dd1e2f" d="M110.845,-5209.71C126.015,-5209.21 143.011,-5209.12 158.843,-5209.44"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="158.958,-5212.94 169.048,-5209.71 159.144,-5205.94 158.958,-5212.94"/>
+</g>
+<!-- 19,2->19,1 -->
+<g id="edge642" class="edge"><title>19,2->19,1</title>
+<path fill="none" stroke="#218559" d="M169.048,-5222.29C153.87,-5222.79 136.872,-5222.88 121.045,-5222.56"/>
+<polygon fill="#218559" stroke="#218559" points="120.935,-5219.06 110.845,-5222.29 120.748,-5226.05 120.935,-5219.06"/>
+</g>
+<!-- 19,2->19,3 -->
+<g id="edge638" class="edge"><title>19,2->19,3</title>
+<path fill="none" stroke="#dd1e2f" d="M262.867,-5209.53C272.501,-5209.25 282.748,-5209.18 292.703,-5209.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="292.864,-5212.82 302.933,-5209.52 293.007,-5205.82 292.864,-5212.82"/>
+</g>
+<!-- 19,2->18,4 -->
+<g id="edge640" class="edge"><title>19,2->18,4</title>
+<path fill="none" stroke="#ebb035" d="M227.425,-5180.84C249.091,-5114.18 294,-4976 294,-4976 294,-4976 540,-4801 540,-4801 540,-4801 562,-4513 562,-4513 562,-4513 569.684,-4506.14 579.359,-4497.5"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="581.923,-4499.9 587.052,-4490.63 577.261,-4494.68 581.923,-4499.9"/>
+</g>
+<!-- 19,3->24,1 -->
+<g id="edge644" class="edge"><title>19,3->24,1</title>
+<path fill="none" stroke="#dd1e2f" d="M383,-5190C399.5,-5177 416,-5164 416,-5164 416,-5164 426.392,-5157.28 439.051,-5149.08"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="441.117,-5151.92 447.611,-5143.55 437.314,-5146.04 441.117,-5151.92"/>
+</g>
+<!-- 19,3->20,1 -->
+<g id="edge648" class="edge"><title>19,3->20,1</title>
+<path fill="none" stroke="#06a2cb" d="M336.444,-5181.14C319.871,-5138.52 294,-5072 294,-5072 294,-5072 264,-4415 264,-4415 264,-4415 236.799,-4244.43 223.35,-4160.09"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="226.773,-4159.33 221.742,-4150.01 219.861,-4160.44 226.773,-4159.33"/>
+</g>
+<!-- 19,3->19,2 -->
+<g id="edge650" class="edge"><title>19,3->19,2</title>
+<path fill="none" stroke="#218559" d="M302.933,-5222.48C293.29,-5222.75 283.041,-5222.82 273.091,-5222.68"/>
+<polygon fill="#218559" stroke="#218559" points="272.937,-5219.18 262.867,-5222.47 272.792,-5226.18 272.937,-5219.18"/>
+</g>
+<!-- 19,3->19,4 -->
+<g id="edge646" class="edge"><title>19,3->19,4</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-5209.53C406.501,-5209.25 416.748,-5209.18 426.703,-5209.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-5212.82 436.933,-5209.52 427.007,-5205.82 426.864,-5212.82"/>
+</g>
+<!-- 19,4->24,2 -->
+<g id="edge654" class="edge"><title>19,4->24,2</title>
+<path fill="none" stroke="#dd1e2f" d="M531.615,-5217.44C542.023,-5217.76 550,-5218 550,-5218 550,-5218 552,-5218 552,-5218 552,-5218 572.47,-5187.6 590.61,-5160.67"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593.706,-5162.34 596.389,-5152.09 587.9,-5158.43 593.706,-5162.34"/>
+</g>
+<!-- 19,4->23,1 -->
+<g id="edge658" class="edge"><title>19,4->23,1</title>
+<path fill="none" stroke="#ebb035" d="M452.4,-5188.91C439.697,-5178.03 428,-5168 428,-5168 428,-5168 398,-4976 398,-4976 398,-4976 372.635,-4843.36 358.782,-4770.92"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="362.142,-4769.86 356.826,-4760.7 355.267,-4771.18 362.142,-4769.86"/>
+</g>
+<!-- 19,4->8,3 -->
+<g id="edge652" class="edge"><title>19,4->8,3</title>
+<path fill="none" stroke="#dd1e2f" d="M515.6,-5188.91C528.303,-5178.03 540,-5168 540,-5168 540,-5168 562,-5072 562,-5072 562,-5072 569.944,-5065.19 579.849,-5056.7"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="582.391,-5059.13 587.705,-5049.97 577.835,-5053.82 582.391,-5059.13"/>
+</g>
+<!-- 19,4->19,3 -->
+<g id="edge660" class="edge"><title>19,4->19,3</title>
+<path fill="none" stroke="#218559" d="M436.933,-5222.48C427.29,-5222.75 417.041,-5222.82 407.091,-5222.68"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-5219.18 396.867,-5222.47 406.792,-5226.18 406.937,-5219.18"/>
+</g>
+<!-- 19,4->18,2 -->
+<g id="edge656" class="edge"><title>19,4->18,2</title>
+<path fill="none" stroke="#ebb035" d="M452.4,-5188.91C439.697,-5178.03 428,-5168 428,-5168 428,-5168 398,-4677 398,-4677 398,-4677 374.399,-4571.78 360.296,-4508.9"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="363.638,-4507.81 358.034,-4498.82 356.808,-4509.34 363.638,-4507.81"/>
+</g>
+<!-- 18,1->23,1 -->
+<g id="edge206" class="edge"><title>18,1->23,1</title>
+<path fill="none" stroke="#dd1e2f" d="M233.244,-4496.72C257.516,-4544.17 301.748,-4630.66 328.249,-4682.47"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325.153,-4684.1 332.822,-4691.41 331.385,-4680.92 325.153,-4684.1"/>
+</g>
+<!-- 18,1->17,3 -->
+<g id="edge210" class="edge"><title>18,1->17,3</title>
+<path fill="none" stroke="#06a2cb" d="M221.742,-4426.99C234.483,-4347.1 264,-4162 264,-4162 264,-4162 294,-3812 294,-3812 294,-3812 428,-3405 428,-3405 428,-3405 552,-3405 552,-3405 552,-3405 562,-3697 562,-3697 562,-3697 591.714,-3830.18 607.853,-3902.52"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="604.442,-3903.31 610.036,-3912.3 611.274,-3901.78 604.442,-3903.31"/>
+</g>
+<!-- 18,1->18,2 -->
+<g id="edge208" class="edge"><title>18,1->18,2</title>
+<path fill="none" stroke="#dd1e2f" d="M262.867,-4456.53C272.501,-4456.25 282.748,-4456.18 292.703,-4456.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="292.864,-4459.82 302.933,-4456.52 293.007,-4452.82 292.864,-4459.82"/>
+</g>
+<!-- 18,2->7,1 -->
+<g id="edge212" class="edge"><title>18,2->7,1</title>
+<path fill="none" stroke="#dd1e2f" d="M368.035,-4429.39C387.576,-4392.97 416,-4340 416,-4340 416,-4340 418,-4340 418,-4340 418,-4340 428,-4511 428,-4511 428,-4511 447.673,-4552.45 463.699,-4586.22"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="460.653,-4587.97 468.102,-4595.5 466.977,-4584.97 460.653,-4587.97"/>
+</g>
+<!-- 18,2->19,4 -->
+<g id="edge216" class="edge"><title>18,2->19,4</title>
+<path fill="none" stroke="#ebb035" d="M371.611,-4430.91C390.859,-4402.33 416,-4365 416,-4365 416,-4365 418,-4365 418,-4365 418,-4365 428,-5168 428,-5168 428,-5168 435.283,-5174.24 444.596,-5182.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="442.529,-5185.06 452.4,-5188.91 447.085,-5179.75 442.529,-5185.06"/>
+</g>
+<!-- 18,2->18,1 -->
+<g id="edge218" class="edge"><title>18,2->18,1</title>
+<path fill="none" stroke="#218559" d="M302.933,-4469.48C293.29,-4469.75 283.041,-4469.82 273.091,-4469.68"/>
+<polygon fill="#218559" stroke="#218559" points="272.937,-4466.18 262.867,-4469.47 272.792,-4473.18 272.937,-4466.18"/>
+</g>
+<!-- 18,2->18,3 -->
+<g id="edge214" class="edge"><title>18,2->18,3</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-4456.53C406.501,-4456.25 416.748,-4456.18 426.703,-4456.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-4459.82 436.933,-4456.52 427.007,-4452.82 426.864,-4459.82"/>
+</g>
+<!-- 18,3->17,1 -->
+<g id="edge222" class="edge"><title>18,3->17,1</title>
+<path fill="none" stroke="#06a2cb" d="M477.356,-4427.29C462.522,-4347.56 428,-4162 428,-4162 428,-4162 418,-3750 418,-3750 418,-3750 416,-3750 416,-3750 416,-3750 398,-3840 398,-3840 398,-3840 382.733,-3874.35 369.42,-3904.31"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="366.051,-3903.27 365.188,-3913.83 372.447,-3906.11 366.051,-3903.27"/>
+</g>
+<!-- 18,3->18,2 -->
+<g id="edge224" class="edge"><title>18,3->18,2</title>
+<path fill="none" stroke="#218559" d="M436.933,-4469.48C427.29,-4469.75 417.041,-4469.82 407.091,-4469.68"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-4466.18 396.867,-4469.47 406.792,-4473.18 406.937,-4466.18"/>
+</g>
+<!-- 18,3->18,4 -->
+<g id="edge220" class="edge"><title>18,3->18,4</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-4456.53C540.501,-4456.25 550.748,-4456.18 560.703,-4456.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-4459.82 570.933,-4456.52 561.007,-4452.82 560.864,-4459.82"/>
+</g>
+<!-- 18,4->24,1 -->
+<g id="edge228" class="edge"><title>18,4->24,1</title>
+<path fill="none" stroke="#06a2cb" d="M601.802,-4497.13C585.264,-4531.98 562,-4581 562,-4581 562,-4581 550,-4848 550,-4848 550,-4848 540,-5072 540,-5072 540,-5072 532.717,-5078.24 523.404,-5086.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="520.915,-5083.75 515.6,-5092.91 525.471,-5089.06 520.915,-5083.75"/>
+</g>
+<!-- 18,4->19,2 -->
+<g id="edge226" class="edge"><title>18,4->19,2</title>
+<path fill="none" stroke="#ebb035" d="M587.052,-4490.63C574.1,-4502.2 562,-4513 562,-4513 562,-4513 550,-4556 550,-4556 550,-4556 540,-4773 540,-4773 540,-4773 282,-4836 282,-4836 282,-4836 241.471,-5069.35 223.932,-5170.33"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="220.472,-5169.8 222.209,-5180.25 227.369,-5171 220.472,-5169.8"/>
+</g>
+<!-- 18,4->18,3 -->
+<g id="edge230" class="edge"><title>18,4->18,3</title>
+<path fill="none" stroke="#218559" d="M570.933,-4469.48C561.29,-4469.75 551.041,-4469.82 541.091,-4469.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-4466.18 530.867,-4469.47 540.792,-4473.18 540.937,-4466.18"/>
+</g>
+<!-- 31,1 -->
+<g id="node159" class="node"><title>31,1</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-1732" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-1743.17" font-family="Times Roman,serif" font-size="10.00">31,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-1724 593,-1738 644,-1738 644,-1724 593,-1724"/>
+<text text-anchor="start" x="602" y="-1728.67" font-family="Times Roman,serif" font-size="10.00">GCGTC</text>
+<polygon fill="#218559" stroke="#218559" points="593,-1710 593,-1724 644,-1724 644,-1710 593,-1710"/>
+<text text-anchor="start" x="600.5" y="-1714.67" font-family="Times Roman,serif" font-size="10.00">GACGC</text>
+</g>
+<!-- 31,2 -->
+<g id="node160" class="node"><title>31,2</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-1732" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-1743.17" font-family="Times Roman,serif" font-size="10.00">31,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-1724 727,-1738 778,-1738 778,-1724 727,-1724"/>
+<text text-anchor="start" x="736" y="-1728.67" font-family="Times Roman,serif" font-size="10.00">CGTCA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-1710 727,-1724 778,-1724 778,-1710 727,-1710"/>
+<text text-anchor="start" x="735.5" y="-1714.67" font-family="Times Roman,serif" font-size="10.00">TGACG</text>
+</g>
+<!-- 31,1->31,2 -->
+<g id="edge786" class="edge"><title>31,1->31,2</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-1725.53C674.501,-1725.25 684.748,-1725.18 694.703,-1725.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-1728.82 704.933,-1725.52 695.007,-1721.82 694.864,-1728.82"/>
+</g>
+<!-- 32,2 -->
+<g id="node195" class="node"><title>32,2</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-1974" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-1985.17" font-family="Times Roman,serif" font-size="10.00">32,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-1966 727,-1980 778,-1980 778,-1966 727,-1966"/>
+<text text-anchor="start" x="735.5" y="-1970.67" font-family="Times Roman,serif" font-size="10.00">AGCGT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-1952 727,-1966 778,-1966 778,-1952 727,-1952"/>
+<text text-anchor="start" x="736" y="-1956.67" font-family="Times Roman,serif" font-size="10.00">ACGCT</text>
+</g>
+<!-- 31,1->32,2 -->
+<g id="edge788" class="edge"><title>31,1->32,2</title>
+<path fill="none" stroke="#218559" d="M636.491,-1765.39C660.503,-1808.76 702.432,-1884.48 728.624,-1931.78"/>
+<polygon fill="#218559" stroke="#218559" points="725.577,-1933.51 733.483,-1940.56 731.701,-1930.12 725.577,-1933.51"/>
+</g>
+<!-- 31,2->31,1 -->
+<g id="edge796" class="edge"><title>31,2->31,1</title>
+<path fill="none" stroke="#218559" d="M704.933,-1738.48C695.29,-1738.75 685.041,-1738.82 675.091,-1738.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-1735.18 664.867,-1738.47 674.792,-1742.18 674.937,-1735.18"/>
+</g>
+<!-- 31,2->31,3 -->
+<g id="edge792" class="edge"><title>31,2->31,3</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-1725.53C808.501,-1725.25 818.748,-1725.18 828.703,-1725.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-1728.82 838.933,-1725.52 829.007,-1721.82 828.864,-1728.82"/>
+</g>
+<!-- 31,2->30,1 -->
+<g id="edge790" class="edge"><title>31,2->30,1</title>
+<path fill="none" stroke="#dd1e2f" d="M740.329,-1767.36C721.134,-1825.52 685,-1935 685,-1935 685,-1935 685,-2013 685,-2013 685,-2013 709.962,-2060.32 729.413,-2097.19"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="726.459,-2099.09 734.221,-2106.3 732.651,-2095.82 726.459,-2099.09"/>
+</g>
+<!-- 31,2->30,4 -->
+<g id="edge794" class="edge"><title>31,2->30,4</title>
+<path fill="none" stroke="#ebb035" d="M783.6,-1704.91C796.303,-1694.03 808,-1684 808,-1684 808,-1684 820,-1354 820,-1354 820,-1354 830,-1026 830,-1026 830,-1026 1076,-1026 1076,-1026 1076,-1026 1106,-1492 1106,-1492 1106,-1492 1139.58,-1945.29 1150.57,-2093.63"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1147.09,-2094.06 1151.32,-2103.77 1154.07,-2093.54 1147.09,-2094.06"/>
+</g>
+<!-- 31,3->28,4 -->
+<g id="edge802" class="edge"><title>31,3->28,4</title>
+<path fill="none" stroke="#218559" d="M896.271,-1767.58C912.438,-1823.59 942,-1926 942,-1926 942,-1926 964,-2546 964,-2546 964,-2546 990.308,-2637.14 1006.86,-2694.48"/>
+<polygon fill="#218559" stroke="#218559" points="1003.59,-2695.78 1009.73,-2704.42 1010.32,-2693.84 1003.59,-2695.78"/>
+</g>
+<!-- 31,3->31,2 -->
+<g id="edge804" class="edge"><title>31,3->31,2</title>
+<path fill="none" stroke="#218559" d="M838.933,-1738.48C829.29,-1738.75 819.041,-1738.82 809.091,-1738.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-1735.18 798.867,-1738.47 808.792,-1742.18 808.937,-1735.18"/>
+</g>
+<!-- 31,4 -->
+<g id="node162" class="node"><title>31,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-1732" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-1743.17" font-family="Times Roman,serif" font-size="10.00">31,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-1724 995,-1738 1046,-1738 1046,-1724 995,-1724"/>
+<text text-anchor="start" x="1005" y="-1728.67" font-family="Times Roman,serif" font-size="10.00">TCATG</text>
+<polygon fill="#218559" stroke="#218559" points="995,-1710 995,-1724 1046,-1724 1046,-1710 995,-1710"/>
+<text text-anchor="start" x="1004" y="-1714.67" font-family="Times Roman,serif" font-size="10.00">CATGA</text>
+</g>
+<!-- 31,3->31,4 -->
+<g id="edge798" class="edge"><title>31,3->31,4</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-1725.53C942.501,-1725.25 952.748,-1725.18 962.703,-1725.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-1728.82 972.933,-1725.52 963.007,-1721.82 962.864,-1728.82"/>
+</g>
+<!-- 31,3->32,4 -->
+<g id="edge800" class="edge"><title>31,3->32,4</title>
+<path fill="none" stroke="#218559" d="M917.6,-1704.91C930.303,-1694.03 942,-1684 942,-1684 942,-1684 952,-1293 952,-1293 952,-1293 954,-1293 954,-1293 954,-1293 964,-1780 964,-1780 964,-1780 990.308,-1871.14 1006.86,-1928.48"/>
+<polygon fill="#218559" stroke="#218559" points="1003.59,-1929.78 1009.73,-1938.42 1010.32,-1927.84 1003.59,-1929.78"/>
+</g>
+<!-- 31,4->31,3 -->
+<g id="edge810" class="edge"><title>31,4->31,3</title>
+<path fill="none" stroke="#218559" d="M972.933,-1738.48C963.29,-1738.75 953.041,-1738.82 943.091,-1738.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-1735.18 932.867,-1738.47 942.792,-1742.18 942.937,-1735.18"/>
+</g>
+<!-- 31,4->30,2 -->
+<g id="edge808" class="edge"><title>31,4->30,2</title>
+<path fill="none" stroke="#ebb035" d="M1009.73,-1767.58C993.562,-1823.59 964,-1926 964,-1926 964,-1926 942,-2022 942,-2022 942,-2022 922.563,-2062.96 906.591,-2096.61"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="903.323,-2095.33 902.198,-2105.87 909.647,-2098.34 903.323,-2095.33"/>
+</g>
+<!-- 31,4->30,3 -->
+<g id="edge806" class="edge"><title>31,4->30,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1008.33,-1767.36C989.134,-1825.52 953,-1935 953,-1935 953,-1935 953,-2013 953,-2013 953,-2013 977.962,-2060.32 997.413,-2097.19"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="994.459,-2099.09 1002.22,-2106.3 1000.65,-2095.82 994.459,-2099.09"/>
+</g>
+<!-- 30,1->29,3 -->
+<g id="edge358" class="edge"><title>30,1->29,3</title>
+<path fill="none" stroke="#06a2cb" d="M782.948,-2112.37C795.9,-2100.8 808,-2090 808,-2090 808,-2090 818,-1896 818,-1896 818,-1896 942,-1848 942,-1848 942,-1848 952,-1368 952,-1368 952,-1368 954,-1368 954,-1368 954,-1368 964,-2188 964,-2188 964,-2188 983.437,-2228.96 999.409,-2262.61"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="996.353,-2264.34 1003.8,-2271.87 1002.68,-2261.33 996.353,-2264.34"/>
+</g>
+<!-- 30,1->31,2 -->
+<g id="edge360" class="edge"><title>30,1->31,2</title>
+<path fill="none" stroke="#218559" d="M734.221,-2106.3C714.372,-2068.68 685,-2013 685,-2013 685,-2013 685,-1935 685,-1935 685,-1935 717.423,-1836.76 737.159,-1776.97"/>
+<polygon fill="#218559" stroke="#218559" points="740.519,-1777.95 740.329,-1767.36 733.872,-1775.76 740.519,-1777.95"/>
+</g>
+<!-- 30,1->30,2 -->
+<g id="edge356" class="edge"><title>30,1->30,2</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-2133.53C808.501,-2133.25 818.748,-2133.18 828.703,-2133.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-2136.82 838.933,-2133.52 829.007,-2129.82 828.864,-2136.82"/>
+</g>
+<!-- 30,2->29,1 -->
+<g id="edge362" class="edge"><title>30,2->29,1</title>
+<path fill="none" stroke="#dd1e2f" d="M861.016,-2170.95C839.056,-2198.15 807,-2237.87 783.329,-2267.19"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="780.448,-2265.19 776.89,-2275.17 785.895,-2269.58 780.448,-2265.19"/>
+</g>
+<!-- 30,2->31,4 -->
+<g id="edge366" class="edge"><title>30,2->31,4</title>
+<path fill="none" stroke="#ebb035" d="M902.198,-2105.87C918.736,-2071.02 942,-2022 942,-2022 942,-2022 952,-1898 952,-1898 952,-1898 981.876,-1825.07 1002.02,-1775.89"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1005.36,-1776.98 1005.91,-1766.4 998.881,-1774.32 1005.36,-1776.98"/>
+</g>
+<!-- 30,2->30,1 -->
+<g id="edge368" class="edge"><title>30,2->30,1</title>
+<path fill="none" stroke="#218559" d="M838.933,-2146.48C829.29,-2146.75 819.041,-2146.82 809.091,-2146.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-2143.18 798.867,-2146.47 808.792,-2150.18 808.937,-2143.18"/>
+</g>
+<!-- 30,2->30,3 -->
+<g id="edge364" class="edge"><title>30,2->30,3</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-2133.53C942.501,-2133.25 952.748,-2133.18 962.703,-2133.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-2136.82 972.933,-2133.52 963.007,-2129.82 962.864,-2136.82"/>
+</g>
+<!-- 30,3->29,1 -->
+<g id="edge372" class="edge"><title>30,3->29,1</title>
+<path fill="none" stroke="#06a2cb" d="M1003.8,-2105.87C987.264,-2071.02 964,-2022 964,-2022 964,-2022 954,-1876 954,-1876 954,-1876 952,-1876 952,-1876 952,-1876 942,-2022 942,-2022 942,-2022 830,-2062 830,-2062 830,-2062 808,-2188 808,-2188 808,-2188 788.563,-2228.96 772.591,-2262.61"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="769.323,-2261.33 768.198,-2271.87 775.647,-2264.34 769.323,-2261.33"/>
+</g>
+<!-- 30,3->31,4 -->
+<g id="edge374" class="edge"><title>30,3->31,4</title>
+<path fill="none" stroke="#218559" d="M1002.22,-2106.3C982.372,-2068.68 953,-2013 953,-2013 953,-2013 953,-1935 953,-1935 953,-1935 985.423,-1836.76 1005.16,-1776.97"/>
+<polygon fill="#218559" stroke="#218559" points="1008.52,-1777.95 1008.33,-1767.36 1001.87,-1775.76 1008.52,-1777.95"/>
+</g>
+<!-- 30,3->30,2 -->
+<g id="edge376" class="edge"><title>30,3->30,2</title>
+<path fill="none" stroke="#218559" d="M972.933,-2146.48C963.29,-2146.75 953.041,-2146.82 943.091,-2146.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-2143.18 932.867,-2146.47 942.792,-2150.18 942.937,-2143.18"/>
+</g>
+<!-- 30,3->30,4 -->
+<g id="edge370" class="edge"><title>30,3->30,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1066.87,-2133.53C1076.5,-2133.25 1086.75,-2133.18 1096.7,-2133.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1096.86,-2136.82 1106.93,-2133.52 1097.01,-2129.82 1096.86,-2136.82"/>
+</g>
+<!-- 30,4->29,3 -->
+<g id="edge378" class="edge"><title>30,4->29,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1129.02,-2170.95C1107.06,-2198.15 1075,-2237.87 1051.33,-2267.19"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1048.45,-2265.19 1044.89,-2275.17 1053.89,-2269.58 1048.45,-2265.19"/>
+</g>
+<!-- 30,4->31,2 -->
+<g id="edge380" class="edge"><title>30,4->31,2</title>
+<path fill="none" stroke="#ebb035" d="M1151.32,-2103.77C1141.21,-1967.31 1106,-1492 1106,-1492 1106,-1492 1088,-996 1088,-996 1088,-996 830,-996 830,-996 830,-996 808,-1684 808,-1684 808,-1684 800.717,-1690.24 791.404,-1698.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="788.915,-1695.75 783.6,-1704.91 793.471,-1701.06 788.915,-1695.75"/>
+</g>
+<!-- 30,4->30,3 -->
+<g id="edge382" class="edge"><title>30,4->30,3</title>
+<path fill="none" stroke="#218559" d="M1106.93,-2146.48C1097.29,-2146.75 1087.04,-2146.82 1077.09,-2146.68"/>
+<polygon fill="#218559" stroke="#218559" points="1076.94,-2143.18 1066.87,-2146.47 1076.79,-2150.18 1076.94,-2143.18"/>
+</g>
+<!-- 37,1 -->
+<g id="node169" class="node"><title>37,1</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">37,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-652 995,-666 1046,-666 1046,-652 995,-652"/>
+<text text-anchor="start" x="1003.5" y="-656.667" font-family="Times Roman,serif" font-size="10.00">CCACA</text>
+<polygon fill="#218559" stroke="#218559" points="995,-638 995,-652 1046,-652 1046,-638 995,-638"/>
+<text text-anchor="start" x="1004" y="-642.667" font-family="Times Roman,serif" font-size="10.00">TGTGG</text>
+</g>
+<!-- 37,2 -->
+<g id="node170" class="node"><title>37,2</title>
+<ellipse fill="none" stroke="black" cx="1154" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1131.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">37,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1129,-652 1129,-666 1180,-666 1180,-652 1129,-652"/>
+<text text-anchor="start" x="1137.5" y="-656.667" font-family="Times Roman,serif" font-size="10.00">CACAC</text>
+<polygon fill="#218559" stroke="#218559" points="1129,-638 1129,-652 1180,-652 1180,-638 1129,-638"/>
+<text text-anchor="start" x="1138" y="-642.667" font-family="Times Roman,serif" font-size="10.00">GTGTG</text>
+</g>
+<!-- 37,1->37,2 -->
+<g id="edge860" class="edge"><title>37,1->37,2</title>
+<path fill="none" stroke="#dd1e2f" d="M1066.87,-653.529C1076.5,-653.254 1086.75,-653.183 1096.7,-653.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1096.86,-656.817 1106.93,-653.523 1097.01,-649.819 1096.86,-656.817"/>
+</g>
+<!-- 37,2->37,1 -->
+<g id="edge866" class="edge"><title>37,2->37,1</title>
+<path fill="none" stroke="#218559" d="M1106.93,-666.477C1097.29,-666.749 1087.04,-666.817 1077.09,-666.684"/>
+<polygon fill="#218559" stroke="#218559" points="1076.94,-663.18 1066.87,-666.471 1076.79,-670.179 1076.94,-663.18"/>
+</g>
+<!-- 37,3 -->
+<g id="node171" class="node"><title>37,3</title>
+<ellipse fill="none" stroke="black" cx="1288" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1265.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">37,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1263,-652 1263,-666 1314,-666 1314,-652 1263,-652"/>
+<text text-anchor="start" x="1271" y="-656.667" font-family="Times Roman,serif" font-size="10.00">ACACG</text>
+<polygon fill="#218559" stroke="#218559" points="1263,-638 1263,-652 1314,-652 1314,-638 1263,-638"/>
+<text text-anchor="start" x="1272.5" y="-642.667" font-family="Times Roman,serif" font-size="10.00">CGTGT</text>
+</g>
+<!-- 37,2->37,3 -->
+<g id="edge862" class="edge"><title>37,2->37,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1200.87,-653.529C1210.5,-653.254 1220.75,-653.183 1230.7,-653.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1230.86,-656.817 1240.93,-653.523 1231.01,-649.819 1230.86,-656.817"/>
+</g>
+<!-- 36,4 -->
+<g id="node177" class="node"><title>36,4</title>
+<ellipse fill="none" stroke="black" cx="1288" cy="-1074" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1265.5" y="-1085.17" font-family="Times Roman,serif" font-size="10.00">36,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1263,-1066 1263,-1080 1314,-1080 1314,-1066 1263,-1066"/>
+<text text-anchor="start" x="1272.5" y="-1070.67" font-family="Times Roman,serif" font-size="10.00">CGTGT</text>
+<polygon fill="#218559" stroke="#218559" points="1263,-1052 1263,-1066 1314,-1066 1314,-1052 1263,-1052"/>
+<text text-anchor="start" x="1271" y="-1056.67" font-family="Times Roman,serif" font-size="10.00">ACACG</text>
+</g>
+<!-- 37,2->36,4 -->
+<g id="edge864" class="edge"><title>37,2->36,4</title>
+<path fill="none" stroke="#ebb035" d="M1165.42,-695.297C1189.75,-770.458 1246.64,-946.204 1273.48,-1029.15"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1270.19,-1030.36 1276.6,-1038.79 1276.85,-1028.2 1270.19,-1030.36"/>
+</g>
+<!-- 37,3->37,2 -->
+<g id="edge870" class="edge"><title>37,3->37,2</title>
+<path fill="none" stroke="#218559" d="M1240.93,-666.477C1231.29,-666.749 1221.04,-666.817 1211.09,-666.684"/>
+<polygon fill="#218559" stroke="#218559" points="1210.94,-663.18 1200.87,-666.471 1210.79,-670.179 1210.94,-663.18"/>
+</g>
+<!-- 37,4 -->
+<g id="node172" class="node"><title>37,4</title>
+<ellipse fill="none" stroke="black" cx="1422" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1399.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">37,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1397,-652 1397,-666 1448,-666 1448,-652 1397,-652"/>
+<text text-anchor="start" x="1405" y="-656.667" font-family="Times Roman,serif" font-size="10.00">CACGC</text>
+<polygon fill="#218559" stroke="#218559" points="1397,-638 1397,-652 1448,-652 1448,-638 1397,-638"/>
+<text text-anchor="start" x="1405" y="-642.667" font-family="Times Roman,serif" font-size="10.00">GCGTG</text>
+</g>
+<!-- 37,3->37,4 -->
+<g id="edge868" class="edge"><title>37,3->37,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1334.87,-653.529C1344.5,-653.254 1354.75,-653.183 1364.7,-653.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1364.86,-656.817 1374.93,-653.523 1365.01,-649.819 1364.86,-656.817"/>
+</g>
+<!-- 37,4->37,3 -->
+<g id="edge874" class="edge"><title>37,4->37,3</title>
+<path fill="none" stroke="#218559" d="M1374.93,-666.477C1365.29,-666.749 1355.04,-666.817 1345.09,-666.684"/>
+<polygon fill="#218559" stroke="#218559" points="1344.94,-663.18 1334.87,-666.471 1344.79,-670.179 1344.94,-663.18"/>
+</g>
+<!-- 36,2 -->
+<g id="node175" class="node"><title>36,2</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-1074" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-1085.17" font-family="Times Roman,serif" font-size="10.00">36,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-1066 995,-1080 1046,-1080 1046,-1066 995,-1066"/>
+<text text-anchor="start" x="1003.5" y="-1070.67" font-family="Times Roman,serif" font-size="10.00">AGCGT</text>
+<polygon fill="#218559" stroke="#218559" points="995,-1052 995,-1066 1046,-1066 1046,-1052 995,-1052"/>
+<text text-anchor="start" x="1004" y="-1056.67" font-family="Times Roman,serif" font-size="10.00">ACGCT</text>
+</g>
+<!-- 37,4->36,2 -->
+<g id="edge872" class="edge"><title>37,4->36,2</title>
+<path fill="none" stroke="#ebb035" d="M1404.3,-693.583C1381.29,-737.25 1344,-808 1344,-808 1344,-808 1210,-946 1210,-946 1210,-946 1120.23,-1006.48 1064.1,-1044.29"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1062.08,-1041.43 1055.74,-1049.92 1065.99,-1047.24 1062.08,-1041.43"/>
+</g>
+<!-- 36,1 -->
+<g id="node174" class="node"><title>36,1</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1074" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-1085.17" font-family="Times Roman,serif" font-size="10.00">36,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-1066 861,-1080 912,-1080 912,-1066 861,-1066"/>
+<text text-anchor="start" x="868.5" y="-1070.67" font-family="Times Roman,serif" font-size="10.00">AAGCG</text>
+<polygon fill="#218559" stroke="#218559" points="861,-1052 861,-1066 912,-1066 912,-1052 861,-1052"/>
+<text text-anchor="start" x="870.5" y="-1056.67" font-family="Times Roman,serif" font-size="10.00">CGCTT</text>
+</g>
+<!-- 36,1->36,2 -->
+<g id="edge434" class="edge"><title>36,1->36,2</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-1067.53C942.501,-1067.25 952.748,-1067.18 962.703,-1067.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-1070.82 972.933,-1067.52 963.007,-1063.82 962.864,-1070.82"/>
+</g>
+<!-- 35,3 -->
+<g id="node181" class="node"><title>35,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-1636" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-1647.17" font-family="Times Roman,serif" font-size="10.00">35,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-1628 727,-1642 778,-1642 778,-1628 727,-1628"/>
+<text text-anchor="start" x="737" y="-1632.67" font-family="Times Roman,serif" font-size="10.00">GCTTA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-1614 727,-1628 778,-1628 778,-1614 727,-1614"/>
+<text text-anchor="start" x="736" y="-1618.67" font-family="Times Roman,serif" font-size="10.00">TAAGC</text>
+</g>
+<!-- 36,1->35,3 -->
+<g id="edge438" class="edge"><title>36,1->35,3</title>
+<path fill="none" stroke="#06a2cb" d="M854.4,-1101.09C841.697,-1111.97 830,-1122 830,-1122 830,-1122 818,-1321 818,-1321 818,-1321 808,-1588 808,-1588 808,-1588 800.717,-1594.24 791.404,-1602.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="788.915,-1599.75 783.6,-1608.91 793.471,-1605.06 788.915,-1599.75"/>
+</g>
+<!-- 33,3 -->
+<g id="node191" class="node"><title>33,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-1540" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-1551.17" font-family="Times Roman,serif" font-size="10.00">33,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-1532 727,-1546 778,-1546 778,-1532 727,-1532"/>
+<text text-anchor="start" x="737" y="-1536.67" font-family="Times Roman,serif" font-size="10.00">GCTTA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-1518 727,-1532 778,-1532 778,-1518 727,-1518"/>
+<text text-anchor="start" x="736" y="-1522.67" font-family="Times Roman,serif" font-size="10.00">TAAGC</text>
+</g>
+<!-- 36,1->33,3 -->
+<g id="edge436" class="edge"><title>36,1->33,3</title>
+<path fill="none" stroke="#06a2cb" d="M854.4,-1101.09C841.697,-1111.97 830,-1122 830,-1122 830,-1122 780.859,-1385.34 760.587,-1493.98"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="757.112,-1493.53 758.718,-1504 763.993,-1494.81 757.112,-1493.53"/>
+</g>
+<!-- 36,2->37,4 -->
+<g id="edge442" class="edge"><title>36,2->37,4</title>
+<path fill="none" stroke="#ebb035" d="M1048.27,-1044.89C1119.77,-971.25 1307.74,-777.671 1386.49,-696.569"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1389.31,-698.692 1393.76,-689.08 1384.29,-693.816 1389.31,-698.692"/>
+</g>
+<!-- 36,2->36,1 -->
+<g id="edge444" class="edge"><title>36,2->36,1</title>
+<path fill="none" stroke="#218559" d="M972.933,-1080.48C963.29,-1080.75 953.041,-1080.82 943.091,-1080.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-1077.18 932.867,-1080.47 942.792,-1084.18 942.937,-1077.18"/>
+</g>
+<!-- 36,3 -->
+<g id="node176" class="node"><title>36,3</title>
+<ellipse fill="none" stroke="black" cx="1154" cy="-1074" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1131.5" y="-1085.17" font-family="Times Roman,serif" font-size="10.00">36,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1129,-1066 1129,-1080 1180,-1080 1180,-1066 1129,-1066"/>
+<text text-anchor="start" x="1137" y="-1070.67" font-family="Times Roman,serif" font-size="10.00">GCGTG</text>
+<polygon fill="#218559" stroke="#218559" points="1129,-1052 1129,-1066 1180,-1066 1180,-1052 1129,-1052"/>
+<text text-anchor="start" x="1137" y="-1056.67" font-family="Times Roman,serif" font-size="10.00">CACGC</text>
+</g>
+<!-- 36,2->36,3 -->
+<g id="edge440" class="edge"><title>36,2->36,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1066.87,-1067.53C1076.5,-1067.25 1086.75,-1067.18 1096.7,-1067.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1096.86,-1070.82 1106.93,-1067.52 1097.01,-1063.82 1096.86,-1070.82"/>
+</g>
+<!-- 36,3->36,2 -->
+<g id="edge448" class="edge"><title>36,3->36,2</title>
+<path fill="none" stroke="#218559" d="M1106.93,-1080.48C1097.29,-1080.75 1087.04,-1080.82 1077.09,-1080.68"/>
+<polygon fill="#218559" stroke="#218559" points="1076.94,-1077.18 1066.87,-1080.47 1076.79,-1084.18 1076.94,-1077.18"/>
+</g>
+<!-- 36,3->36,4 -->
+<g id="edge446" class="edge"><title>36,3->36,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1200.87,-1067.53C1210.5,-1067.25 1220.75,-1067.18 1230.7,-1067.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1230.86,-1070.82 1240.93,-1067.52 1231.01,-1063.82 1230.86,-1070.82"/>
+</g>
+<!-- 36,4->37,2 -->
+<g id="edge450" class="edge"><title>36,4->37,2</title>
+<path fill="none" stroke="#ebb035" d="M1274.73,-1039.06C1254.94,-986.97 1220,-895 1220,-895 1220,-895 1185.83,-773.346 1166.67,-705.097"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1170.03,-704.117 1163.95,-695.436 1163.29,-706.01 1170.03,-704.117"/>
+</g>
+<!-- 36,4->36,3 -->
+<g id="edge452" class="edge"><title>36,4->36,3</title>
+<path fill="none" stroke="#218559" d="M1240.93,-1080.48C1231.29,-1080.75 1221.04,-1080.82 1211.09,-1080.68"/>
+<polygon fill="#218559" stroke="#218559" points="1210.94,-1077.18 1200.87,-1080.47 1210.79,-1084.18 1210.94,-1077.18"/>
+</g>
+<!-- 35,1 -->
+<g id="node179" class="node"><title>35,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-1636" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-1647.17" font-family="Times Roman,serif" font-size="10.00">35,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-1628 459,-1642 510,-1642 510,-1628 459,-1628"/>
+<text text-anchor="start" x="469" y="-1632.67" font-family="Times Roman,serif" font-size="10.00">TCGCT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-1614 459,-1628 510,-1628 510,-1614 459,-1614"/>
+<text text-anchor="start" x="466.5" y="-1618.67" font-family="Times Roman,serif" font-size="10.00">AGCGA</text>
+</g>
+<!-- 35,2 -->
+<g id="node180" class="node"><title>35,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-1636" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-1647.17" font-family="Times Roman,serif" font-size="10.00">35,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-1628 593,-1642 644,-1642 644,-1628 593,-1628"/>
+<text text-anchor="start" x="602.5" y="-1632.67" font-family="Times Roman,serif" font-size="10.00">CGCTT</text>
+<polygon fill="#218559" stroke="#218559" points="593,-1614 593,-1628 644,-1628 644,-1614 593,-1614"/>
+<text text-anchor="start" x="600.5" y="-1618.67" font-family="Times Roman,serif" font-size="10.00">AAGCG</text>
+</g>
+<!-- 35,1->35,2 -->
+<g id="edge836" class="edge"><title>35,1->35,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-1629.53C540.501,-1629.25 550.748,-1629.18 560.703,-1629.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-1632.82 570.933,-1629.52 561.007,-1625.82 560.864,-1632.82"/>
+</g>
+<!-- 35,2->35,1 -->
+<g id="edge844" class="edge"><title>35,2->35,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-1642.48C561.29,-1642.75 551.041,-1642.82 541.091,-1642.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-1639.18 530.867,-1642.47 540.792,-1646.18 540.937,-1639.18"/>
+</g>
+<!-- 35,2->35,3 -->
+<g id="edge840" class="edge"><title>35,2->35,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-1629.53C674.501,-1629.25 684.748,-1629.18 694.703,-1629.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-1632.82 704.933,-1629.52 695.007,-1625.82 694.864,-1632.82"/>
+</g>
+<!-- 34,1 -->
+<g id="node184" class="node"><title>34,1</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-756" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-767.167" font-family="Times Roman,serif" font-size="10.00">34,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-748 593,-762 644,-762 644,-748 593,-748"/>
+<text text-anchor="start" x="603" y="-752.667" font-family="Times Roman,serif" font-size="10.00">GCTTA</text>
+<polygon fill="#218559" stroke="#218559" points="593,-734 593,-748 644,-748 644,-734 593,-734"/>
+<text text-anchor="start" x="602" y="-738.667" font-family="Times Roman,serif" font-size="10.00">TAAGC</text>
+</g>
+<!-- 35,2->34,1 -->
+<g id="edge838" class="edge"><title>35,2->34,1</title>
+<path fill="none" stroke="#dd1e2f" d="M586.07,-1608.84C568.813,-1594.15 551,-1579 551,-1579 551,-1579 551,-1131 551,-1131 551,-1131 591.863,-902.29 609.764,-802.095"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="613.262,-802.416 611.576,-791.957 606.371,-801.185 613.262,-802.416"/>
+</g>
+<!-- 34,4 -->
+<g id="node187" class="node"><title>34,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-756" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-767.167" font-family="Times Roman,serif" font-size="10.00">34,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-748 995,-762 1046,-762 1046,-748 995,-748"/>
+<text text-anchor="start" x="1004" y="-752.667" font-family="Times Roman,serif" font-size="10.00">TAAGC</text>
+<polygon fill="#218559" stroke="#218559" points="995,-734 995,-748 1046,-748 1046,-734 995,-734"/>
+<text text-anchor="start" x="1005" y="-738.667" font-family="Times Roman,serif" font-size="10.00">GCTTA</text>
+</g>
+<!-- 35,2->34,4 -->
+<g id="edge842" class="edge"><title>35,2->34,4</title>
+<path fill="none" stroke="#ebb035" d="M649.6,-1608.91C662.303,-1598.03 674,-1588 674,-1588 674,-1588 696,-1122 696,-1122 696,-1122 808,-1060 808,-1060 808,-1060 830,-992 830,-992 830,-992 942,-911 942,-911 942,-911 975.191,-845.043 998.271,-799.18"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1001.51,-800.521 1002.88,-790.015 995.261,-797.375 1001.51,-800.521"/>
+</g>
+<!-- 35,3->36,1 -->
+<g id="edge848" class="edge"><title>35,3->36,1</title>
+<path fill="none" stroke="#06a2cb" d="M783.6,-1608.91C796.303,-1598.03 808,-1588 808,-1588 808,-1588 830,-1122 830,-1122 830,-1122 837.283,-1115.76 846.596,-1107.78"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="849.085,-1110.25 854.4,-1101.09 844.529,-1104.94 849.085,-1110.25"/>
+</g>
+<!-- 35,3->35,2 -->
+<g id="edge852" class="edge"><title>35,3->35,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-1642.48C695.29,-1642.75 685.041,-1642.82 675.091,-1642.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-1639.18 664.867,-1642.47 674.792,-1646.18 674.937,-1639.18"/>
+</g>
+<!-- 35,4 -->
+<g id="node182" class="node"><title>35,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1636" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-1647.17" font-family="Times Roman,serif" font-size="10.00">35,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-1628 861,-1642 912,-1642 912,-1628 861,-1628"/>
+<text text-anchor="start" x="871" y="-1632.67" font-family="Times Roman,serif" font-size="10.00">CTTAA</text>
+<polygon fill="#218559" stroke="#218559" points="861,-1614 861,-1628 912,-1628 912,-1614 861,-1614"/>
+<text text-anchor="start" x="871" y="-1618.67" font-family="Times Roman,serif" font-size="10.00">TTAAG</text>
+</g>
+<!-- 35,3->35,4 -->
+<g id="edge846" class="edge"><title>35,3->35,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-1629.53C808.501,-1629.25 818.748,-1629.18 828.703,-1629.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-1632.82 838.933,-1629.52 829.007,-1625.82 828.864,-1632.82"/>
+</g>
+<!-- 32,1 -->
+<g id="node194" class="node"><title>32,1</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-1974" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-1985.17" font-family="Times Roman,serif" font-size="10.00">32,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-1966 593,-1980 644,-1980 644,-1966 593,-1966"/>
+<text text-anchor="start" x="600.5" y="-1970.67" font-family="Times Roman,serif" font-size="10.00">AAGCG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-1952 593,-1966 644,-1966 644,-1952 593,-1952"/>
+<text text-anchor="start" x="602.5" y="-1956.67" font-family="Times Roman,serif" font-size="10.00">CGCTT</text>
+</g>
+<!-- 35,3->32,1 -->
+<g id="edge850" class="edge"><title>35,3->32,1</title>
+<path fill="none" stroke="#06a2cb" d="M720.4,-1663.09C707.697,-1673.97 696,-1684 696,-1684 696,-1684 686,-1878 686,-1878 686,-1878 665.176,-1907.4 646.575,-1933.66"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="643.567,-1931.85 640.643,-1942.03 649.279,-1935.9 643.567,-1931.85"/>
+</g>
+<!-- 35,4->35,3 -->
+<g id="edge858" class="edge"><title>35,4->35,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-1642.48C829.29,-1642.75 819.041,-1642.82 809.091,-1642.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-1639.18 798.867,-1642.47 808.792,-1646.18 808.937,-1639.18"/>
+</g>
+<!-- 34,2 -->
+<g id="node185" class="node"><title>34,2</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-756" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-767.167" font-family="Times Roman,serif" font-size="10.00">34,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-748 727,-762 778,-762 778,-748 727,-748"/>
+<text text-anchor="start" x="737" y="-752.667" font-family="Times Roman,serif" font-size="10.00">CTTAA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-734 727,-748 778,-748 778,-734 727,-734"/>
+<text text-anchor="start" x="737" y="-738.667" font-family="Times Roman,serif" font-size="10.00">TTAAG</text>
+</g>
+<!-- 35,4->34,2 -->
+<g id="edge856" class="edge"><title>35,4->34,2</title>
+<path fill="none" stroke="#ebb035" d="M854.4,-1608.91C841.697,-1598.03 830,-1588 830,-1588 830,-1588 820,-1343 820,-1343 820,-1343 808,-1122 808,-1122 808,-1122 774.093,-900.394 759.047,-802.057"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="762.495,-801.451 757.523,-792.095 755.576,-802.509 762.495,-801.451"/>
+</g>
+<!-- 34,3 -->
+<g id="node186" class="node"><title>34,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-756" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-767.167" font-family="Times Roman,serif" font-size="10.00">34,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-748 861,-762 912,-762 912,-748 861,-748"/>
+<text text-anchor="start" x="871" y="-752.667" font-family="Times Roman,serif" font-size="10.00">TTAAG</text>
+<polygon fill="#218559" stroke="#218559" points="861,-734 861,-748 912,-748 912,-734 861,-734"/>
+<text text-anchor="start" x="871" y="-738.667" font-family="Times Roman,serif" font-size="10.00">CTTAA</text>
+</g>
+<!-- 35,4->34,3 -->
+<g id="edge854" class="edge"><title>35,4->34,3</title>
+<path fill="none" stroke="#dd1e2f" d="M854.07,-1608.84C836.813,-1594.15 819,-1579 819,-1579 819,-1579 819,-1035 819,-1035 819,-1035 855.988,-880.975 874.999,-801.809"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="878.514,-802.16 877.446,-791.619 871.708,-800.525 878.514,-802.16"/>
+</g>
+<!-- 34,1->35,2 -->
+<g id="edge410" class="edge"><title>34,1->35,2</title>
+<path fill="none" stroke="#218559" d="M611.576,-791.957C594.806,-885.816 551,-1131 551,-1131 551,-1131 551,-1579 551,-1579 551,-1579 563.917,-1589.99 578.409,-1602.32"/>
+<polygon fill="#218559" stroke="#218559" points="576.185,-1605.02 586.07,-1608.84 580.721,-1599.69 576.185,-1605.02"/>
+</g>
+<!-- 34,1->34,2 -->
+<g id="edge406" class="edge"><title>34,1->34,2</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-749.529C674.501,-749.254 684.748,-749.183 694.703,-749.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-752.817 704.933,-749.523 695.007,-745.819 694.864,-752.817"/>
+</g>
+<!-- 33,2 -->
+<g id="node190" class="node"><title>33,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-1540" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-1551.17" font-family="Times Roman,serif" font-size="10.00">33,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-1532 593,-1546 644,-1546 644,-1532 593,-1532"/>
+<text text-anchor="start" x="602.5" y="-1536.67" font-family="Times Roman,serif" font-size="10.00">CGCTT</text>
+<polygon fill="#218559" stroke="#218559" points="593,-1518 593,-1532 644,-1532 644,-1518 593,-1518"/>
+<text text-anchor="start" x="600.5" y="-1522.67" font-family="Times Roman,serif" font-size="10.00">AAGCG</text>
+</g>
+<!-- 34,1->33,2 -->
+<g id="edge408" class="edge"><title>34,1->33,2</title>
+<path fill="none" stroke="#218559" d="M611.576,-791.957C594.806,-885.816 551,-1131 551,-1131 551,-1131 551,-1209 551,-1209 551,-1209 590.35,-1403.4 608.737,-1494.24"/>
+<polygon fill="#218559" stroke="#218559" points="605.352,-1495.16 610.766,-1504.26 612.213,-1493.77 605.352,-1495.16"/>
+</g>
+<!-- 34,2->35,4 -->
+<g id="edge414" class="edge"><title>34,2->35,4</title>
+<path fill="none" stroke="#ebb035" d="M759.934,-791.701C777.871,-872.42 820,-1062 820,-1062 820,-1062 830,-1588 830,-1588 830,-1588 837.283,-1594.24 846.596,-1602.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="844.529,-1605.06 854.4,-1608.91 849.085,-1599.75 844.529,-1605.06"/>
+</g>
+<!-- 34,2->34,1 -->
+<g id="edge418" class="edge"><title>34,2->34,1</title>
+<path fill="none" stroke="#218559" d="M704.933,-762.477C695.29,-762.749 685.041,-762.817 675.091,-762.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-759.18 664.867,-762.471 674.792,-766.179 674.937,-759.18"/>
+</g>
+<!-- 34,2->34,3 -->
+<g id="edge412" class="edge"><title>34,2->34,3</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-749.529C808.501,-749.254 818.748,-749.183 828.703,-749.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-752.817 838.933,-749.523 829.007,-745.819 828.864,-752.817"/>
+</g>
+<!-- 33,4 -->
+<g id="node192" class="node"><title>33,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1540" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-1551.17" font-family="Times Roman,serif" font-size="10.00">33,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-1532 861,-1546 912,-1546 912,-1532 861,-1532"/>
+<text text-anchor="start" x="871" y="-1536.67" font-family="Times Roman,serif" font-size="10.00">CTTAA</text>
+<polygon fill="#218559" stroke="#218559" points="861,-1518 861,-1532 912,-1532 912,-1518 861,-1518"/>
+<text text-anchor="start" x="871" y="-1522.67" font-family="Times Roman,serif" font-size="10.00">TTAAG</text>
+</g>
+<!-- 34,2->33,4 -->
+<g id="edge416" class="edge"><title>34,2->33,4</title>
+<path fill="none" stroke="#ebb035" d="M762.034,-791.413C780.949,-858.173 820,-996 820,-996 820,-996 830,-1492 830,-1492 830,-1492 837.283,-1498.24 846.596,-1506.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="844.529,-1509.06 854.4,-1512.91 849.085,-1503.75 844.529,-1509.06"/>
+</g>
+<!-- 34,3->35,4 -->
+<g id="edge422" class="edge"><title>34,3->35,4</title>
+<path fill="none" stroke="#218559" d="M877.446,-791.619C859.384,-866.834 819,-1035 819,-1035 819,-1035 819,-1579 819,-1579 819,-1579 831.917,-1589.99 846.409,-1602.32"/>
+<polygon fill="#218559" stroke="#218559" points="844.185,-1605.02 854.07,-1608.84 848.721,-1599.69 844.185,-1605.02"/>
+</g>
+<!-- 34,3->34,2 -->
+<g id="edge426" class="edge"><title>34,3->34,2</title>
+<path fill="none" stroke="#218559" d="M838.933,-762.477C829.29,-762.749 819.041,-762.817 809.091,-762.684"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-759.18 798.867,-762.471 808.792,-766.179 808.937,-759.18"/>
+</g>
+<!-- 34,3->34,4 -->
+<g id="edge420" class="edge"><title>34,3->34,4</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-749.529C942.501,-749.254 952.748,-749.183 962.703,-749.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-752.817 972.933,-749.523 963.007,-745.819 962.864,-752.817"/>
+</g>
+<!-- 34,3->33,4 -->
+<g id="edge424" class="edge"><title>34,3->33,4</title>
+<path fill="none" stroke="#218559" d="M877.446,-791.619C859.384,-866.834 819,-1035 819,-1035 819,-1035 819,-1483 819,-1483 819,-1483 831.917,-1493.99 846.409,-1506.32"/>
+<polygon fill="#218559" stroke="#218559" points="844.185,-1509.02 854.07,-1512.84 848.721,-1503.69 844.185,-1509.02"/>
+</g>
+<!-- 34,4->35,2 -->
+<g id="edge430" class="edge"><title>34,4->35,2</title>
+<path fill="none" stroke="#ebb035" d="M998.48,-788.28C975.392,-822.912 942,-873 942,-873 942,-873 830,-920 830,-920 830,-920 808,-1000 808,-1000 808,-1000 696,-1091 696,-1091 696,-1091 686,-1262 686,-1262 686,-1262 684,-1296 684,-1296 684,-1296 674,-1588 674,-1588 674,-1588 666.717,-1594.24 657.404,-1602.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="654.915,-1599.75 649.6,-1608.91 659.471,-1605.06 654.915,-1599.75"/>
+</g>
+<!-- 34,4->34,3 -->
+<g id="edge432" class="edge"><title>34,4->34,3</title>
+<path fill="none" stroke="#218559" d="M972.933,-762.477C963.29,-762.749 953.041,-762.817 943.091,-762.684"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-759.18 932.867,-762.471 942.792,-766.179 942.937,-759.18"/>
+</g>
+<!-- 34,4->33,2 -->
+<g id="edge428" class="edge"><title>34,4->33,2</title>
+<path fill="none" stroke="#ebb035" d="M994.294,-786.38C975.762,-808.281 954,-834 954,-834 954,-834 818,-883 818,-883 818,-883 808,-929 808,-929 808,-929 696,-1042 696,-1042 696,-1042 686,-1243 686,-1243 686,-1243 647.466,-1411.3 628.413,-1494.52"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="624.955,-1493.94 626.135,-1504.47 631.778,-1495.5 624.955,-1493.94"/>
+</g>
+<!-- 33,1 -->
+<g id="node189" class="node"><title>33,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-1540" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-1551.17" font-family="Times Roman,serif" font-size="10.00">33,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-1532 459,-1546 510,-1546 510,-1532 459,-1532"/>
+<text text-anchor="start" x="469" y="-1536.67" font-family="Times Roman,serif" font-size="10.00">TCGCT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-1518 459,-1532 510,-1532 510,-1518 459,-1518"/>
+<text text-anchor="start" x="466.5" y="-1522.67" font-family="Times Roman,serif" font-size="10.00">AGCGA</text>
+</g>
+<!-- 33,1->33,2 -->
+<g id="edge812" class="edge"><title>33,1->33,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-1533.53C540.501,-1533.25 550.748,-1533.18 560.703,-1533.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-1536.82 570.933,-1533.52 561.007,-1529.82 560.864,-1536.82"/>
+</g>
+<!-- 33,2->34,1 -->
+<g id="edge814" class="edge"><title>33,2->34,1</title>
+<path fill="none" stroke="#dd1e2f" d="M610.766,-1504.26C593.441,-1418.67 551,-1209 551,-1209 551,-1209 551,-1131 551,-1131 551,-1131 591.863,-902.29 609.764,-802.095"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="613.262,-802.416 611.576,-791.957 606.371,-801.185 613.262,-802.416"/>
+</g>
+<!-- 33,2->34,4 -->
+<g id="edge818" class="edge"><title>33,2->34,4</title>
+<path fill="none" stroke="#ebb035" d="M624.261,-1504C638.874,-1419.98 674,-1218 674,-1218 674,-1218 696,-929 696,-929 696,-929 830,-835 830,-835 830,-835 942,-801 942,-801 942,-801 956.586,-792.585 973.143,-783.033"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="975.001,-786.002 981.914,-777.973 971.503,-779.938 975.001,-786.002"/>
+</g>
+<!-- 33,2->33,1 -->
+<g id="edge820" class="edge"><title>33,2->33,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-1546.48C561.29,-1546.75 551.041,-1546.82 541.091,-1546.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-1543.18 530.867,-1546.47 540.792,-1550.18 540.937,-1543.18"/>
+</g>
+<!-- 33,2->33,3 -->
+<g id="edge816" class="edge"><title>33,2->33,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-1533.53C674.501,-1533.25 684.748,-1533.18 694.703,-1533.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-1536.82 704.933,-1533.52 695.007,-1529.82 694.864,-1536.82"/>
+</g>
+<!-- 33,3->36,1 -->
+<g id="edge824" class="edge"><title>33,3->36,1</title>
+<path fill="none" stroke="#06a2cb" d="M759.085,-1504.26C776.501,-1416.41 820,-1197 820,-1197 820,-1197 830,-1122 830,-1122 830,-1122 837.283,-1115.76 846.596,-1107.78"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="849.085,-1110.25 854.4,-1101.09 844.529,-1104.94 849.085,-1110.25"/>
+</g>
+<!-- 33,3->33,2 -->
+<g id="edge828" class="edge"><title>33,3->33,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-1546.48C695.29,-1546.75 685.041,-1546.82 675.091,-1546.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-1543.18 664.867,-1546.47 674.792,-1550.18 674.937,-1543.18"/>
+</g>
+<!-- 33,3->33,4 -->
+<g id="edge822" class="edge"><title>33,3->33,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-1533.53C808.501,-1533.25 818.748,-1533.18 828.703,-1533.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-1536.82 838.933,-1533.52 829.007,-1529.82 828.864,-1536.82"/>
+</g>
+<!-- 33,3->32,1 -->
+<g id="edge826" class="edge"><title>33,3->32,1</title>
+<path fill="none" stroke="#06a2cb" d="M720.4,-1567.09C707.697,-1577.97 696,-1588 696,-1588 696,-1588 686,-1698 686,-1698 686,-1698 674,-1780 674,-1780 674,-1780 647.692,-1871.14 631.139,-1928.48"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="627.682,-1927.84 628.271,-1938.42 634.407,-1929.78 627.682,-1927.84"/>
+</g>
+<!-- 33,4->34,2 -->
+<g id="edge832" class="edge"><title>33,4->34,2</title>
+<path fill="none" stroke="#ebb035" d="M854.4,-1512.91C841.697,-1502.03 830,-1492 830,-1492 830,-1492 820,-1032 820,-1032 820,-1032 782.554,-880.014 763.238,-801.612"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="766.541,-800.389 760.751,-791.517 759.745,-802.064 766.541,-800.389"/>
+</g>
+<!-- 33,4->34,3 -->
+<g id="edge830" class="edge"><title>33,4->34,3</title>
+<path fill="none" stroke="#dd1e2f" d="M854.07,-1512.84C836.813,-1498.15 819,-1483 819,-1483 819,-1483 819,-1035 819,-1035 819,-1035 855.988,-880.975 874.999,-801.809"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="878.514,-802.16 877.446,-791.619 871.708,-800.525 878.514,-802.16"/>
+</g>
+<!-- 33,4->33,3 -->
+<g id="edge834" class="edge"><title>33,4->33,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-1546.48C829.29,-1546.75 819.041,-1546.82 809.091,-1546.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-1543.18 798.867,-1546.47 808.792,-1550.18 808.937,-1543.18"/>
+</g>
+<!-- 32,1->35,3 -->
+<g id="edge388" class="edge"><title>32,1->35,3</title>
+<path fill="none" stroke="#06a2cb" d="M638.034,-1941C658.093,-1907.96 686,-1862 686,-1862 686,-1862 696,-1684 696,-1684 696,-1684 703.283,-1677.76 712.596,-1669.78"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="715.085,-1672.25 720.4,-1663.09 710.529,-1666.94 715.085,-1672.25"/>
+</g>
+<!-- 32,1->33,3 -->
+<g id="edge386" class="edge"><title>32,1->33,3</title>
+<path fill="none" stroke="#06a2cb" d="M628.271,-1938.42C644.438,-1882.41 674,-1780 674,-1780 674,-1780 696,-1588 696,-1588 696,-1588 703.283,-1581.76 712.596,-1573.78"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="715.085,-1576.25 720.4,-1567.09 710.529,-1570.94 715.085,-1576.25"/>
+</g>
+<!-- 32,1->32,2 -->
+<g id="edge384" class="edge"><title>32,1->32,2</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-1967.53C674.501,-1967.25 684.748,-1967.18 694.703,-1967.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-1970.82 704.933,-1967.52 695.007,-1963.82 694.864,-1970.82"/>
+</g>
+<!-- 32,2->31,1 -->
+<g id="edge390" class="edge"><title>32,2->31,1</title>
+<path fill="none" stroke="#dd1e2f" d="M719.991,-1947.17C702.354,-1932.39 684,-1917 684,-1917 684,-1917 653.327,-1831.02 633.801,-1776.29"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="637.071,-1775.04 630.414,-1766.8 630.478,-1777.39 637.071,-1775.04"/>
+</g>
+<!-- 32,2->32,1 -->
+<g id="edge394" class="edge"><title>32,2->32,1</title>
+<path fill="none" stroke="#218559" d="M704.933,-1980.48C695.29,-1980.75 685.041,-1980.82 675.091,-1980.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-1977.18 664.867,-1980.47 674.792,-1984.18 674.937,-1977.18"/>
+</g>
+<!-- 32,3 -->
+<g id="node196" class="node"><title>32,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1974" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-1985.17" font-family="Times Roman,serif" font-size="10.00">32,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-1966 861,-1980 912,-1980 912,-1966 861,-1966"/>
+<text text-anchor="start" x="870" y="-1970.67" font-family="Times Roman,serif" font-size="10.00">GCGTC</text>
+<polygon fill="#218559" stroke="#218559" points="861,-1952 861,-1966 912,-1966 912,-1952 861,-1952"/>
+<text text-anchor="start" x="868.5" y="-1956.67" font-family="Times Roman,serif" font-size="10.00">GACGC</text>
+</g>
+<!-- 32,2->32,3 -->
+<g id="edge392" class="edge"><title>32,2->32,3</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-1967.53C808.501,-1967.25 818.748,-1967.18 828.703,-1967.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-1970.82 838.933,-1967.52 829.007,-1963.82 828.864,-1970.82"/>
+</g>
+<!-- 32,3->32,2 -->
+<g id="edge398" class="edge"><title>32,3->32,2</title>
+<path fill="none" stroke="#218559" d="M838.933,-1980.48C829.29,-1980.75 819.041,-1980.82 809.091,-1980.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-1977.18 798.867,-1980.47 808.792,-1984.18 808.937,-1977.18"/>
+</g>
+<!-- 32,3->32,4 -->
+<g id="edge396" class="edge"><title>32,3->32,4</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-1967.53C942.501,-1967.25 952.748,-1967.18 962.703,-1967.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-1970.82 972.933,-1967.52 963.007,-1963.82 962.864,-1970.82"/>
+</g>
+<!-- 32,4->29,2 -->
+<g id="edge402" class="edge"><title>32,4->29,2</title>
+<path fill="none" stroke="#ebb035" d="M1003.8,-2008.13C987.264,-2042.98 964,-2092 964,-2092 964,-2092 942,-2188 942,-2188 942,-2188 922.563,-2228.96 906.591,-2262.61"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="903.323,-2261.33 902.198,-2271.87 909.647,-2264.34 903.323,-2261.33"/>
+</g>
+<!-- 32,4->31,3 -->
+<g id="edge400" class="edge"><title>32,4->31,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1009.73,-1938.42C993.562,-1882.41 964,-1780 964,-1780 964,-1780 954,-1246 954,-1246 954,-1246 952,-1246 952,-1246 952,-1246 942,-1684 942,-1684 942,-1684 934.717,-1690.24 925.404,-1698.22"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="922.915,-1695.75 917.6,-1704.91 927.471,-1701.06 922.915,-1695.75"/>
+</g>
+<!-- 32,4->32,3 -->
+<g id="edge404" class="edge"><title>32,4->32,3</title>
+<path fill="none" stroke="#218559" d="M972.933,-1980.48C963.29,-1980.75 953.041,-1980.82 943.091,-1980.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-1977.18 932.867,-1980.47 942.792,-1984.18 942.937,-1977.18"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1/.part-0.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1/.part-0.crc
new file mode 100644
index 0000000..32c39ff
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1/.part-1.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1/.part-1.crc
new file mode 100644
index 0000000..72c5a6b
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1/part-0 b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1/part-0
new file mode 100755
index 0000000..7db79d9
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1/part-0
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1/part-1 b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1/part-1
new file mode 100755
index 0000000..54cde64
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq1/part-1
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2.txt b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2.txt
new file mode 100644
index 0000000..474c862
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2.txt
@@ -0,0 +1,24 @@
+((2,1) [(2,2)] [] [(3,3)] [] CTAGC) (null)
+((2,2) [(6,1),(2,3)] [(1,4)] [] [(2,1)] TAGCG) (null)
+((2,3) [(2,4)] [] [(3,1)] [(2,2)] AGCGC) (null)
+((2,4) [(6,3)] [(1,2)] [] [(2,3)] GCGCA) (null)
+((4,1) [(4,2)] [] [] [(3,2)] CTAGG) (null)
+((4,2) [(5,1),(4,3)] [] [] [(4,1)] TAGGA) (null)
+((4,3) [(4,4)] [] [] [(3,4),(4,2)] AGGAG) (null)
+((4,4) [(5,3)] [] [] [(4,3)] GGAGT) (null)
+((6,1) [(6,2)] [] [] [(2,2)] AGCGC) (null)
+((6,2) [(6,3)] [] [] [(6,1)] GCGCA) (null)
+((6,3) [(6,4)] [] [] [(2,4),(6,2)] CGCAT) (null)
+((6,4) [] [] [] [(6,3)] GCATT) (null)
+((1,1) [(1,2)] [] [] [] AATGC) (null)
+((1,2) [(1,3)] [(2,4)] [] [(1,1)] ATGCG) (null)
+((1,3) [(1,4)] [] [] [(1,2)] TGCGC) (null)
+((1,4) [] [(2,2)] [] [(1,3)] GCGCT) (null)
+((3,1) [(3,2)] [] [(2,3)] [] CGCTA) (null)
+((3,2) [(4,1),(3,3)] [] [] [(3,1)] GCTAG) (null)
+((3,3) [(3,4)] [] [(2,1)] [(3,2)] CTAGG) (null)
+((3,4) [(4,3)] [] [] [(3,3)] TAGGA) (null)
+((5,1) [(5,2)] [] [] [(4,2)] AGGAG) (null)
+((5,2) [(5,3)] [] [] [(5,1)] GGAGT) (null)
+((5,3) [(5,4)] [] [] [(4,4),(5,2)] GAGTT) (null)
+((5,4) [] [] [] [(5,3)] AGTTG) (null)
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2.txt.svg b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2.txt.svg
new file mode 100644
index 0000000..70ffb5c
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2.txt.svg
@@ -0,0 +1,587 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: walk_random_seq2_txt Pages: 1 -->
+<svg width="1260pt" height="669pt"
+ viewBox="0.00 0.00 1260.00 669.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 665)">
+<title>walk_random_seq2_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-665 1257,-665 1257,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-130 49,-331 211,-331 211,-130 49,-130"/>
+<text text-anchor="middle" x="130" y="-314.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-558 8,-646 500,-646 500,-558 8,-558"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_3</title>
+<polygon fill="none" stroke="black" points="274,-200 274,-288 748,-288 748,-200 274,-200"/>
+</g>
+<g id="graph5" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="150,-462 150,-550 624,-550 624,-462 150,-462"/>
+</g>
+<g id="graph6" class="cluster"><title>cluster_5</title>
+<polygon fill="none" stroke="black" points="770,-8 770,-96 1244,-96 1244,-8 770,-8"/>
+</g>
+<g id="graph7" class="cluster"><title>cluster_4</title>
+<polygon fill="none" stroke="black" points="522,-104 522,-192 996,-192 996,-104 522,-104"/>
+</g>
+<g id="graph8" class="cluster"><title>cluster_6</title>
+<polygon fill="none" stroke="black" points="398,-366 398,-454 872,-454 872,-366 398,-366"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-279" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-279" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-279C74.8673,-279 156.744,-279 188.46,-279"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-282.5 198.862,-279 188.861,-275.5 188.862,-282.5"/>
+<text text-anchor="middle" x="130" y="-284.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-238" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-238" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-238C74.8673,-238 156.744,-238 188.46,-238"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-241.5 198.862,-238 188.861,-234.5 188.862,-241.5"/>
+<text text-anchor="middle" x="130" y="-243.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-197" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-197" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-197C74.8673,-197 156.744,-197 188.46,-197"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-200.5 198.862,-197 188.861,-193.5 188.862,-200.5"/>
+<text text-anchor="middle" x="130" y="-202.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-156" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-156" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-156C74.8673,-156 156.744,-156 188.46,-156"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-159.5 198.862,-156 188.861,-152.5 188.862,-159.5"/>
+<text text-anchor="middle" x="130" y="-161.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-602" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-613.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-594 37,-608 82,-608 82,-594 37,-594"/>
+<text text-anchor="start" x="43" y="-598.667" font-family="Times Roman,serif" font-size="10.00">AATGC</text>
+<polygon fill="#218559" stroke="#218559" points="37,-580 37,-594 82,-594 82,-580 37,-580"/>
+<text text-anchor="start" x="44" y="-584.667" font-family="Times Roman,serif" font-size="10.00">GCATT</text>
+</g>
+<!-- 1,2 -->
+<g id="node16" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-602" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-613.167" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-594 179,-608 224,-608 224,-594 179,-594"/>
+<text text-anchor="start" x="185" y="-598.667" font-family="Times Roman,serif" font-size="10.00">ATGCG</text>
+<polygon fill="#218559" stroke="#218559" points="179,-580 179,-594 224,-594 224,-580 179,-580"/>
+<text text-anchor="start" x="185" y="-584.667" font-family="Times Roman,serif" font-size="10.00">CGCAT</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge77" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-595.755C116.207,-595.208 132.729,-595.105 148.049,-595.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-598.957 158.425,-595.756 148.534,-591.96 148.326,-598.957"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge83" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-608.244C143.825,-608.792 127.305,-608.895 111.982,-608.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-605.043 101.605,-608.245 111.497,-612.04 111.704,-605.043"/>
+</g>
+<!-- 1,3 -->
+<g id="node17" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-602" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-613.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-594 303,-608 348,-608 348,-594 303,-594"/>
+<text text-anchor="start" x="308.5" y="-598.667" font-family="Times Roman,serif" font-size="10.00">TGCGC</text>
+<polygon fill="#218559" stroke="#218559" points="303,-580 303,-594 348,-594 348,-580 303,-580"/>
+<text text-anchor="start" x="307.5" y="-584.667" font-family="Times Roman,serif" font-size="10.00">GCGCA</text>
+</g>
+<!-- 1,2->1,3 -->
+<g id="edge79" class="edge"><title>1,2->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-595.562C252.601,-595.258 262.531,-595.176 272.159,-595.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-598.822 282.443,-595.554 272.527,-591.823 272.364,-598.822"/>
+</g>
+<!-- 2,4 -->
+<g id="node28" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-506" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-517.167" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-498 551,-512 596,-512 596,-498 551,-498"/>
+<text text-anchor="start" x="555.5" y="-502.667" font-family="Times Roman,serif" font-size="10.00">GCGCA</text>
+<polygon fill="#218559" stroke="#218559" points="551,-484 551,-498 596,-498 596,-484 551,-484"/>
+<text text-anchor="start" x="556.5" y="-488.667" font-family="Times Roman,serif" font-size="10.00">TGCGC</text>
+</g>
+<!-- 1,2->2,4 -->
+<g id="edge81" class="edge"><title>1,2->2,4</title>
+<path fill="none" stroke="#ebb035" d="M231.143,-627.695C246.511,-640.796 262,-654 262,-654 262,-654 274,-661 274,-661 274,-661 500,-661 500,-661 500,-661 512,-639 512,-639 512,-639 535.266,-588.272 553.061,-549.474"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="556.462,-550.454 557.449,-539.906 550.099,-547.536 556.462,-550.454"/>
+</g>
+<!-- 1,3->1,2 -->
+<g id="edge87" class="edge"><title>1,3->1,2</title>
+<path fill="none" stroke="#218559" d="M282.443,-608.446C273.158,-608.745 263.225,-608.824 253.602,-608.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-605.175 243.327,-608.438 253.242,-612.173 253.407,-605.175"/>
+</g>
+<!-- 1,4 -->
+<g id="node18" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-602" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-613.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-594 427,-608 472,-608 472,-594 427,-594"/>
+<text text-anchor="start" x="432.5" y="-598.667" font-family="Times Roman,serif" font-size="10.00">GCGCT</text>
+<polygon fill="#218559" stroke="#218559" points="427,-580 427,-594 472,-594 472,-580 427,-580"/>
+<text text-anchor="start" x="431.5" y="-584.667" font-family="Times Roman,serif" font-size="10.00">AGCGC</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge85" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-595.562C376.601,-595.258 386.531,-595.176 396.159,-595.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-598.822 406.443,-595.554 396.527,-591.823 396.364,-598.822"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge91" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M406.443,-608.446C397.158,-608.745 387.225,-608.824 377.602,-608.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-605.175 367.327,-608.438 377.242,-612.173 377.407,-605.175"/>
+</g>
+<!-- 2,2 -->
+<g id="node26" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-506" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-517.167" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-498 303,-512 348,-512 348,-498 303,-498"/>
+<text text-anchor="start" x="309" y="-502.667" font-family="Times Roman,serif" font-size="10.00">TAGCG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-484 303,-498 348,-498 348,-484 303,-484"/>
+<text text-anchor="start" x="309" y="-488.667" font-family="Times Roman,serif" font-size="10.00">CGCTA</text>
+</g>
+<!-- 1,4->2,2 -->
+<g id="edge89" class="edge"><title>1,4->2,2</title>
+<path fill="none" stroke="#ebb035" d="M414.551,-580.127C399.843,-570.789 386,-562 386,-562 386,-562 374.667,-551.596 361.771,-539.757"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="363.804,-536.872 354.071,-532.688 359.07,-542.029 363.804,-536.872"/>
+</g>
+<!-- 3,1 -->
+<g id="node20" class="node"><title>3,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-244" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-255.167" font-family="Times Roman,serif" font-size="10.00">3,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-236 303,-250 348,-250 348,-236 303,-236"/>
+<text text-anchor="start" x="309" y="-240.667" font-family="Times Roman,serif" font-size="10.00">CGCTA</text>
+<polygon fill="#218559" stroke="#218559" points="303,-222 303,-236 348,-236 348,-222 303,-222"/>
+<text text-anchor="start" x="309" y="-226.667" font-family="Times Roman,serif" font-size="10.00">TAGCG</text>
+</g>
+<!-- 3,2 -->
+<g id="node21" class="node"><title>3,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-244" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-255.167" font-family="Times Roman,serif" font-size="10.00">3,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-236 427,-250 472,-250 472,-236 427,-236"/>
+<text text-anchor="start" x="433" y="-240.667" font-family="Times Roman,serif" font-size="10.00">GCTAG</text>
+<polygon fill="#218559" stroke="#218559" points="427,-222 427,-236 472,-236 472,-222 427,-222"/>
+<text text-anchor="start" x="433" y="-226.667" font-family="Times Roman,serif" font-size="10.00">CTAGC</text>
+</g>
+<!-- 3,1->3,2 -->
+<g id="edge93" class="edge"><title>3,1->3,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-237.562C376.601,-237.258 386.531,-237.176 396.159,-237.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-240.822 406.443,-237.554 396.527,-233.823 396.364,-240.822"/>
+</g>
+<!-- 2,3 -->
+<g id="node27" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-506" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-517.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-498 427,-512 472,-512 472,-498 427,-498"/>
+<text text-anchor="start" x="431.5" y="-502.667" font-family="Times Roman,serif" font-size="10.00">AGCGC</text>
+<polygon fill="#218559" stroke="#218559" points="427,-484 427,-498 472,-498 472,-484 427,-484"/>
+<text text-anchor="start" x="432.5" y="-488.667" font-family="Times Roman,serif" font-size="10.00">GCGCT</text>
+</g>
+<!-- 3,1->2,3 -->
+<g id="edge95" class="edge"><title>3,1->2,3</title>
+<path fill="none" stroke="#06a2cb" d="M336.891,-278.859C357.577,-339.499 398,-458 398,-458 398,-458 404.502,-464.119 412.863,-471.988"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="410.54,-474.609 420.221,-478.914 415.338,-469.511 410.54,-474.609"/>
+</g>
+<!-- 3,2->3,1 -->
+<g id="edge101" class="edge"><title>3,2->3,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-250.446C397.158,-250.745 387.225,-250.824 377.602,-250.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-247.175 367.327,-250.438 377.242,-254.173 377.407,-247.175"/>
+</g>
+<!-- 3,3 -->
+<g id="node22" class="node"><title>3,3</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-244" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-255.167" font-family="Times Roman,serif" font-size="10.00">3,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-236 551,-250 596,-250 596,-236 551,-236"/>
+<text text-anchor="start" x="557" y="-240.667" font-family="Times Roman,serif" font-size="10.00">CTAGG</text>
+<polygon fill="#218559" stroke="#218559" points="551,-222 551,-236 596,-236 596,-222 551,-222"/>
+<text text-anchor="start" x="557" y="-226.667" font-family="Times Roman,serif" font-size="10.00">CCTAG</text>
+</g>
+<!-- 3,2->3,3 -->
+<g id="edge99" class="edge"><title>3,2->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-237.562C500.601,-237.258 510.531,-237.176 520.159,-237.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-240.822 530.443,-237.554 520.527,-233.823 520.364,-240.822"/>
+</g>
+<!-- 4,1 -->
+<g id="node35" class="node"><title>4,1</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-148" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">4,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-140 551,-154 596,-154 596,-140 551,-140"/>
+<text text-anchor="start" x="557" y="-144.667" font-family="Times Roman,serif" font-size="10.00">CTAGG</text>
+<polygon fill="#218559" stroke="#218559" points="551,-126 551,-140 596,-140 596,-126 551,-126"/>
+<text text-anchor="start" x="557" y="-130.667" font-family="Times Roman,serif" font-size="10.00">CCTAG</text>
+</g>
+<!-- 3,2->4,1 -->
+<g id="edge97" class="edge"><title>3,2->4,1</title>
+<path fill="none" stroke="#dd1e2f" d="M479.143,-218.305C494.511,-205.204 510,-192 510,-192 510,-192 519.601,-185.295 531.307,-177.119"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="533.461,-179.884 539.655,-171.288 529.453,-174.145 533.461,-179.884"/>
+</g>
+<!-- 3,3->3,2 -->
+<g id="edge107" class="edge"><title>3,3->3,2</title>
+<path fill="none" stroke="#218559" d="M530.443,-250.446C521.158,-250.745 511.225,-250.824 501.602,-250.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-247.175 491.327,-250.438 501.242,-254.173 501.407,-247.175"/>
+</g>
+<!-- 3,4 -->
+<g id="node23" class="node"><title>3,4</title>
+<ellipse fill="none" stroke="black" cx="697" cy="-244" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="677.5" y="-255.167" font-family="Times Roman,serif" font-size="10.00">3,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="675,-236 675,-250 720,-250 720,-236 675,-236"/>
+<text text-anchor="start" x="681" y="-240.667" font-family="Times Roman,serif" font-size="10.00">TAGGA</text>
+<polygon fill="#218559" stroke="#218559" points="675,-222 675,-236 720,-236 720,-222 675,-222"/>
+<text text-anchor="start" x="682.5" y="-226.667" font-family="Times Roman,serif" font-size="10.00">TCCTA</text>
+</g>
+<!-- 3,3->3,4 -->
+<g id="edge103" class="edge"><title>3,3->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M615.327,-237.562C624.601,-237.258 634.531,-237.176 644.159,-237.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="644.364,-240.822 654.443,-237.554 644.527,-233.823 644.364,-240.822"/>
+</g>
+<!-- 2,1 -->
+<g id="node25" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-506" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-517.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-498 179,-512 224,-512 224,-498 179,-498"/>
+<text text-anchor="start" x="185" y="-502.667" font-family="Times Roman,serif" font-size="10.00">CTAGC</text>
+<polygon fill="#218559" stroke="#218559" points="179,-484 179,-498 224,-498 224,-484 179,-484"/>
+<text text-anchor="start" x="185" y="-488.667" font-family="Times Roman,serif" font-size="10.00">GCTAG</text>
+</g>
+<!-- 3,3->2,1 -->
+<g id="edge105" class="edge"><title>3,3->2,1</title>
+<path fill="none" stroke="#06a2cb" d="M544.148,-271.271C523.744,-290.557 500,-313 500,-313 500,-313 398,-362 398,-362 398,-362 299.419,-434.059 241.729,-476.229"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="239.538,-473.494 233.531,-482.221 243.669,-479.146 239.538,-473.494"/>
+</g>
+<!-- 3,4->3,3 -->
+<g id="edge111" class="edge"><title>3,4->3,3</title>
+<path fill="none" stroke="#218559" d="M654.443,-250.446C645.158,-250.745 635.225,-250.824 625.602,-250.681"/>
+<polygon fill="#218559" stroke="#218559" points="625.407,-247.175 615.327,-250.438 625.242,-254.173 625.407,-247.175"/>
+</g>
+<!-- 4,3 -->
+<g id="node37" class="node"><title>4,3</title>
+<ellipse fill="none" stroke="black" cx="821" cy="-148" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="801.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">4,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="799,-140 799,-154 844,-154 844,-140 799,-140"/>
+<text text-anchor="start" x="803.5" y="-144.667" font-family="Times Roman,serif" font-size="10.00">AGGAG</text>
+<polygon fill="#218559" stroke="#218559" points="799,-126 799,-140 844,-140 844,-126 799,-126"/>
+<text text-anchor="start" x="806" y="-130.667" font-family="Times Roman,serif" font-size="10.00">CTCCT</text>
+</g>
+<!-- 3,4->4,3 -->
+<g id="edge109" class="edge"><title>3,4->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M733.282,-224.419C747.287,-216.861 760,-210 760,-210 760,-210 772.484,-197.311 786.157,-183.414"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="788.837,-185.681 793.356,-176.098 783.847,-180.771 788.837,-185.681"/>
+</g>
+<!-- 2,1->3,3 -->
+<g id="edge19" class="edge"><title>2,1->3,3</title>
+<path fill="none" stroke="#06a2cb" d="M213.198,-471.207C230.999,-420.429 262,-332 262,-332 262,-332 500,-289 500,-289 500,-289 513.817,-280.483 529.429,-270.859"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="531.534,-273.673 538.21,-265.446 527.861,-267.714 531.534,-273.673"/>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge17" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-499.562C252.601,-499.258 262.531,-499.176 272.159,-499.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-502.822 282.443,-499.554 272.527,-495.823 272.364,-502.822"/>
+</g>
+<!-- 2,2->1,4 -->
+<g id="edge25" class="edge"><title>2,2->1,4</title>
+<path fill="none" stroke="#ebb035" d="M356.611,-530.473C372.521,-542.79 391.976,-557.853 408.924,-570.973"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="406.965,-573.883 417.015,-577.238 411.251,-568.348 406.965,-573.883"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge27" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M282.443,-512.446C273.158,-512.745 263.225,-512.824 253.602,-512.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-509.175 243.327,-512.438 253.242,-516.173 253.407,-509.175"/>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge23" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-499.562C376.601,-499.258 386.531,-499.176 396.159,-499.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-502.822 406.443,-499.554 396.527,-495.823 396.364,-502.822"/>
+</g>
+<!-- 6,1 -->
+<g id="node40" class="node"><title>6,1</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-410" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-421.167" font-family="Times Roman,serif" font-size="10.00">6,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-402 427,-416 472,-416 472,-402 427,-402"/>
+<text text-anchor="start" x="431.5" y="-406.667" font-family="Times Roman,serif" font-size="10.00">AGCGC</text>
+<polygon fill="#218559" stroke="#218559" points="427,-388 427,-402 472,-402 472,-388 427,-388"/>
+<text text-anchor="start" x="432.5" y="-392.667" font-family="Times Roman,serif" font-size="10.00">GCGCT</text>
+</g>
+<!-- 2,2->6,1 -->
+<g id="edge21" class="edge"><title>2,2->6,1</title>
+<path fill="none" stroke="#dd1e2f" d="M356.611,-481.527C372.521,-469.21 391.976,-454.147 408.924,-441.027"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="411.251,-443.652 417.015,-434.762 406.965,-438.117 411.251,-443.652"/>
+</g>
+<!-- 2,3->3,1 -->
+<g id="edge31" class="edge"><title>2,3->3,1</title>
+<path fill="none" stroke="#06a2cb" d="M419.713,-479.502C403.261,-464.617 386,-449 386,-449 386,-449 356.323,-349.267 338.369,-288.928"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="341.693,-287.827 335.486,-279.241 334.984,-289.824 341.693,-287.827"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge33" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M406.443,-512.446C397.158,-512.745 387.225,-512.824 377.602,-512.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-509.175 367.327,-512.438 377.242,-516.173 377.407,-509.175"/>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge29" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-499.562C500.601,-499.258 510.531,-499.176 520.159,-499.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-502.822 530.443,-499.554 520.527,-495.823 520.364,-502.822"/>
+</g>
+<!-- 2,4->1,2 -->
+<g id="edge37" class="edge"><title>2,4->1,2</title>
+<path fill="none" stroke="#ebb035" d="M555.885,-539.058C534.308,-580.734 500,-647 500,-647 500,-647 274,-647 274,-647 274,-647 260.183,-638.483 244.571,-628.859"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="246.139,-625.714 235.79,-623.446 242.466,-631.673 246.139,-625.714"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge39" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M530.443,-512.446C521.158,-512.745 511.225,-512.824 501.602,-512.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-509.175 491.327,-512.438 501.242,-516.173 501.407,-509.175"/>
+</g>
+<!-- 6,3 -->
+<g id="node42" class="node"><title>6,3</title>
+<ellipse fill="none" stroke="black" cx="697" cy="-410" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="677.5" y="-421.167" font-family="Times Roman,serif" font-size="10.00">6,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="675,-402 675,-416 720,-416 720,-402 675,-402"/>
+<text text-anchor="start" x="681" y="-406.667" font-family="Times Roman,serif" font-size="10.00">CGCAT</text>
+<polygon fill="#218559" stroke="#218559" points="675,-388 675,-402 720,-402 720,-388 675,-388"/>
+<text text-anchor="start" x="681" y="-392.667" font-family="Times Roman,serif" font-size="10.00">ATGCG</text>
+</g>
+<!-- 2,4->6,3 -->
+<g id="edge35" class="edge"><title>2,4->6,3</title>
+<path fill="none" stroke="#dd1e2f" d="M607.817,-484.447C622.387,-475.427 636,-467 636,-467 636,-467 647.721,-456.047 660.888,-443.744"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="663.369,-446.216 668.286,-436.831 658.59,-441.101 663.369,-446.216"/>
+</g>
+<!-- 5,1 -->
+<g id="node30" class="node"><title>5,1</title>
+<ellipse fill="none" stroke="black" cx="821" cy="-52" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="801.5" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">5,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="799,-44 799,-58 844,-58 844,-44 799,-44"/>
+<text text-anchor="start" x="803.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">AGGAG</text>
+<polygon fill="#218559" stroke="#218559" points="799,-30 799,-44 844,-44 844,-30 799,-30"/>
+<text text-anchor="start" x="806" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">CTCCT</text>
+</g>
+<!-- 5,2 -->
+<g id="node31" class="node"><title>5,2</title>
+<ellipse fill="none" stroke="black" cx="945" cy="-52" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="925.5" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">5,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="923,-44 923,-58 968,-58 968,-44 923,-44"/>
+<text text-anchor="start" x="928.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">GGAGT</text>
+<polygon fill="#218559" stroke="#218559" points="923,-30 923,-44 968,-44 968,-30 923,-30"/>
+<text text-anchor="start" x="929.5" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">ACTCC</text>
+</g>
+<!-- 5,1->5,2 -->
+<g id="edge113" class="edge"><title>5,1->5,2</title>
+<path fill="none" stroke="#dd1e2f" d="M863.327,-45.5616C872.601,-45.2583 882.531,-45.1764 892.159,-45.3159"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="892.364,-48.8215 902.443,-45.5541 892.527,-41.8234 892.364,-48.8215"/>
+</g>
+<!-- 4,2 -->
+<g id="node36" class="node"><title>4,2</title>
+<ellipse fill="none" stroke="black" cx="697" cy="-148" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="677.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">4,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="675,-140 675,-154 720,-154 720,-140 675,-140"/>
+<text text-anchor="start" x="681" y="-144.667" font-family="Times Roman,serif" font-size="10.00">TAGGA</text>
+<polygon fill="#218559" stroke="#218559" points="675,-126 675,-140 720,-140 720,-126 675,-126"/>
+<text text-anchor="start" x="682.5" y="-130.667" font-family="Times Roman,serif" font-size="10.00">TCCTA</text>
+</g>
+<!-- 5,1->4,2 -->
+<g id="edge115" class="edge"><title>5,1->4,2</title>
+<path fill="none" stroke="#218559" d="M790.857,-77.6953C775.489,-90.7961 760,-104 760,-104 760,-104 750.399,-110.705 738.693,-118.881"/>
+<polygon fill="#218559" stroke="#218559" points="736.539,-116.116 730.345,-124.712 740.547,-121.855 736.539,-116.116"/>
+</g>
+<!-- 5,2->5,1 -->
+<g id="edge119" class="edge"><title>5,2->5,1</title>
+<path fill="none" stroke="#218559" d="M902.443,-58.4459C893.158,-58.7455 883.225,-58.8237 873.602,-58.6806"/>
+<polygon fill="#218559" stroke="#218559" points="873.407,-55.1751 863.327,-58.4384 873.242,-62.1732 873.407,-55.1751"/>
+</g>
+<!-- 5,3 -->
+<g id="node32" class="node"><title>5,3</title>
+<ellipse fill="none" stroke="black" cx="1069" cy="-52" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1049.5" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">5,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1047,-44 1047,-58 1092,-58 1092,-44 1047,-44"/>
+<text text-anchor="start" x="1053.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">GAGTT</text>
+<polygon fill="#218559" stroke="#218559" points="1047,-30 1047,-44 1092,-44 1092,-30 1047,-30"/>
+<text text-anchor="start" x="1053.5" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">AACTC</text>
+</g>
+<!-- 5,2->5,3 -->
+<g id="edge117" class="edge"><title>5,2->5,3</title>
+<path fill="none" stroke="#dd1e2f" d="M987.327,-45.5616C996.601,-45.2583 1006.53,-45.1764 1016.16,-45.3159"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1016.36,-48.8215 1026.44,-45.5541 1016.53,-41.8234 1016.36,-48.8215"/>
+</g>
+<!-- 5,3->5,2 -->
+<g id="edge125" class="edge"><title>5,3->5,2</title>
+<path fill="none" stroke="#218559" d="M1026.44,-58.4459C1017.16,-58.7455 1007.23,-58.8237 997.602,-58.6806"/>
+<polygon fill="#218559" stroke="#218559" points="997.407,-55.1751 987.327,-58.4384 997.242,-62.1732 997.407,-55.1751"/>
+</g>
+<!-- 5,4 -->
+<g id="node33" class="node"><title>5,4</title>
+<ellipse fill="none" stroke="black" cx="1193" cy="-52" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1173.5" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">5,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1171,-44 1171,-58 1216,-58 1216,-44 1171,-44"/>
+<text text-anchor="start" x="1177.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">AGTTG</text>
+<polygon fill="#218559" stroke="#218559" points="1171,-30 1171,-44 1216,-44 1216,-30 1171,-30"/>
+<text text-anchor="start" x="1177" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">CAACT</text>
+</g>
+<!-- 5,3->5,4 -->
+<g id="edge121" class="edge"><title>5,3->5,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1111.33,-45.5616C1120.6,-45.2583 1130.53,-45.1764 1140.16,-45.3159"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1140.36,-48.8215 1150.44,-45.5541 1140.53,-41.8234 1140.36,-48.8215"/>
+</g>
+<!-- 4,4 -->
+<g id="node38" class="node"><title>4,4</title>
+<ellipse fill="none" stroke="black" cx="945" cy="-148" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="925.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">4,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="923,-140 923,-154 968,-154 968,-140 923,-140"/>
+<text text-anchor="start" x="928.5" y="-144.667" font-family="Times Roman,serif" font-size="10.00">GGAGT</text>
+<polygon fill="#218559" stroke="#218559" points="923,-126 923,-140 968,-140 968,-126 923,-126"/>
+<text text-anchor="start" x="929.5" y="-130.667" font-family="Times Roman,serif" font-size="10.00">ACTCC</text>
+</g>
+<!-- 5,3->4,4 -->
+<g id="edge123" class="edge"><title>5,3->4,4</title>
+<path fill="none" stroke="#218559" d="M1041.36,-80.0975C1025.22,-96.502 1008,-114 1008,-114 1008,-114 1000.24,-118.188 990.168,-123.624"/>
+<polygon fill="#218559" stroke="#218559" points="988.42,-120.59 981.282,-128.419 991.745,-126.75 988.42,-120.59"/>
+</g>
+<!-- 5,4->5,3 -->
+<g id="edge127" class="edge"><title>5,4->5,3</title>
+<path fill="none" stroke="#218559" d="M1150.44,-58.4459C1141.16,-58.7455 1131.23,-58.8237 1121.6,-58.6806"/>
+<polygon fill="#218559" stroke="#218559" points="1121.41,-55.1751 1111.33,-58.4384 1121.24,-62.1732 1121.41,-55.1751"/>
+</g>
+<!-- 4,1->3,2 -->
+<g id="edge43" class="edge"><title>4,1->3,2</title>
+<path fill="none" stroke="#218559" d="M541.015,-172.762C525.051,-185.122 505.587,-200.19 488.68,-213.28"/>
+<polygon fill="#218559" stroke="#218559" points="486.376,-210.637 480.611,-219.527 490.661,-216.173 486.376,-210.637"/>
+</g>
+<!-- 4,1->4,2 -->
+<g id="edge41" class="edge"><title>4,1->4,2</title>
+<path fill="none" stroke="#dd1e2f" d="M615.327,-141.562C624.601,-141.258 634.531,-141.176 644.159,-141.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="644.364,-144.822 654.443,-141.554 644.527,-137.823 644.364,-144.822"/>
+</g>
+<!-- 4,2->5,1 -->
+<g id="edge45" class="edge"><title>4,2->5,1</title>
+<path fill="none" stroke="#dd1e2f" d="M728.611,-123.527C744.521,-111.21 763.976,-96.1473 780.924,-83.0268"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="783.251,-85.6517 789.015,-76.7624 778.965,-80.1166 783.251,-85.6517"/>
+</g>
+<!-- 4,2->4,1 -->
+<g id="edge49" class="edge"><title>4,2->4,1</title>
+<path fill="none" stroke="#218559" d="M654.443,-154.446C645.158,-154.745 635.225,-154.824 625.602,-154.681"/>
+<polygon fill="#218559" stroke="#218559" points="625.407,-151.175 615.327,-154.438 625.242,-158.173 625.407,-151.175"/>
+</g>
+<!-- 4,2->4,3 -->
+<g id="edge47" class="edge"><title>4,2->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M739.327,-141.562C748.601,-141.258 758.531,-141.176 768.159,-141.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="768.364,-144.822 778.443,-141.554 768.527,-137.823 768.364,-144.822"/>
+</g>
+<!-- 4,3->3,4 -->
+<g id="edge53" class="edge"><title>4,3->3,4</title>
+<path fill="none" stroke="#218559" d="M789.015,-172.762C773.051,-185.122 753.587,-200.19 736.68,-213.28"/>
+<polygon fill="#218559" stroke="#218559" points="734.376,-210.637 728.611,-219.527 738.661,-216.173 734.376,-210.637"/>
+</g>
+<!-- 4,3->4,2 -->
+<g id="edge55" class="edge"><title>4,3->4,2</title>
+<path fill="none" stroke="#218559" d="M778.443,-154.446C769.158,-154.745 759.225,-154.824 749.602,-154.681"/>
+<polygon fill="#218559" stroke="#218559" points="749.407,-151.175 739.327,-154.438 749.242,-158.173 749.407,-151.175"/>
+</g>
+<!-- 4,3->4,4 -->
+<g id="edge51" class="edge"><title>4,3->4,4</title>
+<path fill="none" stroke="#dd1e2f" d="M863.327,-141.562C872.601,-141.258 882.531,-141.176 892.159,-141.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="892.364,-144.822 902.443,-141.554 892.527,-137.823 892.364,-144.822"/>
+</g>
+<!-- 4,4->5,3 -->
+<g id="edge57" class="edge"><title>4,4->5,3</title>
+<path fill="none" stroke="#dd1e2f" d="M976.611,-123.527C992.521,-111.21 1011.98,-96.1473 1028.92,-83.0268"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1031.25,-85.6517 1037.02,-76.7624 1026.97,-80.1166 1031.25,-85.6517"/>
+</g>
+<!-- 4,4->4,3 -->
+<g id="edge59" class="edge"><title>4,4->4,3</title>
+<path fill="none" stroke="#218559" d="M902.443,-154.446C893.158,-154.745 883.225,-154.824 873.602,-154.681"/>
+<polygon fill="#218559" stroke="#218559" points="873.407,-151.175 863.327,-154.438 873.242,-158.173 873.407,-151.175"/>
+</g>
+<!-- 6,1->2,2 -->
+<g id="edge63" class="edge"><title>6,1->2,2</title>
+<path fill="none" stroke="#218559" d="M415.655,-433.288C400.544,-443.843 386,-454 386,-454 386,-454 375.378,-463.055 362.998,-473.608"/>
+<polygon fill="#218559" stroke="#218559" points="360.482,-471.154 355.143,-480.305 365.023,-476.481 360.482,-471.154"/>
+</g>
+<!-- 6,2 -->
+<g id="node41" class="node"><title>6,2</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-410" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-421.167" font-family="Times Roman,serif" font-size="10.00">6,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-402 551,-416 596,-416 596,-402 551,-402"/>
+<text text-anchor="start" x="555.5" y="-406.667" font-family="Times Roman,serif" font-size="10.00">GCGCA</text>
+<polygon fill="#218559" stroke="#218559" points="551,-388 551,-402 596,-402 596,-388 551,-388"/>
+<text text-anchor="start" x="556.5" y="-392.667" font-family="Times Roman,serif" font-size="10.00">TGCGC</text>
+</g>
+<!-- 6,1->6,2 -->
+<g id="edge61" class="edge"><title>6,1->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-403.562C500.601,-403.258 510.531,-403.176 520.159,-403.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-406.822 530.443,-403.554 520.527,-399.823 520.364,-406.822"/>
+</g>
+<!-- 6,2->6,1 -->
+<g id="edge67" class="edge"><title>6,2->6,1</title>
+<path fill="none" stroke="#218559" d="M530.443,-416.446C521.158,-416.745 511.225,-416.824 501.602,-416.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-413.175 491.327,-416.438 501.242,-420.173 501.407,-413.175"/>
+</g>
+<!-- 6,2->6,3 -->
+<g id="edge65" class="edge"><title>6,2->6,3</title>
+<path fill="none" stroke="#dd1e2f" d="M615.327,-403.562C624.601,-403.258 634.531,-403.176 644.159,-403.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="644.364,-406.822 654.443,-403.554 644.527,-399.823 644.364,-406.822"/>
+</g>
+<!-- 6,3->2,4 -->
+<g id="edge71" class="edge"><title>6,3->2,4</title>
+<path fill="none" stroke="#218559" d="M665.015,-434.762C649.051,-447.122 629.587,-462.19 612.68,-475.28"/>
+<polygon fill="#218559" stroke="#218559" points="610.376,-472.637 604.611,-481.527 614.661,-478.173 610.376,-472.637"/>
+</g>
+<!-- 6,3->6,2 -->
+<g id="edge73" class="edge"><title>6,3->6,2</title>
+<path fill="none" stroke="#218559" d="M654.443,-416.446C645.158,-416.745 635.225,-416.824 625.602,-416.681"/>
+<polygon fill="#218559" stroke="#218559" points="625.407,-413.175 615.327,-416.438 625.242,-420.173 625.407,-413.175"/>
+</g>
+<!-- 6,4 -->
+<g id="node43" class="node"><title>6,4</title>
+<ellipse fill="none" stroke="black" cx="821" cy="-410" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="801.5" y="-421.167" font-family="Times Roman,serif" font-size="10.00">6,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="799,-402 799,-416 844,-416 844,-402 799,-402"/>
+<text text-anchor="start" x="806" y="-406.667" font-family="Times Roman,serif" font-size="10.00">GCATT</text>
+<polygon fill="#218559" stroke="#218559" points="799,-388 799,-402 844,-402 844,-388 799,-388"/>
+<text text-anchor="start" x="805" y="-392.667" font-family="Times Roman,serif" font-size="10.00">AATGC</text>
+</g>
+<!-- 6,3->6,4 -->
+<g id="edge69" class="edge"><title>6,3->6,4</title>
+<path fill="none" stroke="#dd1e2f" d="M739.327,-403.562C748.601,-403.258 758.531,-403.176 768.159,-403.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="768.364,-406.822 778.443,-403.554 768.527,-399.823 768.364,-406.822"/>
+</g>
+<!-- 6,4->6,3 -->
+<g id="edge75" class="edge"><title>6,4->6,3</title>
+<path fill="none" stroke="#218559" d="M778.443,-416.446C769.158,-416.745 759.225,-416.824 749.602,-416.681"/>
+<polygon fill="#218559" stroke="#218559" points="749.407,-413.175 739.327,-416.438 749.242,-420.173 749.407,-413.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2/.part-0.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2/.part-0.crc
new file mode 100644
index 0000000..2372965
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2/.part-1.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2/.part-1.crc
new file mode 100644
index 0000000..dc540d5
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2/part-0 b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2/part-0
new file mode 100755
index 0000000..f05a165
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2/part-0
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2/part-1 b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2/part-1
new file mode 100755
index 0000000..54201ec
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/synthetic/walk_random_seq2/part-1
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip.txt b/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip.txt
new file mode 100644
index 0000000..84f6828
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip.txt
@@ -0,0 +1,6 @@
+((2,1) [(2,3)] [] [] [] GGAATA) (null)
+((2,3) [(2,4)] [] [] [(2,1)] AATAC) (null)
+((2,4) [] [(1,2)] [] [(2,3)] ATACG) (null)
+((1,1) [(1,2)] [] [] [] AACGT) (null)
+((1,2) [(1,3)] [(2,4)] [] [(1,1)] ACGTA) (null)
+((1,3) [] [] [] [(1,2)] CGTATA) (null)
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip.txt.svg b/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip.txt.svg
new file mode 100644
index 0000000..9f13c3f
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip.txt.svg
@@ -0,0 +1,183 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: fr_with_tip_txt Pages: 1 -->
+<svg width="640pt" height="417pt"
+ viewBox="0.00 0.00 640.00 417.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 413)">
+<title>fr_with_tip_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-413 637,-413 637,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="274,-313 274,-401 624,-401 624,-313 274,-313"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 376,-305 376,-217 8,-217"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-349 303,-363 348,-363 348,-349 303,-349"/>
+<text text-anchor="start" x="309" y="-353.667" font-family="Times Roman,serif" font-size="10.00">AACGT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-335 303,-349 348,-349 348,-335 303,-335"/>
+<text text-anchor="start" x="310" y="-339.667" font-family="Times Roman,serif" font-size="10.00">ACGTT</text>
+</g>
+<!-- 1,2 -->
+<g id="node16" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-349 427,-363 472,-363 472,-349 427,-349"/>
+<text text-anchor="start" x="433.5" y="-353.667" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+<polygon fill="#218559" stroke="#218559" points="427,-335 427,-349 472,-349 472,-335 427,-335"/>
+<text text-anchor="start" x="434" y="-339.667" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge23" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-350.562C376.601,-350.258 386.531,-350.176 396.159,-350.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-353.822 406.443,-350.554 396.527,-346.823 396.364,-353.822"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge29" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-363.446C397.158,-363.745 387.225,-363.824 377.602,-363.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-360.175 367.327,-363.438 377.242,-367.173 377.407,-360.175"/>
+</g>
+<!-- 1,3 -->
+<g id="node17" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-349 551,-363 596,-363 596,-349 551,-349"/>
+<text text-anchor="start" x="555.5" y="-353.667" font-family="Times Roman,serif" font-size="10.00">CGTATA</text>
+<polygon fill="#218559" stroke="#218559" points="551,-335 551,-349 596,-349 596,-335 551,-335"/>
+<text text-anchor="start" x="555.5" y="-339.667" font-family="Times Roman,serif" font-size="10.00">TATACG</text>
+</g>
+<!-- 1,2->1,3 -->
+<g id="edge25" class="edge"><title>1,2->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-350.562C500.601,-350.258 510.531,-350.176 520.159,-350.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-353.822 530.443,-350.554 520.527,-346.823 520.364,-353.822"/>
+</g>
+<!-- 2,4 -->
+<g id="node21" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="309.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="310.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+</g>
+<!-- 1,2->2,4 -->
+<g id="edge27" class="edge"><title>1,2->2,4</title>
+<path fill="none" stroke="#ebb035" d="M415.655,-333.712C400.544,-323.157 386,-313 386,-313 386,-313 375.378,-303.945 362.998,-293.392"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="365.023,-290.519 355.143,-286.695 360.482,-295.846 365.023,-290.519"/>
+</g>
+<!-- 1,3->1,2 -->
+<g id="edge31" class="edge"><title>1,3->1,2</title>
+<path fill="none" stroke="#218559" d="M530.443,-363.446C521.158,-363.745 511.225,-363.824 501.602,-363.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-360.175 491.327,-363.438 501.242,-367.173 501.407,-360.175"/>
+</g>
+<!-- 2,1 -->
+<g id="node19" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="39.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">GGAATA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="42.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TATTCC</text>
+</g>
+<!-- 2,3 -->
+<g id="node20" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="186" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AATAC</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="187.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">GTATT</text>
+</g>
+<!-- 2,1->2,3 -->
+<g id="edge13" class="edge"><title>2,1->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 2,3->2,1 -->
+<g id="edge17" class="edge"><title>2,3->2,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge15" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 2,4->1,2 -->
+<g id="edge19" class="edge"><title>2,4->1,2</title>
+<path fill="none" stroke="#ebb035" d="M356.611,-285.473C372.521,-297.79 391.976,-312.853 408.924,-325.973"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="406.965,-328.883 417.015,-332.238 411.251,-323.348 406.965,-328.883"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge21" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip/.part-0.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip/.part-0.crc
new file mode 100644
index 0000000..35486b9
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip/.part-1.crc b/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip/.part-1.crc
new file mode 100644
index 0000000..53a0483
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip/part-0 b/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip/part-0
new file mode 100755
index 0000000..6077d6e
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip/part-0
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip/part-1 b/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip/part-1
new file mode 100755
index 0000000..5e8d3f9
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/graphs/tipremove/fr_with_tip/part-1
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/five_ff_bubbles.txt b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/five_ff_bubbles.txt
new file mode 100644
index 0000000..63a8e55
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/five_ff_bubbles.txt
@@ -0,0 +1,4 @@
+1 ACGTCCTT
+2 CGTCCTTA
+3 GTCCTTAG
+4 GTCCTTAG
diff --git a/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/five_length1_bubbles.txt b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/five_length1_bubbles.txt
new file mode 100644
index 0000000..13190dd
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/five_length1_bubbles.txt
@@ -0,0 +1,6 @@
+1 AATAGAAG
+2 AATAGAAG
+3 AATAGAAG
+4 AATAGAAG
+5 AATAGAAG
+6 AGAAGAAG
diff --git a/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/fr_bubble.txt b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/fr_bubble.txt
new file mode 100644
index 0000000..4026c2c
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/fr_bubble.txt
@@ -0,0 +1,2 @@
+1 AAACGTAT
+2 GGAATACG
diff --git a/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/fr_bubble_and_ff_bubble.txt b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/fr_bubble_and_ff_bubble.txt
new file mode 100644
index 0000000..e166418
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/fr_bubble_and_ff_bubble.txt
@@ -0,0 +1,3 @@
+1 AAACGTAT
+2 CGTATTCC
+3 GGAATACG
diff --git a/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/rf_bubble.txt b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/rf_bubble.txt
new file mode 100644
index 0000000..154dc8c
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/rf_bubble.txt
@@ -0,0 +1,3 @@
+1 ACGGTGTA
+2 ACCGTGGT
+
diff --git a/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/small_bubble.txt b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/small_bubble.txt
new file mode 100644
index 0000000..3e3bf7b
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/small_bubble.txt
@@ -0,0 +1,2 @@
+1 AATAGAAG
+2 AGAAGCCC
diff --git a/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/tip_and_bubble.txt b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/tip_and_bubble.txt
new file mode 100644
index 0000000..958ccff
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/reads/bubblemerge/tip_and_bubble.txt
@@ -0,0 +1,3 @@
+1 AATAGAAG
+2 ATAGACTA
+3 TAGACTAC
diff --git a/genomix/genomix-hadoop/src/test/resources/input/reads/pathmerge/singleread.txt b/genomix/genomix-hadoop/src/test/resources/input/reads/pathmerge/singleread.txt
new file mode 100644
index 0000000..63a95ad
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/reads/pathmerge/singleread.txt
@@ -0,0 +1 @@
+1 AATAGAAG
diff --git a/genomix/genomix-hadoop/src/test/resources/input/reads/synthetic/walk_random_seq1.txt b/genomix/genomix-hadoop/src/test/resources/input/reads/synthetic/walk_random_seq1.txt
new file mode 100644
index 0000000..35f1c49
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/reads/synthetic/walk_random_seq1.txt
@@ -0,0 +1,37 @@
+1 TAGTGCGA
+2 CCTCGCAC
+3 GCTAGGGT
+4 GAGGGTTG
+5 AGCAACCC
+6 GTTGCTGA
+7 TTTCAGCA
+8 CTGAAATC
+9 CAGATTTC
+10 GGCAGATT
+11 CTGGCAGA
+12 CTCTGGCA
+13 ATCTCTGG
+14 GCATCTCT
+15 CGGCATCT
+16 AACGGCAT
+17 GAAACGGC
+18 CGTTTCAA
+19 TATTGAAA
+20 TCAATACG
+21 AATACGTG
+22 TACGTGAA
+23 GTTTCACG
+24 TGAAACTA
+25 AAACTATT
+26 GTAATAGT
+27 TATTACGT
+28 TTACGTCA
+29 CATGACGT
+30 GTCATGAC
+31 GCGTCATG
+32 AAGCGTCA
+33 TCGCTTAA
+34 GCTTAAGC
+35 TCGCTTAA
+36 AAGCGTGT
+37 CCACACGC
diff --git a/genomix/genomix-hadoop/src/test/resources/input/reads/synthetic/walk_random_seq2.txt b/genomix/genomix-hadoop/src/test/resources/input/reads/synthetic/walk_random_seq2.txt
new file mode 100644
index 0000000..d65f7c0
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/reads/synthetic/walk_random_seq2.txt
@@ -0,0 +1,6 @@
+1 AATGCGCT
+2 CTAGCGCA
+3 CGCTAGGA
+4 CTAGGAGT
+5 AGGAGTTG
+6 AGCGCATT
diff --git a/genomix/genomix-hadoop/src/test/resources/input/reads/tipremove/fr_with_tip.txt b/genomix/genomix-hadoop/src/test/resources/input/reads/tipremove/fr_with_tip.txt
new file mode 100644
index 0000000..b6e1640
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/input/reads/tipremove/fr_with_tip.txt
@@ -0,0 +1,2 @@
+1 AACGTATA
+2 GGAATACG
diff --git a/genomix/genomix-hyracks/HyracksCodeFormatProfile.xml b/genomix/genomix-hyracks/HyracksCodeFormatProfile.xml
new file mode 100644
index 0000000..733ca5c
--- /dev/null
+++ b/genomix/genomix-hyracks/HyracksCodeFormatProfile.xml
@@ -0,0 +1,784 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<profiles version="11">
+ <profile kind="CodeFormatterProfile" name="HyracksCodeFormatProfile"
+ version="11">
+ <setting
+ id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags"
+ value="insert" />
+ <setting id="org.eclipse.jdt.core.formatter.disabling_tag"
+ value="@formatter:off" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration"
+ value="end_of_line" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field"
+ value="0" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.formatter.use_on_off_tags"
+ value="false" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line"
+ value="false" />
+ <setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields"
+ value="16" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer"
+ value="16" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression"
+ value="80" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_binary_operator"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer"
+ value="end_of_line" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while"
+ value="insert" />
+ <setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package"
+ value="1" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters"
+ value="insert" />
+ <setting id="org.eclipse.jdt.core.formatter.continuation_indentation"
+ value="2" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation"
+ value="16" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk"
+ value="1" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_binary_operator"
+ value="insert" />
+ <setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package"
+ value="0" />
+ <setting id="org.eclipse.jdt.core.compiler.source" value="1.5" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.formatter.comment.format_line_comments"
+ value="false" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations"
+ value="insert" />
+ <setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call"
+ value="16" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type"
+ value="1" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.align_type_members_on_columns"
+ value="false" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation"
+ value="16" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment"
+ value="false" />
+ <setting id="org.eclipse.jdt.core.formatter.lineSplit" value="120" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration"
+ value="0" />
+ <setting id="org.eclipse.jdt.core.formatter.indentation.size"
+ value="4" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.formatter.enabling_tag"
+ value="@formatter:on" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration"
+ value="16" />
+ <setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment"
+ value="0" />
+ <setting id="org.eclipse.jdt.core.compiler.problem.assertIdentifier"
+ value="error" />
+ <setting id="org.eclipse.jdt.core.formatter.tabulation.char"
+ value="space" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body"
+ value="true" />
+ <setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method"
+ value="1" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line"
+ value="false" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration"
+ value="16" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration"
+ value="end_of_line" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_method_declaration"
+ value="0" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch"
+ value="end_of_line" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.compiler.problem.enumIdentifier"
+ value="error" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch"
+ value="true" />
+ <setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.formatter.brace_position_for_block"
+ value="end_of_line" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration"
+ value="end_of_line" />
+ <setting id="org.eclipse.jdt.core.formatter.compact_else_if"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant"
+ value="end_of_line" />
+ <setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.formatter.tabulation.size"
+ value="4" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration"
+ value="16" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator"
+ value="insert" />
+ <setting id="org.eclipse.jdt.core.formatter.indent_empty_lines"
+ value="false" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case"
+ value="end_of_line" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve"
+ value="1" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression"
+ value="16" />
+ <setting id="org.eclipse.jdt.core.compiler.compliance" value="1.5" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer"
+ value="2" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression"
+ value="16" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line"
+ value="false" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration"
+ value="insert" />
+ <setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line"
+ value="false" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration"
+ value="16" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_binary_expression"
+ value="16" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration"
+ value="end_of_line" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode"
+ value="enabled" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line"
+ value="false" />
+ <setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_label"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant"
+ value="48" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments"
+ value="true" />
+ <setting id="org.eclipse.jdt.core.formatter.comment.line_length"
+ value="9999" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.blank_lines_between_import_groups"
+ value="1" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration"
+ value="end_of_line" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body"
+ value="0" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.formatter.wrap_before_binary_operator"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations"
+ value="1" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration"
+ value="16" />
+ <setting id="org.eclipse.jdt.core.formatter.join_lines_in_comments"
+ value="false" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if"
+ value="16" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports"
+ value="1" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert"
+ value="insert" />
+ <setting id="org.eclipse.jdt.core.formatter.comment.format_html"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration"
+ value="16" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.formatter.comment.format_source_code"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration"
+ value="16" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer"
+ value="insert" />
+ <setting id="org.eclipse.jdt.core.compiler.codegen.targetPlatform"
+ value="1.5" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations"
+ value="false" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation"
+ value="0" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member"
+ value="insert" />
+ <setting id="org.eclipse.jdt.core.formatter.comment.format_header"
+ value="false" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.comment.format_block_comments"
+ value="false" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant"
+ value="do not insert" />
+ <setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants"
+ value="49" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration"
+ value="end_of_line" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries"
+ value="true" />
+ <setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports"
+ value="1" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header"
+ value="true" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for"
+ value="insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column"
+ value="false" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments"
+ value="do not insert" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column"
+ value="false" />
+ <setting
+ id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line"
+ value="false" />
+ </profile>
+</profiles>
diff --git a/genomix/genomix-hyracks/pom.xml b/genomix/genomix-hyracks/pom.xml
new file mode 100644
index 0000000..69d5839
--- /dev/null
+++ b/genomix/genomix-hyracks/pom.xml
@@ -0,0 +1,266 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>genomix-hyracks</artifactId>
+ <name>genomix-hyracks</name>
+
+ <parent>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>genomix</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ </parent>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>appassembler-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <configuration>
+ <programs>
+ <program>
+ <mainClass>edu.uci.ics.genomix.hyracks.driver.Driver</mainClass>
+ <name>genomix</name>
+ </program>
+ <program>
+ <mainClass>edu.uci.ics.hyracks.control.cc.CCDriver</mainClass>
+ <name>genomixcc</name>
+ </program>
+ <program>
+ <mainClass>edu.uci.ics.hyracks.control.nc.NCDriver</mainClass>
+ <name>genomixnc</name>
+ </program>
+ </programs>
+ <repositoryLayout>flat</repositoryLayout>
+ <repositoryName>lib</repositoryName>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>assemble</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>2.2-beta-5</version>
+ <configuration>
+ <descriptorRefs>
+ <descriptorRef>jar-with-dependencies</descriptorRef>
+ </descriptorRefs>
+ </configuration>
+ <executions>
+ <execution>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/binary-assembly.xml</descriptor>
+ </descriptors>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>attached</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>make-my-jar-with-dependencies</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>2.7.2</version>
+ <configuration>
+ <forkMode>pertest</forkMode>
+ <argLine>-enableassertions -Xmx512m -XX:MaxPermSize=300m
+ -Dfile.encoding=UTF-8
+ -Djava.util.logging.config.file=src/test/resources/logging.properties</argLine>
+ <includes>
+ <include>**/*TestSuite.java</include>
+ <include>**/*Test.java</include>
+ </includes>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-clean-plugin</artifactId>
+ <configuration>
+ <filesets>
+ <fileset>
+ <directory>.</directory>
+ <includes>
+ <include>teststore*</include>
+ <include>edu*</include>
+ <include>actual*</include>
+ <include>build*</include>
+ <include>expect*</include>
+ <include>ClusterController*</include>
+ </includes>
+ </fileset>
+ </filesets>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <version>2.5</version>
+ <executions>
+ <execution>
+ <id>copy-scripts</id>
+ <!-- runs in the package phase: copies src/main/resources/scripts into target/appassembler/bin -->
+ <phase>package</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>target/appassembler/bin</outputDirectory>
+ <resources>
+ <resource>
+ <directory>src/main/resources/scripts</directory>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ <execution>
+ <id>copy-conf</id>
+ <!-- runs in the package phase: copies src/main/resources/conf into target/appassembler/conf -->
+ <phase>package</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>target/appassembler/conf</outputDirectory>
+ <resources>
+ <resource>
+ <directory>src/main/resources/conf</directory>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-resources-plugin</artifactId>
+ <version>2.6</version>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.8.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-dataflow-std</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-api</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-dataflow-common</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-data-std</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-control-cc</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-control-nc</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.kenai.nbpwr</groupId>
+ <artifactId>org-apache-commons-io</artifactId>
+ <version>1.3.1-201002241208</version>
+ <type>nbm</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks.examples</groupId>
+ <artifactId>hyracks-integration-tests</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-ipc</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>0.20.2</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>0.20.2</version>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-core</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-core</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>genomix-data</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+
+ </dependencies>
+</project>
diff --git a/genomix/genomix-hyracks/src/main/assembly/binary-assembly.xml b/genomix/genomix-hyracks/src/main/assembly/binary-assembly.xml
new file mode 100644
index 0000000..68d424a
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/assembly/binary-assembly.xml
@@ -0,0 +1,19 @@
+<assembly>
+ <id>binary-assembly</id>
+ <formats>
+ <format>zip</format>
+ <format>dir</format>
+ </formats>
+ <includeBaseDirectory>false</includeBaseDirectory>
+ <fileSets>
+ <fileSet>
+ <directory>target/appassembler/bin</directory>
+ <outputDirectory>bin</outputDirectory>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>target/appassembler/lib</directory>
+ <outputDirectory>lib</outputDirectory>
+ </fileSet>
+ </fileSets>
+</assembly>
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ByteSerializerDeserializer.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ByteSerializerDeserializer.java
new file mode 100644
index 0000000..257d5a3
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ByteSerializerDeserializer.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.data.accessors;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+public class ByteSerializerDeserializer implements ISerializerDeserializer<Byte> {
+    private static final long serialVersionUID = 1L;
+
+    /** Shared instance; the constructor is private, so callers cannot create their own. */
+    public static final ByteSerializerDeserializer INSTANCE = new ByteSerializerDeserializer();
+
+    private ByteSerializerDeserializer() { }
+
+    /** Reads a single byte from {@code in}; an IOException is rethrown as HyracksDataException. */
+    @Override
+    public Byte deserialize(DataInput in) throws HyracksDataException {
+        try {
+            return Byte.valueOf(in.readByte());
+        } catch (IOException ioe) {
+            throw new HyracksDataException(ioe);
+        }
+    }
+
+    /** Writes {@code instance} as a single byte; an IOException is rethrown as HyracksDataException. */
+    @Override
+    public void serialize(Byte instance, DataOutput out) throws HyracksDataException {
+        try {
+            out.writeByte(instance.byteValue());
+        } catch (IOException ioe) {
+            throw new HyracksDataException(ioe);
+        }
+    }
+
+    public static byte getByte(byte[] bytes, int offset) {
+        return bytes[offset]; // plain array read at the given index
+    }
+
+    public static void putByte(byte val, byte[] bytes, int offset) {
+        bytes[offset] = val; // plain array write at the given index
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerBinaryHashFunctionFamily.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerBinaryHashFunctionFamily.java
new file mode 100644
index 0000000..130d5ab
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerBinaryHashFunctionFamily.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.data.accessors;
+
+import edu.uci.ics.genomix.hyracks.data.primitive.KmerPointable;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
+
+public class KmerBinaryHashFunctionFamily implements IBinaryHashFunctionFamily {
+    private static final long serialVersionUID = 1L;
+
+    /** Creates a hash function that scales the k-mer hash by {@code seed + 1}; results are never negative. */
+    @Override
+    public IBinaryHashFunction createBinaryHashFunction(final int seed) {
+        return new IBinaryHashFunction() {
+            private KmerPointable p = new KmerPointable();
+
+            @Override
+            public int hash(byte[] bytes, int offset, int length) {
+                // A range ending exactly at bytes.length is still in bounds, so only reject offset + length > bytes.length.
+                if (length + offset > bytes.length) {
+                    throw new IllegalStateException("out of bound");
+                }
+                p.set(bytes, offset, length);
+                int hash = p.hash() * (seed + 1);
+                // -(hash + 1) maps every negative int, Integer.MIN_VALUE included, to a non-negative value.
+                return hash < 0 ? -(hash + 1) : hash;
+            }
+        };
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerHashPartitioncomputerFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerHashPartitioncomputerFactory.java
new file mode 100644
index 0000000..f8d4f84
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerHashPartitioncomputerFactory.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.data.accessors;
+
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFactory;
+
+public class KmerHashPartitioncomputerFactory implements ITuplePartitionComputerFactory {
+    private static final long serialVersionUID = 1L;
+
+    /** Folds {@code length} bytes starting at {@code offset} into a 31-based polynomial hash seeded with 1. */
+    public static int hashBytes(byte[] bytes, int offset, int length) {
+        final int end = offset + length;
+        int acc = 1;
+        for (int pos = offset; pos < end; pos++) {
+            acc = 31 * acc + bytes[pos];
+        }
+        return acc;
+    }
+
+    /** Returns a partitioner that picks a partition by hashing the bytes of each tuple's field 0. */
+    @Override
+    public ITuplePartitionComputer createPartitioner() {
+        return new ITuplePartitionComputer() {
+            @Override
+            public int partition(IFrameTupleAccessor accessor, int tIndex, int nParts) {
+                final int tupleStart = accessor.getTupleStartOffset(tIndex);
+                final int keyStart = accessor.getFieldStartOffset(tIndex, 0);
+                final int slotsLength = accessor.getFieldSlotsLength();
+                final int keyLength = accessor.getFieldLength(tIndex, 0);
+                final ByteBuffer frame = accessor.getBuffer();
+
+                // Hashed range starts at tuple start + field 0 start + field-slot length and spans field 0's length.
+                final int raw = hashBytes(frame.array(), tupleStart + keyStart + slotsLength, keyLength);
+                // -(raw + 1) keeps the value non-negative even for Integer.MIN_VALUE before the modulo.
+                return (raw < 0 ? -(raw + 1) : raw) % nParts;
+            }
+        };
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerNormarlizedComputerFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerNormarlizedComputerFactory.java
new file mode 100644
index 0000000..7dd3b14
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerNormarlizedComputerFactory.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.data.accessors;
+
+import edu.uci.ics.genomix.hyracks.data.primitive.KmerPointable;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class KmerNormarlizedComputerFactory implements INormalizedKeyComputerFactory {
+    private static final long serialVersionUID = 1L;
+
+    /** Returns a computer whose normalized key is {@link KmerPointable#getIntReverse} of the key bytes. */
+    @Override
+    public INormalizedKeyComputer createNormalizedKeyComputer() {
+        final INormalizedKeyComputer computer = new INormalizedKeyComputer() {
+            // Reads one int from the kmer; this int must stay consistent with the kmer comparator.
+            @Override
+            public int normalize(byte[] bytes, int start, int length) {
+                return KmerPointable.getIntReverse(bytes, start, length);
+            }
+        };
+        return computer;
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ReadIDPartitionComputerFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ReadIDPartitionComputerFactory.java
new file mode 100644
index 0000000..6773a73
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ReadIDPartitionComputerFactory.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.data.accessors;
+
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.genomix.data.Marshal;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFactory;
+
+public class ReadIDPartitionComputerFactory implements ITuplePartitionComputerFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Creates a partitioner that reads an int via {@link Marshal#getInt} at the start of field 0 and
+     * reduces it modulo {@code nParts}.
+     */
+    @Override
+    public ITuplePartitionComputer createPartitioner() {
+        return new ITuplePartitionComputer() {
+            @Override
+            public int partition(IFrameTupleAccessor accessor, int tIndex, int nParts) {
+                // Byte position handed to Marshal.getInt: tuple start + field-0 start + field-slot length.
+                int position = accessor.getTupleStartOffset(tIndex);
+                position += accessor.getFieldStartOffset(tIndex, 0);
+                position += accessor.getFieldSlotsLength();
+
+                ByteBuffer frame = accessor.getBuffer();
+                int value = Marshal.getInt(frame.array(), position);
+
+                // Fold negative values onto the non-negative range before taking the remainder.
+                int nonNegative = value < 0 ? -(value + 1) : value;
+                return nonNegative % nParts;
+            }
+        };
+    }
+
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/KmerPointable.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/KmerPointable.java
new file mode 100644
index 0000000..0457de9
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/KmerPointable.java
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.data.primitive;
+
+import edu.uci.ics.genomix.hyracks.data.accessors.KmerHashPartitioncomputerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.data.std.api.AbstractPointable;
+import edu.uci.ics.hyracks.data.std.api.IComparable;
+import edu.uci.ics.hyracks.data.std.api.IHashable;
+import edu.uci.ics.hyracks.data.std.api.INumeric;
+import edu.uci.ics.hyracks.data.std.api.IPointable;
+import edu.uci.ics.hyracks.data.std.api.IPointableFactory;
+
+public final class KmerPointable extends AbstractPointable implements IHashable, IComparable, INumeric {
+    /** Type traits advertised for kmers: not fixed length, with -1 reported as the fixed length. */
+    public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
+        private static final long serialVersionUID = 1L;
+        @Override
+        public boolean isFixedLength() {
+            return false;
+        }
+
+        @Override
+        public int getFixedLength() {
+            return -1;
+        }
+    };
+
+    /** Factory that hands out new {@link KmerPointable} instances together with {@link #TYPE_TRAITS}. */
+    public static final IPointableFactory FACTORY = new IPointableFactory() {
+        private static final long serialVersionUID = 1L;
+        @Override
+        public ITypeTraits getTypeTraits() {
+            return TYPE_TRAITS;
+        }
+
+        @Override
+        public IPointable createPointable() {
+            return new KmerPointable();
+        }
+    };
+
+    /** Reads the last two bytes as a short, final byte most significant; one byte if {@code length < 2}. */
+    public static short getShortReverse(byte[] bytes, int offset, int length) {
+        int last = offset + length - 1;
+        int value = (length < 2) ? (bytes[offset] & 0xff) : (((bytes[last] & 0xff) << 8) + (bytes[last - 1] & 0xff));
+        return (short) value;
+    }
+
+    /** Reads the last (at most four) bytes of the range as an int, the final byte being most significant. */
+    public static int getIntReverse(byte[] bytes, int offset, int length) {
+        int twoByteValue = getShortReverse(bytes, offset, length) & 0xffff;
+        if (length < 3) {
+            return twoByteValue;
+        }
+        int width = (length == 3) ? 3 : 4;
+        int value = 0;
+        for (int back = 1; back <= width; back++) {
+            value = (value << 8) + (bytes[offset + length - back] & 0xff);
+        }
+        return value;
+    }
+
+    /** Reads the last eight bytes as a long; shorter ranges yield {@link #getIntReverse} zero-extended. */
+    public static long getLongReverse(byte[] bytes, int offset, int length) {
+        if (length < 8) {
+            return ((long) getIntReverse(bytes, offset, length)) & 0x0ffffffffL;
+        }
+        long value = 0L;
+        for (int back = 1; back <= 8; back++) {
+            value = (value << 8) + (bytes[offset + length - back] & 0xff);
+        }
+        return value;
+    }
+
+    @Override
+    public int compareTo(IPointable pointer) {
+        return compareTo(pointer.getByteArray(), pointer.getStartOffset(), pointer.getLength());
+    }
+
+    /** Orders by length first, then by unsigned byte value scanning from the last byte to the first. */
+    @Override
+    public int compareTo(byte[] bytes, int offset, int length) {
+        int lengthDiff = this.length - length;
+        if (lengthDiff != 0) {
+            return lengthDiff;
+        }
+        for (int idx = length; idx-- > 0;) {
+            int mine = this.bytes[this.start + idx] & 0xff;
+            int theirs = bytes[offset + idx] & 0xff;
+            if (mine != theirs) {
+                return mine - theirs;
+            }
+        }
+        return 0;
+    }
+
+    @Override
+    public int hash() {
+        return KmerHashPartitioncomputerFactory.hashBytes(bytes, start, length);
+    }
+
+    @Override
+    public byte byteValue() {
+        return bytes[start + length - 1];
+    }
+
+    @Override
+    public short shortValue() {
+        return getShortReverse(bytes, start, length);
+    }
+
+    @Override
+    public int intValue() {
+        return getIntReverse(bytes, start, length);
+    }
+
+    @Override
+    public long longValue() {
+        return getLongReverse(bytes, start, length);
+    }
+
+    @Override
+    public float floatValue() {
+        return Float.intBitsToFloat(intValue());
+    }
+
+    @Override
+    public double doubleValue() {
+        return Double.longBitsToDouble(longValue());
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/AssembleKeyIntoNodeOperator.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/AssembleKeyIntoNodeOperator.java
new file mode 100644
index 0000000..f245c7a
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/AssembleKeyIntoNodeOperator.java
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.newgraph.dataflow;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.NodeWritable;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.IOperatorDescriptorRegistry;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable;
+
+public class AssembleKeyIntoNodeOperator extends AbstractSingleActivityOperatorDescriptor {
+
+    private static final long serialVersionUID = 1L;
+
+    public static final int InputKmerField = 0;
+    public static final int InputtempNodeField = 1;
+    public static final int OutputNodeField = 0;
+
+    public static final RecordDescriptor nodeOutputRec = new RecordDescriptor(new ISerializerDeserializer[1]);
+
+    private final int kmerSize;
+
+    /** Registers a one-input, one-output operator and sets the global kmer length to {@code kmerSize}. */
+    public AssembleKeyIntoNodeOperator(IOperatorDescriptorRegistry spec, RecordDescriptor outRecDesc, int kmerSize) {
+        super(spec, 1, 1);
+        recordDescriptors[0] = outRecDesc;
+        this.kmerSize = kmerSize;
+        KmerBytesWritable.setGlobalKmerLength(this.kmerSize);
+    }
+
+    /** Pushable that copies each tuple's kmer into its node and emits the node as a one-field tuple. */
+    public class MapReadToNodePushable extends AbstractUnaryInputUnaryOutputOperatorNodePushable {
+        public static final int INT_LENGTH = 4;
+        private final IHyracksTaskContext ctx;
+        private final RecordDescriptor inputRecDesc;
+        private final RecordDescriptor outputRecDesc;
+
+        private FrameTupleAccessor accessor;
+        private ByteBuffer writeBuffer;
+        private ArrayTupleBuilder builder;
+        private FrameTupleAppender appender;
+
+        NodeWritable readNode;
+        KmerBytesWritable readKmer;
+
+        public MapReadToNodePushable(IHyracksTaskContext ctx, RecordDescriptor inputRecDesc,
+                RecordDescriptor outputRecDesc) {
+            this.ctx = ctx;
+            this.inputRecDesc = inputRecDesc;
+            this.outputRecDesc = outputRecDesc;
+            readNode = new NodeWritable();
+            readKmer = new KmerBytesWritable();
+        }
+
+        /** Allocates the output frame and tuple helpers, then opens the downstream writer. */
+        @Override
+        public void open() throws HyracksDataException {
+            accessor = new FrameTupleAccessor(ctx.getFrameSize(), inputRecDesc);
+            writeBuffer = ctx.allocateFrame();
+            builder = new ArrayTupleBuilder(outputRecDesc.getFieldCount());
+            appender = new FrameTupleAppender(ctx.getFrameSize());
+            appender.reset(writeBuffer, true);
+            writer.open();
+        }
+
+        /** Emits one node per tuple of the incoming frame. */
+        @Override
+        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+            accessor.reset(buffer);
+            int tupleCount = accessor.getTupleCount();
+            for (int i = 0; i < tupleCount; i++) {
+                generateNodeFromKmer(i);
+            }
+        }
+
+        /** Loads the kmer and node of tuple {@code tIndex}, copies the kmer into the node and outputs it. */
+        private void generateNodeFromKmer(int tIndex) throws HyracksDataException {
+            // Both field positions are computed from tuple start + field-slot length + field start.
+            int fieldBase = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength();
+            int kmerOffset = fieldBase + accessor.getFieldStartOffset(tIndex, InputKmerField);
+            readKmer.setAsCopy(accessor.getBuffer().array(), kmerOffset);
+            readNode.reset();
+            int nodeOffset = fieldBase + accessor.getFieldStartOffset(tIndex, InputtempNodeField);
+            readNode.setAsCopy(accessor.getBuffer().array(), nodeOffset);
+            readNode.getKmer().setAsCopy(readKmer);
+            outputNode(readNode);
+        }
+
+        /**
+         * Appends the marshalled node to the output frame, flushing the frame once if it is full.
+         *
+         * @throws IllegalStateException if the tuple does not fit an empty frame, or if an
+         *             {@link IOException} is raised inside the try block (kept as the cause)
+         */
+        private void outputNode(NodeWritable node) throws HyracksDataException {
+            try {
+                builder.reset();
+                builder.addField(node.marshalToByteArray(), 0, node.getSerializedLength());
+                if (!appender.append(builder.getFieldEndOffsets(), builder.getByteArray(), 0, builder.getSize())) {
+                    FrameUtils.flushFrame(writeBuffer, writer);
+                    appender.reset(writeBuffer, true);
+                    if (!appender.append(builder.getFieldEndOffsets(), builder.getByteArray(), 0, builder.getSize())) {
+                        throw new IllegalStateException("Failed to append tuplebuilder to frame");
+                    }
+                }
+            } catch (IOException e) {
+                // Keep the original failure attached so the root cause is not lost.
+                throw new IllegalStateException("Failed to Add a field to the tupleBuilder.", e);
+            }
+        }
+
+        @Override
+        public void fail() throws HyracksDataException {
+            writer.fail();
+        }
+
+        /** Flushes any buffered tuples and closes the downstream writer. */
+        @Override
+        public void close() throws HyracksDataException {
+            if (appender.getTupleCount() > 0) {
+                FrameUtils.flushFrame(writeBuffer, writer);
+            }
+            writer.close();
+        }
+    }
+
+    @Override
+    public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
+            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
+        return new MapReadToNodePushable(ctx, recordDescProvider.getInputRecordDescriptor(getActivityId(), 0),
+                recordDescriptors[0]);
+    }
+}
+
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ConnectorPolicyAssignmentPolicy.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ConnectorPolicyAssignmentPolicy.java
new file mode 100644
index 0000000..6919e76
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ConnectorPolicyAssignmentPolicy.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.newgraph.dataflow;
+
+import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicyAssignmentPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.PipeliningConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedPipeliningConnectorPolicy;
+import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
+
+/**
+ * Connector policy assignment used by the precluster group-by: M-to-N partitioning-merging connectors get
+ * the sender-side materialized policy, every other connector is pipelined.
+ */
+public class ConnectorPolicyAssignmentPolicy implements IConnectorPolicyAssignmentPolicy {
+    private static final long serialVersionUID = 1L;
+
+    private IConnectorPolicy senderSideMaterializePolicy = new SendSideMaterializedPipeliningConnectorPolicy();
+    private IConnectorPolicy pipeliningPolicy = new PipeliningConnectorPolicy();
+
+    /** Picks the policy from the connector's type alone; producer/consumer counts and fanouts are ignored. */
+    @Override
+    public IConnectorPolicy getConnectorPolicyAssignment(IConnectorDescriptor c, int nProducers, int nConsumers,
+            int[] fanouts) {
+        boolean mergingConnector = c instanceof MToNPartitioningMergingConnectorDescriptor;
+        return mergingConnector ? senderSideMaterializePolicy : pipeliningPolicy;
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java
new file mode 100644
index 0000000..572fb96
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java
@@ -0,0 +1,228 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.newgraph.dataflow;
+
+import java.nio.ByteBuffer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.hdfs.api.IKeyValueParser;
+import edu.uci.ics.hyracks.hdfs.api.IKeyValueParserFactory;
+
+public class ReadsKeyValueParserFactory implements IKeyValueParserFactory<LongWritable, Text> {
+    private static final long serialVersionUID = 1L;
+    private static final Log LOG = LogFactory.getLog(ReadsKeyValueParserFactory.class);
+    /** Accepts sequences made only of the letters A, G, C and T; compiled once instead of per record. */
+    private static final Pattern GENE_PATTERN = Pattern.compile("[AGCT]+");
+
+    public static final int OutputKmerField = 0;
+    public static final int OutputNodeField = 1;
+
+    private final int readLength;
+    private final int kmerSize;
+
+    public static final RecordDescriptor readKmerOutputRec = new RecordDescriptor(new ISerializerDeserializer[] { null,
+            null });
+
+    public ReadsKeyValueParserFactory(int readlength, int k) {
+        this.readLength = readlength;
+        this.kmerSize = k;
+    }
+
+    public enum KmerDir { FORWARD, REVERSE }
+
+    /** Creates a parser that turns each valid read line into one (kmer, node) tuple per kmer of the read. */
+    @Override
+    public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx) {
+        final ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(2);
+        final ByteBuffer outputBuffer = ctx.allocateFrame();
+        final FrameTupleAppender outputAppender = new FrameTupleAppender(ctx.getFrameSize());
+        outputAppender.reset(outputBuffer, true);
+        KmerBytesWritable.setGlobalKmerLength(kmerSize);
+        return new IKeyValueParser<LongWritable, Text>() {
+            private PositionWritable nodeId = new PositionWritable();
+            private PositionListWritable nodeIdList = new PositionListWritable();
+            private NodeWritable curNode = new NodeWritable();
+            private NodeWritable nextNode = new NodeWritable();
+
+            private KmerBytesWritable curForwardKmer = new KmerBytesWritable();
+            private KmerBytesWritable curReverseKmer = new KmerBytesWritable();
+            private KmerBytesWritable nextForwardKmer = new KmerBytesWritable();
+            private KmerBytesWritable nextReverseKmer = new KmerBytesWritable();
+
+            private KmerDir curKmerDir = KmerDir.FORWARD;
+            private KmerDir nextKmerDir = KmerDir.FORWARD;
+            byte mateId = (byte) 0;
+
+            /** Parses a "readID TAB sequence" line; malformed, non-AGCT or wrong-length lines are skipped. */
+            @Override
+            public void parse(LongWritable key, Text value, IFrameWriter writer) throws HyracksDataException {
+                String[] geneLine = value.toString().split("\\t");
+                if (geneLine.length != 2) {
+                    return;
+                }
+                int readID;
+                try {
+                    readID = Integer.parseInt(geneLine[0]);
+                } catch (NumberFormatException e) {
+                    LOG.warn("Invalid data ");
+                    return;
+                }
+                String sequence = geneLine[1];
+                if (!GENE_PATTERN.matcher(sequence).matches()) {
+                    return;
+                }
+                if (sequence.length() != readLength) {
+                    LOG.warn("Invalid readlength at: " + readID);
+                    return;
+                }
+                SplitReads(readID, sequence.getBytes(), writer);
+            }
+
+            /** Emits a (kmer, node) tuple for every kmer of the read, linking each kmer to its successor. */
+            private void SplitReads(int readID, byte[] array, IFrameWriter writer) {
+                if (kmerSize >= array.length) {
+                    return;
+                }
+                /*first kmer*/
+                curNode.reset();
+                nextNode.reset();
+                curForwardKmer.setByRead(array, 0);
+                curReverseKmer.setByReadReverse(array, 0);
+                curKmerDir = curForwardKmer.compareTo(curReverseKmer) <= 0 ? KmerDir.FORWARD : KmerDir.REVERSE;
+                nextForwardKmer.setAsCopy(curForwardKmer);
+                nextKmerDir = setNextKmer(nextForwardKmer, nextReverseKmer, array[kmerSize]);
+                setNodeBasicInfo(curNode, mateId, readID, 0, 1);
+                setNodeBasicInfo(nextNode, mateId, readID, 0, 1);
+                setEdgeListForCurAndNextKmer(curKmerDir, curNode, nextKmerDir, nextNode);
+                writeToFrame(curForwardKmer, curReverseKmer, curKmerDir, curNode, writer);
+
+                /*middle kmer*/
+                for (int i = kmerSize + 1; i < array.length; i++) {
+                    curForwardKmer.setAsCopy(nextForwardKmer);
+                    curReverseKmer.setAsCopy(nextReverseKmer);
+                    curKmerDir = nextKmerDir;
+                    curNode.set(nextNode);
+                    nextNode.reset();
+                    nextKmerDir = setNextKmer(nextForwardKmer, nextReverseKmer, array[i]);
+                    setNodeBasicInfo(nextNode, mateId, readID, 0, 1);
+                    setEdgeListForCurAndNextKmer(curKmerDir, curNode, nextKmerDir, nextNode);
+                    writeToFrame(curForwardKmer, curReverseKmer, curKmerDir, curNode, writer);
+                }
+
+                /*last kmer*/
+                writeToFrame(nextForwardKmer, nextReverseKmer, nextKmerDir, nextNode, writer);
+            }
+
+            /** Resets the shared id list to the single id (mateId, readID, posId) and stores it and the coverage. */
+            public void setNodeBasicInfo(NodeWritable node, byte mateId, long readID, int posId, int iniCovergage) {
+                nodeId.set(mateId, readID, posId);
+                nodeIdList.reset();
+                nodeIdList.append(nodeId);
+                node.setNodeIdList(nodeIdList);
+                node.setAvgCoverage(iniCovergage);
+            }
+
+            /** Shifts {@code nextChar} into the forward kmer, refreshes the reverse kmer and compares the two. */
+            public KmerDir setNextKmer(KmerBytesWritable forwardKmer, KmerBytesWritable ReverseKmer,
+                    byte nextChar) {
+                forwardKmer.shiftKmerWithNextChar(nextChar);
+                // NOTE(review): getOffset() is applied to a fresh toString() array - confirm it is always 0 here.
+                ReverseKmer.setByReadReverse(forwardKmer.toString().getBytes(), forwardKmer.getOffset());
+                boolean forwardNotLarger = forwardKmer.compareTo(ReverseKmer) <= 0;
+                return forwardNotLarger ? KmerDir.FORWARD : KmerDir.REVERSE;
+            }
+
+            /** Writes the node keyed by the forward or the reverse kmer, depending on {@code curKmerDir}. */
+            public void writeToFrame(KmerBytesWritable forwardKmer, KmerBytesWritable reverseKmer, KmerDir curKmerDir,
+                    NodeWritable node, IFrameWriter writer) {
+                switch (curKmerDir) {
+                    case FORWARD:
+                        InsertToFrame(forwardKmer, node, writer);
+                        break;
+                    case REVERSE:
+                        InsertToFrame(reverseKmer, node, writer);
+                        break;
+                }
+            }
+
+            /** Records the cur/next adjacency in both nodes, in the lists selected by the two kmer directions. */
+            public void setEdgeListForCurAndNextKmer(KmerDir curKmerDir, NodeWritable curNode, KmerDir nextKmerDir,
+                    NodeWritable nextNode) {
+                if (curKmerDir == KmerDir.FORWARD && nextKmerDir == KmerDir.FORWARD) {
+                    curNode.getFFList().append(kmerSize, nextForwardKmer);
+                    nextNode.getRRList().append(kmerSize, curForwardKmer);
+                } else if (curKmerDir == KmerDir.FORWARD && nextKmerDir == KmerDir.REVERSE) {
+                    curNode.getFRList().append(kmerSize, nextReverseKmer);
+                    nextNode.getFRList().append(kmerSize, curForwardKmer);
+                } else if (curKmerDir == KmerDir.REVERSE && nextKmerDir == KmerDir.FORWARD) {
+                    curNode.getRFList().append(kmerSize, nextForwardKmer);
+                    nextNode.getRFList().append(kmerSize, curReverseKmer);
+                } else if (curKmerDir == KmerDir.REVERSE && nextKmerDir == KmerDir.REVERSE) {
+                    curNode.getRRList().append(kmerSize, nextReverseKmer);
+                    nextNode.getFFList().append(kmerSize, curReverseKmer);
+                }
+            }
+
+            /** Appends the (kmer, node) tuple to the output frame, flushing the frame once if it is full. */
+            private void InsertToFrame(KmerBytesWritable kmer, NodeWritable node, IFrameWriter writer) {
+                try {
+                    tupleBuilder.reset();
+                    tupleBuilder.addField(kmer.getBytes(), kmer.getOffset(), kmer.getLength());
+                    tupleBuilder.addField(node.marshalToByteArray(), 0, node.getSerializedLength());
+                    if (!outputAppender.append(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0,
+                            tupleBuilder.getSize())) {
+                        FrameUtils.flushFrame(outputBuffer, writer);
+                        outputAppender.reset(outputBuffer, true);
+                        if (!outputAppender.append(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0,
+                                tupleBuilder.getSize())) {
+                            throw new IllegalStateException(
+                                    "Failed to copy an record into a frame: the record kmerByteSize is too large.");
+                        }
+                    }
+                } catch (Exception e) {
+                    throw new IllegalStateException(e);
+                }
+            }
+
+            @Override
+            public void open(IFrameWriter writer) throws HyracksDataException {
+            }
+
+            @Override
+            public void close(IFrameWriter writer) throws HyracksDataException {
+                FrameUtils.flushFrame(outputBuffer, writer);
+            }
+        };
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/AggregateKmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/AggregateKmerAggregateFactory.java
new file mode 100644
index 0000000..94c8ec1
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/AggregateKmerAggregateFactory.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.newgraph.dataflow.aggregators;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
+
+public class AggregateKmerAggregateFactory implements IAggregatorDescriptorFactory {
+
+    private static final long serialVersionUID = 1L;
+    private final int kmerSize;
+
+    public AggregateKmerAggregateFactory(int k) {
+        this.kmerSize = k;
+    }
+
+    /**
+     * Creates the local aggregator: every input node of a group is merged into one {@link NodeWritable}
+     * held in the aggregate state, which is marshalled as the single output field.
+     */
+    @Override
+    public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
+            RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults)
+            throws HyracksDataException {
+        KmerBytesWritable.setGlobalKmerLength(kmerSize);
+        return new IAggregatorDescriptor() {
+
+            private NodeWritable readNode = new NodeWritable();
+
+            /** Sums tuple start, field start and field-slot length for field {@code fieldId} of the tuple. */
+            protected int getOffSet(IFrameTupleAccessor accessor, int tIndex, int fieldId) {
+                return accessor.getTupleStartOffset(tIndex) + accessor.getFieldStartOffset(tIndex, fieldId)
+                        + accessor.getFieldSlotsLength();
+            }
+
+            /** Appends the id list, the four edge lists and the coverage of the tuple's node (field 1) to target. */
+            private void mergeTupleNode(NodeWritable target, IFrameTupleAccessor accessor, int tIndex) {
+                readNode.setAsReference(accessor.getBuffer().array(), getOffSet(accessor, tIndex, 1));
+                target.getNodeIdList().appendList(readNode.getNodeIdList());
+                target.getFFList().appendList(readNode.getFFList());
+                target.getFRList().appendList(readNode.getFRList());
+                target.getRFList().appendList(readNode.getRFList());
+                target.getRRList().appendList(readNode.getRRList());
+                target.addCoverage(readNode);
+            }
+
+            @Override
+            public void reset() {
+            }
+
+            @Override
+            public void close() {
+            }
+
+            @Override
+            public AggregateState createAggregateStates() {
+                return new AggregateState(new NodeWritable());
+            }
+
+            /** Starts a group: clears the state node and merges the first tuple; no field is added to the builder. */
+            @Override
+            public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
+                    AggregateState state) throws HyracksDataException {
+                NodeWritable localUniNode = (NodeWritable) state.state;
+                localUniNode.reset();
+                mergeTupleNode(localUniNode, accessor, tIndex);
+            }
+
+            /** Merges one more tuple of the group into the state node. */
+            @Override
+            public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
+                    int stateTupleIndex, AggregateState state) throws HyracksDataException {
+                mergeTupleNode((NodeWritable) state.state, accessor, tIndex);
+            }
+
+            @Override
+            public void outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
+                    AggregateState state) throws HyracksDataException {
+                throw new IllegalStateException("partial result method should not be called");
+            }
+
+            /**
+             * Writes the marshalled state node as one field of {@code tupleBuilder}.
+             *
+             * @throws HyracksDataException wrapping the {@link IOException} if the write fails
+             */
+            @Override
+            public void outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
+                    AggregateState state) throws HyracksDataException {
+                DataOutput fieldOutput = tupleBuilder.getDataOutput();
+                NodeWritable localUniNode = (NodeWritable) state.state;
+                try {
+                    fieldOutput.write(localUniNode.marshalToByteArray(), 0, localUniNode.getSerializedLength());
+                    tupleBuilder.addFieldEndOffset();
+                } catch (IOException e) {
+                    throw new HyracksDataException("I/O exception when writing aggregation to the output buffer.", e);
+                }
+            }
+        };
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/MergeKmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/MergeKmerAggregateFactory.java
new file mode 100644
index 0000000..f83bc4b
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/MergeKmerAggregateFactory.java
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.newgraph.dataflow.aggregators;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
+
+public class MergeKmerAggregateFactory implements IAggregatorDescriptorFactory {
+    private static final long serialVersionUID = 1L;
+    private static final Log LOG = LogFactory.getLog(MergeKmerAggregateFactory.class);
+
+    private final int kmerSize;
+
+    /** @param k kmer length passed to KmerBytesWritable.setGlobalKmerLength when an aggregator is created */
+    public MergeKmerAggregateFactory(int k) {
+        this.kmerSize = k;
+    }
+
+    /**
+     * Creates an aggregator that unions the node stored in field 1 of every tuple of a group into one
+     * NodeWritable and writes that node as an output field of the final result (no partial results).
+     */
+    @Override
+    public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
+            RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults)
+            throws HyracksDataException {
+        final int frameSize = ctx.getFrameSize();
+        KmerBytesWritable.setGlobalKmerLength(kmerSize);
+        return new IAggregatorDescriptor() {
+            private NodeWritable readNode = new NodeWritable(); // scratch node re-pointed at each incoming tuple
+
+            /** Sums tuple start, field start and the field-slot length into a frame-buffer offset. */
+            protected int getOffSet(IFrameTupleAccessor accessor, int tIndex, int fieldId) {
+                int tupleOffset = accessor.getTupleStartOffset(tIndex);
+                int fieldStart = accessor.getFieldStartOffset(tIndex, fieldId);
+                return tupleOffset + fieldStart + accessor.getFieldSlotsLength();
+            }
+
+            /** Unions the node in field 1 of tuple tIndex into target and adds its coverage. */
+            private void mergeTuple(IFrameTupleAccessor accessor, int tIndex, NodeWritable target) {
+                readNode.setAsReference(accessor.getBuffer().array(), getOffSet(accessor, tIndex, 1));
+                target.getNodeIdList().unionUpdate(readNode.getNodeIdList());
+                target.getFFList().unionUpdate(readNode.getFFList());
+                target.getFRList().unionUpdate(readNode.getFRList());
+                target.getRFList().unionUpdate(readNode.getRFList());
+                target.getRRList().unionUpdate(readNode.getRRList());
+                target.addCoverage(readNode);
+            }
+
+            @Override
+            public AggregateState createAggregateStates() {
+                return new AggregateState(new NodeWritable());
+            }
+
+            /** Starts a group: clears the state node and merges the first tuple; adds no field to tupleBuilder. */
+            @Override
+            public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
+                    AggregateState state) throws HyracksDataException {
+                NodeWritable localUniNode = (NodeWritable) state.state;
+                localUniNode.reset();
+                mergeTuple(accessor, tIndex, localUniNode);
+            }
+
+            @Override
+            public void reset() {
+                // intentionally empty
+            }
+
+            @Override
+            public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
+                    int stateTupleIndex, AggregateState state) throws HyracksDataException {
+                mergeTuple(accessor, tIndex, (NodeWritable) state.state);
+            }
+
+            /** Always throws IllegalStateException: only final results are produced. */
+            @Override
+            public void outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
+                    AggregateState state) throws HyracksDataException {
+                throw new IllegalStateException("partial result method should not be called");
+            }
+
+            /** Writes the merged node as one field; logs a warning when it is larger than half a frame. */
+            @Override
+            public void outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
+                    AggregateState state) throws HyracksDataException {
+                DataOutput fieldOutput = tupleBuilder.getDataOutput();
+                NodeWritable localUniNode = (NodeWritable) state.state;
+                try {
+                    if (localUniNode.getSerializedLength() > frameSize / 2) {
+                        LOG.warn("MergeKmer: output data kmerByteSize is too big: " + localUniNode.getSerializedLength());
+                    }
+                    fieldOutput.write(localUniNode.marshalToByteArray(), 0, localUniNode.getSerializedLength());
+                    tupleBuilder.addFieldEndOffset();
+                } catch (IOException e) { // chain the cause so the underlying I/O failure is not lost
+                    throw new HyracksDataException("I/O exception when writing aggregation to the output buffer.", e);
+                }
+            }
+
+            @Override
+            public void close() {
+                // intentionally empty
+            }
+        };
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/driver/Driver.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/driver/Driver.java
new file mode 100644
index 0000000..4602ed2
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/driver/Driver.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.newgraph.driver;
+
+import java.net.URL;
+import java.util.EnumSet;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.GenericOptionsParser;
+
+import edu.uci.ics.genomix.hyracks.newgraph.job.GenomixJobConf;
+import edu.uci.ics.genomix.hyracks.newgraph.job.JobGen;
+import edu.uci.ics.genomix.hyracks.newgraph.job.JobGenBrujinGraph;
+import edu.uci.ics.genomix.hyracks.newgraph.job.JobGenCheckReader;
+
+import edu.uci.ics.hyracks.api.client.HyracksConnection;
+import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.job.JobFlag;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
+
+public class Driver {
+ public static enum Plan {
+ BUILD_DEBRUJIN_GRAPH,
+ CHECK_KMERREADER,
+ }
+
+ private static final String IS_PROFILING = "genomix.driver.profiling";
+ private static final String CPARTITION_PER_MACHINE = "genomix.driver.duplicate.num";
+ private static final Log LOG = LogFactory.getLog(Driver.class);
+ private JobGen jobGen;
+ private boolean profiling;
+
+ private int numPartitionPerMachine;
+
+ private IHyracksClientConnection hcc;
+ private Scheduler scheduler;
+
+ public Driver(String ipAddress, int port, int numPartitionPerMachine) throws HyracksException {
+ try {
+ hcc = new HyracksConnection(ipAddress, port);
+ scheduler = new Scheduler(hcc.getNodeControllerInfos());
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ this.numPartitionPerMachine = numPartitionPerMachine;
+ }
+
+ public void runJob(GenomixJobConf job) throws HyracksException {
+ runJob(job, Plan.BUILD_DEBRUJIN_GRAPH, false);
+ }
+
+ public void runJob(GenomixJobConf job, Plan planChoice, boolean profiling) throws HyracksException {
+ /** add hadoop configurations */
+ URL hadoopCore = job.getClass().getClassLoader().getResource("core-site.xml");
+ job.addResource(hadoopCore);
+ URL hadoopMapRed = job.getClass().getClassLoader().getResource("mapred-site.xml");
+ job.addResource(hadoopMapRed);
+ URL hadoopHdfs = job.getClass().getClassLoader().getResource("hdfs-site.xml");
+ job.addResource(hadoopHdfs);
+
+ LOG.info("job started");
+ long start = System.currentTimeMillis();
+ long end = start;
+ long time = 0;
+
+ this.profiling = profiling;
+ try {
+ Map<String, NodeControllerInfo> ncMap = hcc.getNodeControllerInfos();
+ LOG.info("ncmap:" + ncMap.size() + " " + ncMap.keySet().toString());
+ switch (planChoice) {
+ case BUILD_DEBRUJIN_GRAPH:
+ default:
+ jobGen = new JobGenBrujinGraph(job, scheduler, ncMap, numPartitionPerMachine);
+ break;
+ case CHECK_KMERREADER:
+ jobGen = new JobGenCheckReader(job, scheduler, ncMap, numPartitionPerMachine);
+ break;
+ }
+
+ start = System.currentTimeMillis();
+ run(jobGen);
+ end = System.currentTimeMillis();
+ time = end - start;
+ LOG.info("result writing finished " + time + "ms");
+ LOG.info("job finished");
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+
+ private void run(JobGen jobGen) throws Exception {
+ try {
+ JobSpecification createJob = jobGen.generateJob();
+ execute(createJob);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw e;
+ }
+ }
+
+ private void execute(JobSpecification job) throws Exception {
+ job.setUseConnectorPolicyForScheduling(false);
+ JobId jobId = hcc.startJob(job, profiling ? EnumSet.of(JobFlag.PROFILE_RUNTIME) : EnumSet.noneOf(JobFlag.class));
+ hcc.waitForCompletion(jobId);
+ }
+
+ public static void main(String[] args) throws Exception {
+ GenomixJobConf jobConf = new GenomixJobConf();
+ String[] otherArgs = new GenericOptionsParser(jobConf, args).getRemainingArgs();
+ if (otherArgs.length < 4) {
+ System.err.println("Need <serverIP> <port> <input> <output>");
+ System.exit(-1);
+ }
+ String ipAddress = otherArgs[0];
+ int port = Integer.parseInt(otherArgs[1]);
+ int numOfDuplicate = jobConf.getInt(CPARTITION_PER_MACHINE, 2);
+ boolean bProfiling = jobConf.getBoolean(IS_PROFILING, true);
+ // FileInputFormat.setInputPaths(job, otherArgs[2]);
+ {
+ @SuppressWarnings("deprecation")
+ Path path = new Path(jobConf.getWorkingDirectory(), otherArgs[2]);
+ jobConf.set("mapred.input.dir", path.toString());
+
+ @SuppressWarnings("deprecation")
+ Path outputDir = new Path(jobConf.getWorkingDirectory(), otherArgs[3]);
+ jobConf.set("mapred.output.dir", outputDir.toString());
+ }
+ // FileInputFormat.addInputPath(jobConf, new Path(otherArgs[2]));
+ // FileOutputFormat.setOutputPath(job, new Path(otherArgs[3]));
+ Driver driver = new Driver(ipAddress, port, numOfDuplicate);
+ driver.runJob(jobConf, Plan.BUILD_DEBRUJIN_GRAPH, bProfiling);
+ }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/io/NodeSequenceWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/io/NodeSequenceWriterFactory.java
new file mode 100644
index 0000000..1ca59d8
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/io/NodeSequenceWriterFactory.java
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hyracks.newgraph.io;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
+import org.apache.hadoop.io.SequenceFile.Writer;
+import org.apache.hadoop.mapred.JobConf;
+import edu.uci.ics.genomix.hyracks.newgraph.job.GenomixJobConf;
+import edu.uci.ics.genomix.hyracks.newgraph.dataflow.AssembleKeyIntoNodeOperator;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
+import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
+
+/** Creates tuple writers that append each tuple's node field to a Hadoop SequenceFile with null values. */
+@SuppressWarnings("deprecation")
+public class NodeSequenceWriterFactory implements ITupleWriterFactory {
+    private static final long serialVersionUID = 1L;
+    private final int kmerSize;
+    private ConfFactory confFactory;
+    public static final int OutputNodeField = AssembleKeyIntoNodeOperator.OutputNodeField;
+
+    public NodeSequenceWriterFactory(JobConf conf) throws HyracksDataException {
+        this.confFactory = new ConfFactory(conf);
+        this.kmerSize = conf.getInt(GenomixJobConf.KMER_LENGTH, GenomixJobConf.DEFAULT_KMERLEN);
+    }
+
+    public class TupleWriter implements ITupleWriter {
+        ConfFactory cf;
+        Writer writer = null;
+        NodeWritable node = new NodeWritable();
+
+        public TupleWriter(ConfFactory confFactory) {
+            this.cf = confFactory;
+        }
+
+        @Override
+        public void open(DataOutput output) throws HyracksDataException {
+            try {
+                writer = SequenceFile.createWriter(cf.getConf(), (FSDataOutputStream) output, NodeWritable.class,
+                        NullWritable.class, CompressionType.NONE, null);
+            } catch (IOException e) {
+                throw new HyracksDataException(e);
+            }
+        }
+
+        @Override
+        public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
+            node.setAsReference(tuple.getFieldData(OutputNodeField), tuple.getFieldStart(OutputNodeField));
+            try {
+                writer.append(node, NullWritable.get());
+            } catch (IOException e) {
+                throw new HyracksDataException(e);
+            }
+        }
+
+        @Override
+        public void close(DataOutput output) throws HyracksDataException {
+            try {
+                if (writer != null) { // null when open() was never called or failed
+                    writer.close(); // release the SequenceFile writer created by open()
+                }
+            } catch (IOException e) {
+                throw new HyracksDataException(e);
+            }
+        }
+    }
+
+    @Override
+    public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
+        KmerBytesWritable.setGlobalKmerLength(kmerSize);
+        return new TupleWriter(confFactory);
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/io/NodeTextWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/io/NodeTextWriterFactory.java
new file mode 100644
index 0000000..fa6ae9b
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/io/NodeTextWriterFactory.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hyracks.newgraph.io;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import edu.uci.ics.genomix.hyracks.newgraph.dataflow.AssembleKeyIntoNodeOperator;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
+
+/** Creates tuple writers that emit the node field of each tuple as one line of text. */
+public class NodeTextWriterFactory implements ITupleWriterFactory {
+
+    private static final long serialVersionUID = 1L;
+    private final int kmerSize;
+    public static final int OutputNodeField = AssembleKeyIntoNodeOperator.OutputNodeField;
+
+    /** @param k kmer length passed to KmerBytesWritable.setGlobalKmerLength before a writer is created */
+    public NodeTextWriterFactory(int k) {
+        this.kmerSize = k;
+    }
+
+    @Override
+    public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
+        KmerBytesWritable.setGlobalKmerLength(kmerSize);
+        return new ITupleWriter() {
+            NodeWritable node = new NodeWritable();
+
+            @Override
+            public void open(DataOutput output) throws HyracksDataException {
+                // nothing to set up: records are written straight to the supplied output
+            }
+
+            /** Writes node.toString() for field OutputNodeField followed by a newline. */
+            @Override
+            public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
+                node.setAsReference(tuple.getFieldData(OutputNodeField), tuple.getFieldStart(OutputNodeField));
+                try {
+                    // Explicit charset: the no-arg getBytes() depends on the JVM's platform default.
+                    output.write(node.toString().getBytes("UTF-8"));
+                    output.writeByte('\n');
+                } catch (IOException e) {
+                    throw new HyracksDataException(e);
+                }
+            }
+
+            @Override
+            public void close(DataOutput output) throws HyracksDataException {
+                // nothing to release here
+            }
+        };
+    }
+
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/GenomixJobConf.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/GenomixJobConf.java
new file mode 100644
index 0000000..b0edf77
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/GenomixJobConf.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.newgraph.job;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.JobConf;
+
+@SuppressWarnings("deprecation")
+public class GenomixJobConf extends JobConf {
+
+    public static final String JOB_NAME = "genomix";
+
+    /** Kmer length: configuration key and default value */
+    public static final String KMER_LENGTH = "genomix.kmerlen";
+    public static final int DEFAULT_KMERLEN = 21;
+    /** Read length: configuration key and default value */
+    public static final String READ_LENGTH = "genomix.readlen";
+    public static final int DEFAULT_READLEN = 124;
+    /** Frame size: configuration key and default value */
+    public static final String FRAME_SIZE = "genomix.framesize";
+    public static final int DEFAULT_FRAME_SIZE = 128 * 1024;
+    /** Frame limit (needed by hyracks): configuration key and default value */
+    public static final String FRAME_LIMIT = "genomix.framelimit";
+    public static final int DEFAULT_FRAME_LIMIT = 4096;
+    /** Table size (needed by hyracks): configuration key and default value */
+    public static final String TABLE_SIZE = "genomix.tablesize";
+    public static final int DEFAULT_TABLE_SIZE = 10485767;
+    /** Get reversed kmer sequence: configuration key and default value */
+    public static final String REVERSED_KMER = "genomix.kmer.reversed";
+    public static final boolean DEFAULT_REVERSED = true;
+
+    /** Configurations used by the hybrid groupby function in the graph build phase, each with its default */
+    public static final String GROUPBY_HYBRID_INPUTSIZE = "genomix.graph.groupby.hybrid.inputsize";
+    public static final long DEFAULT_GROUPBY_HYBRID_INPUTSIZE = 154000000L;
+    public static final String GROUPBY_HYBRID_INPUTKEYS = "genomix.graph.groupby.hybrid.inputkeys";
+    public static final long DEFAULT_GROUPBY_HYBRID_INPUTKEYS = 38500000L;
+    public static final String GROUPBY_HYBRID_RECORDSIZE_SINGLE = "genomix.graph.groupby.hybrid.recordsize.single";
+    public static final int DEFAULT_GROUPBY_HYBRID_RECORDSIZE_SINGLE = 9;
+    public static final String GROUPBY_HYBRID_RECORDSIZE_CROSS = "genomix.graph.groupby.hybrid.recordsize.cross";
+    public static final int DEFAULT_GROUPBY_HYBRID_RECORDSIZE_CROSS = 13;
+    public static final String GROUPBY_HYBRID_HASHLEVEL = "genomix.graph.groupby.hybrid.hashlevel";
+    public static final int DEFAULT_GROUPBY_HYBRID_HASHLEVEL = 1;
+
+    /** Groupby types: configuration key and the type names defined for it */
+    public static final String GROUPBY_TYPE = "genomix.graph.groupby.type";
+    public static final String GROUPBY_TYPE_HYBRID = "hybrid";
+    public static final String GROUPBY_TYPE_EXTERNAL = "external";
+    public static final String GROUPBY_TYPE_PRECLUSTER = "precluster";
+
+    /** Graph output format: configuration key and the format names defined for it */
+    public static final String OUTPUT_FORMAT = "genomix.graph.output";
+    public static final String OUTPUT_FORMAT_BINARY = "binary";
+    public static final String OUTPUT_FORMAT_TEXT = "text";
+
+    /** Job plan names */
+    public static final String JOB_PLAN_GRAPHBUILD = "graphbuild";
+    public static final String JOB_PLAN_GRAPHSTAT = "graphstat";
+
+    /** Creates a job configuration on top of a fresh Hadoop Configuration. */
+    public GenomixJobConf() throws IOException {
+        super(new Configuration());
+    }
+
+    /** Creates a job configuration on top of the given Hadoop Configuration. */
+    public GenomixJobConf(Configuration conf) throws IOException {
+        super(conf);
+    }
+
+    /** Stores kmerlength under KMER_LENGTH. */
+    public final void setKmerLength(int kmerlength) {
+        setInt(KMER_LENGTH, kmerlength);
+    }
+
+    /** Stores frameSize under FRAME_SIZE. */
+    public final void setFrameSize(int frameSize) {
+        setInt(FRAME_SIZE, frameSize);
+    }
+
+    /** Stores frameLimit under FRAME_LIMIT. */
+    public final void setFrameLimit(int frameLimit) {
+        setInt(FRAME_LIMIT, frameLimit);
+    }
+
+    /** Stores tableSize under TABLE_SIZE. */
+    public final void setTableSize(int tableSize) {
+        setInt(TABLE_SIZE, tableSize);
+    }
+
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGen.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGen.java
new file mode 100644
index 0000000..9649566
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGen.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.newgraph.job;
+
+import java.io.Serializable;
+import java.util.UUID;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
+
+public abstract class JobGen implements Serializable {
+    private static final long serialVersionUID = 1L;
+
+    /** Identifier of this job: a UUID string built from the wall-clock millis and nanoTime at construction. */
+    protected String jobId = new UUID(System.currentTimeMillis(), System.nanoTime()).toString();
+    /** Wraps the job configuration passed to the constructor. */
+    protected final ConfFactory confFactory;
+
+    public JobGen(GenomixJobConf job) throws HyracksDataException {
+        confFactory = new ConfFactory(job);
+    }
+
+    /** Builds the Hyracks job specification for this generator. */
+    public abstract JobSpecification generateJob() throws HyracksException;
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenBrujinGraph.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenBrujinGraph.java
new file mode 100644
index 0000000..6a5dcc4
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenBrujinGraph.java
@@ -0,0 +1,274 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.newgraph.job;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.genomix.hyracks.data.accessors.KmerHashPartitioncomputerFactory;
+import edu.uci.ics.genomix.hyracks.data.accessors.KmerNormarlizedComputerFactory;
+import edu.uci.ics.genomix.hyracks.data.primitive.KmerPointable;
+import edu.uci.ics.genomix.hyracks.newgraph.dataflow.ConnectorPolicyAssignmentPolicy;
+import edu.uci.ics.genomix.hyracks.newgraph.dataflow.ReadsKeyValueParserFactory;
+import edu.uci.ics.genomix.hyracks.newgraph.dataflow.AssembleKeyIntoNodeOperator;
+import edu.uci.ics.genomix.hyracks.newgraph.dataflow.aggregators.AggregateKmerAggregateFactory;
+import edu.uci.ics.genomix.hyracks.newgraph.dataflow.aggregators.MergeKmerAggregateFactory;
+import edu.uci.ics.genomix.hyracks.newgraph.io.NodeTextWriterFactory;
+
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.constraints.PartitionConstraintHelper;
+import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
+import edu.uci.ics.hyracks.data.std.api.IPointableFactory;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
+import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
+import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
+import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
+import edu.uci.ics.hyracks.hdfs.dataflow.HDFSWriteOperatorDescriptor;
+import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
+
+@SuppressWarnings("deprecation")
+public class JobGenBrujinGraph extends JobGen {
+ /**
+ *
+ */
+ private static final long serialVersionUID = 1L;
+
+ public enum GroupbyType { // group-by strategies; only PRECLUSTER has an explicit case in generateAggeragateDescriptorbyType
+ EXTERNAL,
+ PRECLUSTER,
+ HYBRIDHASH,
+ }
+
+ public enum OutputFormat { // output encodings; the outputFormat field of this type selects the node writer
+ TEXT,
+ BINARY,
+ }
+
+ protected ConfFactory hadoopJobConfFactory;
+ protected static final Log LOG = LogFactory.getLog(JobGenBrujinGraph.class);
+ protected String[] ncNodeNames;
+ protected String[] readSchedule;
+
+ protected int readLength;
+ protected int kmerSize;
+ protected int frameLimits;
+ protected int frameSize;
+ protected int tableSize;
+ protected GroupbyType groupbyType;
+ protected OutputFormat outputFormat;
+
+
+ protected void logDebug(String status) { // debug-logs status followed by the length of ncNodeNames
+ LOG.debug(status + " nc nodes:" + ncNodeNames.length);
+ }
+
+ public JobGenBrujinGraph(GenomixJobConf job, Scheduler scheduler, final Map<String, NodeControllerInfo> ncMap,
+ int numPartitionPerMachine) throws HyracksDataException {
+ super(job);
+ String[] nodes = new String[ncMap.size()];
+ ncMap.keySet().toArray(nodes);
+ ncNodeNames = new String[nodes.length * numPartitionPerMachine];
+ for (int i = 0; i < numPartitionPerMachine; i++) {
+ System.arraycopy(nodes, 0, ncNodeNames, i * nodes.length, nodes.length);
+ }
+ initJobConfiguration(scheduler);
+ }
+
+ private Object[] generateAggeragateDescriptorbyType(JobSpecification jobSpec, int[] keyFields,
+ IAggregatorDescriptorFactory aggregator, IAggregatorDescriptorFactory merger,
+ ITuplePartitionComputerFactory partition, INormalizedKeyComputerFactory normalizer,
+ IPointableFactory pointable, RecordDescriptor combineRed, RecordDescriptor finalRec)
+ throws HyracksDataException {
+
+ Object[] obj = new Object[3];
+
+ switch (groupbyType) {
+ case PRECLUSTER:
+ default:
+ obj[0] = new PreclusteredGroupOperatorDescriptor(jobSpec, keyFields,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(pointable) }, aggregator,
+ combineRed);
+ obj[1] = new MToNPartitioningMergingConnectorDescriptor(jobSpec, partition, keyFields,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(pointable) });
+ obj[2] = new PreclusteredGroupOperatorDescriptor(jobSpec, keyFields,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(pointable) }, merger,
+ finalRec);
+ jobSpec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
+ break;
+ }
+ return obj;
+ }
+
+    public HDFSReadOperatorDescriptor createHDFSReader(JobSpecification jobSpec) throws HyracksDataException {
+        try {
+            final int requestedSplits = ncNodeNames.length; // passed to getSplits as the number of splits asked for
+            InputSplit[] inputSplits = hadoopJobConfFactory.getConf().getInputFormat()
+                    .getSplits(hadoopJobConfFactory.getConf(), requestedSplits);
+            return new HDFSReadOperatorDescriptor(jobSpec, ReadsKeyValueParserFactory.readKmerOutputRec,
+                    hadoopJobConfFactory.getConf(), inputSplits, readSchedule,
+                    new ReadsKeyValueParserFactory(readLength, kmerSize));
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+    }
+
+ public static void connectOperators(JobSpecification jobSpec, IOperatorDescriptor preOp, String[] preNodes,
+ IOperatorDescriptor nextOp, String[] nextNodes, IConnectorDescriptor conn) {
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, preOp, preNodes); // place preOp on preNodes
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, nextOp, nextNodes); // place nextOp on nextNodes
+ jobSpec.connect(conn, preOp, 0, nextOp, 0); // link port 0 of preOp to port 0 of nextOp through conn
+ }
+
+ public AbstractOperatorDescriptor generateGroupbyKmerJob(JobSpecification jobSpec,
+ AbstractOperatorDescriptor readOperator) throws HyracksDataException {
+ int[] keyFields = new int[] { 0 }; // sort and group on field 0
+ // Pipeline built here: readOperator -> sort -> local group-by -> partitioned cross group-by (returned).
+ ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(jobSpec, frameLimits, keyFields,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(KmerPointable.FACTORY) },
+ ReadsKeyValueParserFactory.readKmerOutputRec);
+ // Reader and sorter share the ncNodeNames placement and are connected one-to-one.
+ connectOperators(jobSpec, readOperator, ncNodeNames, sorter, ncNodeNames, new OneToOneConnectorDescriptor(
+ jobSpec));
+ // Both group-by stages use the same two-field record descriptor; its serializer entries are null.
+ RecordDescriptor combineKmerOutputRec = new RecordDescriptor(new ISerializerDeserializer[] { null, null });
+ jobSpec.setFrameSize(frameSize);
+ // objs[0] = local group-by operator, objs[1] = partitioning connector, objs[2] = cross group-by operator.
+ Object[] objs = generateAggeragateDescriptorbyType(jobSpec, keyFields, new AggregateKmerAggregateFactory(kmerSize),
+ new MergeKmerAggregateFactory(kmerSize), new KmerHashPartitioncomputerFactory(),
+ new KmerNormarlizedComputerFactory(), KmerPointable.FACTORY, combineKmerOutputRec, combineKmerOutputRec);
+ AbstractOperatorDescriptor kmerLocalAggregator = (AbstractOperatorDescriptor) objs[0];
+ logDebug("LocalKmerGroupby Operator");
+ connectOperators(jobSpec, sorter, ncNodeNames, kmerLocalAggregator, ncNodeNames,
+ new OneToOneConnectorDescriptor(jobSpec));
+ // Feed the local aggregator through the partitioning connector into the cross aggregator.
+ logDebug("CrossKmerGroupby Operator");
+ IConnectorDescriptor kmerConnPartition = (IConnectorDescriptor) objs[1];
+ AbstractOperatorDescriptor kmerCrossAggregator = (AbstractOperatorDescriptor) objs[2];
+ connectOperators(jobSpec, kmerLocalAggregator, ncNodeNames, kmerCrossAggregator, ncNodeNames, kmerConnPartition);
+ return kmerCrossAggregator;
+ }
+
+    /**
+     * Appends an {@link AssembleKeyIntoNodeOperator} behind the cross-partition kmer aggregator,
+     * linked one-to-one on the same node list.
+     *
+     * @param jobSpec
+     *            the job specification being built
+     * @param kmerCrossAggregator
+     *            the upstream aggregator whose output is turned into nodes
+     * @return the newly created assemble operator
+     */
+    public AbstractOperatorDescriptor generateKmerToFinalNode(JobSpecification jobSpec,
+            AbstractOperatorDescriptor kmerCrossAggregator) {
+        AssembleKeyIntoNodeOperator nodeAssembler = new AssembleKeyIntoNodeOperator(jobSpec,
+                AssembleKeyIntoNodeOperator.nodeOutputRec, kmerSize);
+        OneToOneConnectorDescriptor oneToOne = new OneToOneConnectorDescriptor(jobSpec);
+        connectOperators(jobSpec, kmerCrossAggregator, ncNodeNames, nodeAssembler, ncNodeNames, oneToOne);
+        return nodeAssembler;
+    }
+
+    /**
+     * Appends the HDFS writer that stores the generated nodes.
+     * <p>
+     * Only the {@code TEXT} output format has a tuple writer here. Any other format used to fall
+     * through the switch and hand a {@code null} writer factory to the write operator; it is now
+     * rejected up front while the job is being generated.
+     *
+     * @param jobSpec
+     *            the job specification being built
+     * @param mapEachReadToNode
+     *            the upstream operator producing the node tuples
+     * @return the HDFS write operator
+     * @throws HyracksException
+     *             if the configured output format has no node writer, or the write operator cannot
+     *             be created
+     */
+    public AbstractOperatorDescriptor generateNodeWriterOpertator(JobSpecification jobSpec,
+            AbstractOperatorDescriptor mapEachReadToNode) throws HyracksException {
+        final ITupleWriterFactory nodeWriter;
+        switch (outputFormat) {
+            case TEXT:
+                nodeWriter = new NodeTextWriterFactory(kmerSize);
+                break;
+            default:
+                // Fail fast instead of building an operator around a null writer factory.
+                throw new HyracksException("Unsupported output format for the node writer: " + outputFormat);
+        }
+        logDebug("WriteOperator");
+        // Output Node
+        HDFSWriteOperatorDescriptor writeNodeOperator = new HDFSWriteOperatorDescriptor(jobSpec,
+                hadoopJobConfFactory.getConf(), nodeWriter);
+        connectOperators(jobSpec, mapEachReadToNode, ncNodeNames, writeNodeOperator, ncNodeNames,
+                new OneToOneConnectorDescriptor(jobSpec));
+        return writeNodeOperator;
+    }
+
+    /**
+     * Assembles the full job: HDFS reader, kmer group-by, node assembly, and HDFS node writer.
+     *
+     * @return the populated job specification
+     * @throws HyracksException
+     *             if any stage of the job cannot be generated
+     */
+    @Override
+    public JobSpecification generateJob() throws HyracksException {
+        JobSpecification jobSpec = new JobSpecification();
+
+        logDebug("ReadKmer Operator");
+        HDFSReadOperatorDescriptor kmerReader = createHDFSReader(jobSpec);
+
+        logDebug("Group by Kmer");
+        AbstractOperatorDescriptor groupedKmers = generateGroupbyKmerJob(jobSpec, kmerReader);
+
+        logDebug("Generate final node");
+        AbstractOperatorDescriptor finalNodes = generateKmerToFinalNode(jobSpec, groupedKmers);
+        // NOTE(review): this operator is registered as a root although the writer below is
+        // connected downstream of it and registered as well - confirm the extra root is intended.
+        jobSpec.addRoot(finalNodes);
+
+        logDebug("Write node to result");
+        AbstractOperatorDescriptor nodeWriter = generateNodeWriterOpertator(jobSpec, finalNodes);
+        jobSpec.addRoot(nodeWriter);
+
+        return jobSpec;
+    }
+
+    /**
+     * Loads the job parameters from the configuration and prepares the HDFS read schedule.
+     * <p>
+     * Reads the read length, kmer size, frame limit, table size and frame size. An even kmer size
+     * is decremented by one and the adjusted value is written back into the configuration. The
+     * group-by type is always set to {@code PRECLUSTER}; the configured value is read only so it
+     * can be reported next to the effective one. Any output format other than "text"
+     * (case-insensitive) selects {@code BINARY}.
+     *
+     * @param scheduler
+     *            used to turn the input splits into location constraints for the reader
+     * @throws HyracksDataException
+     *             if computing the input splits fails with an I/O error
+     */
+    protected void initJobConfiguration(Scheduler scheduler) throws HyracksDataException {
+        Configuration conf = confFactory.getConf();
+        readLength = conf.getInt(GenomixJobConf.READ_LENGTH, GenomixJobConf.DEFAULT_READLEN);
+        kmerSize = conf.getInt(GenomixJobConf.KMER_LENGTH, GenomixJobConf.DEFAULT_KMERLEN);
+        if (kmerSize % 2 == 0) {
+            // Keep the configuration in step with the value actually used.
+            kmerSize--;
+            conf.setInt(GenomixJobConf.KMER_LENGTH, kmerSize);
+        }
+        frameLimits = conf.getInt(GenomixJobConf.FRAME_LIMIT, GenomixJobConf.DEFAULT_FRAME_LIMIT);
+        tableSize = conf.getInt(GenomixJobConf.TABLE_SIZE, GenomixJobConf.DEFAULT_TABLE_SIZE);
+        frameSize = conf.getInt(GenomixJobConf.FRAME_SIZE, GenomixJobConf.DEFAULT_FRAME_SIZE);
+
+        // The requested type is not applied; the job always uses the pre-clustered group-by.
+        String requestedType = conf.get(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
+        groupbyType = GroupbyType.PRECLUSTER;
+
+        String output = conf.get(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
+
+        if (output.equalsIgnoreCase("text")) {
+            outputFormat = OutputFormat.TEXT;
+        } else {
+            outputFormat = OutputFormat.BINARY;
+        }
+        try {
+            hadoopJobConfFactory = new ConfFactory(new JobConf(conf));
+            InputSplit[] splits = hadoopJobConfFactory.getConf().getInputFormat()
+                    .getSplits(hadoopJobConfFactory.getConf(), ncNodeNames.length);
+            readSchedule = scheduler.getLocationConstraints(splits);
+        } catch (IOException ex) {
+            throw new HyracksDataException(ex);
+        }
+
+        LOG.info("Genomix Graph Build Configuration");
+        LOG.info("Kmer:" + kmerSize);
+        // Report the effective type; the configured one is shown only for reference.
+        LOG.info("Groupby type:" + groupbyType + " (requested: " + requestedType + ")");
+        LOG.info("Output format:" + output);
+        LOG.info("Frame limit:" + frameLimits);
+        LOG.info("Frame size:" + frameSize);
+    }
+
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenCheckReader.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenCheckReader.java
new file mode 100644
index 0000000..f512f43
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenCheckReader.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hyracks.newgraph.job;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import edu.uci.ics.genomix.hyracks.newgraph.dataflow.ReadsKeyValueParserFactory;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
+import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
+import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
+import edu.uci.ics.hyracks.hdfs.dataflow.HDFSWriteOperatorDescriptor;
+import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
+
+/**
+ * Job generator that only reads the input and writes each parsed tuple back to HDFS as text.
+ * The job consists of the HDFS reader connected one-to-one to an HDFS writer.
+ */
+public class JobGenCheckReader extends JobGenBrujinGraph {
+
+ /**
+ * Serialization version of this job generator.
+ */
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Passes all arguments straight through to the {@link JobGenBrujinGraph} constructor.
+ */
+ public JobGenCheckReader(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
+ int numPartitionPerMachine) throws HyracksDataException {
+ super(job, scheduler, ncMap, numPartitionPerMachine);
+ }
+
+ /**
+ * Builds a two-operator job: the HDFS reader followed by the text writer created in
+ * {@link #generateRootByWriteKmerReader}.
+ *
+ * @return the populated job specification
+ * @throws HyracksException
+ *             if the reader or the writer cannot be created
+ */
+ @Override
+ public JobSpecification generateJob() throws HyracksException {
+
+ JobSpecification jobSpec = new JobSpecification();
+ logDebug("ReadKmer Operator");
+ HDFSReadOperatorDescriptor readOperator = createHDFSReader(jobSpec);
+
+ logDebug("Write kmer to result");
+ generateRootByWriteKmerReader(jobSpec, readOperator);
+
+ return jobSpec;
+ }
+
+ /**
+ * Creates an HDFS writer that emits one line per tuple ("kmer TAB node NEWLINE"), connects it
+ * one-to-one behind {@code readOperator} and registers it as a root of the job.
+ *
+ * @param jobSpec
+ *            the job specification being built
+ * @param readOperator
+ *            the upstream HDFS reader
+ * @return the write operator that was added as root
+ * @throws HyracksException
+ *             if the write operator cannot be created
+ */
+ public AbstractSingleActivityOperatorDescriptor generateRootByWriteKmerReader(JobSpecification jobSpec,
+ HDFSReadOperatorDescriptor readOperator) throws HyracksException {
+
+ HDFSWriteOperatorDescriptor writeKmerOperator = new HDFSWriteOperatorDescriptor(jobSpec,
+ hadoopJobConfFactory.getConf(), new ITupleWriterFactory() {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
+ // Set the global kmer length before any KmerBytesWritable is created below.
+ KmerBytesWritable.setGlobalKmerLength(kmerSize);
+ return new ITupleWriter() {
+
+ // Reused for every tuple; both are re-pointed at the tuple's bytes in write().
+ private NodeWritable outputNode = new NodeWritable();
+ private KmerBytesWritable outputKmer = new KmerBytesWritable();
+
+ @Override
+ public void open(DataOutput output) throws HyracksDataException {
+ }
+
+ @Override
+ public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
+ try {
+ // Reject a kmer field that holds fewer bytes than outputKmer.getLength().
+ if (outputKmer.getLength() > tuple
+ .getFieldLength(ReadsKeyValueParserFactory.OutputKmerField)) {
+ throw new IllegalArgumentException("Not enough kmer bytes");
+ }
+ outputKmer.setAsReference(
+ tuple.getFieldData(ReadsKeyValueParserFactory.OutputKmerField),
+ tuple.getFieldStart(ReadsKeyValueParserFactory.OutputKmerField));
+ outputNode.setAsReference(
+ tuple.getFieldData(ReadsKeyValueParserFactory.OutputNodeField),
+ tuple.getFieldStart(ReadsKeyValueParserFactory.OutputNodeField));
+
+ // NOTE(review): getBytes() uses the platform default charset.
+ output.write(outputKmer.toString().getBytes());
+ output.writeByte('\t');
+ output.write(outputNode.toString().getBytes());
+ output.writeByte('\n');
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void close(DataOutput output) throws HyracksDataException {
+
+ }
+
+ };
+ }
+
+ });
+ connectOperators(jobSpec, readOperator, ncNodeNames, writeKmerOperator, ncNodeNames,
+ new OneToOneConnectorDescriptor(jobSpec));
+ jobSpec.addRoot(writeKmerOperator);
+ return writeKmerOperator;
+ }
+
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/util/StatCountAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/util/StatCountAggregateFactory.java
new file mode 100644
index 0000000..26daa96
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/util/StatCountAggregateFactory.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hyracks.util;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
+
+/**
+ * Aggregator factory whose aggregators count the tuples in each group.
+ * <p>
+ * The count is a 4-byte int stored in the field at index {@code keyFields.length} of the state
+ * tuple, i.e. the first field after the grouping keys.
+ */
+public class StatCountAggregateFactory implements IAggregatorDescriptorFactory {
+
+    /**
+     * Serialization version of this factory.
+     */
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Aggregator that starts every group at 1 and adds 1 for each further tuple.
+     */
+    public class CountAggregator implements IAggregatorDescriptor {
+        private final int[] keyFields;
+
+        /**
+         * @param keyFields
+         *            the grouping key fields; only their number is used, to locate the count field
+         */
+        public CountAggregator(int[] keyFields) {
+            this.keyFields = keyFields;
+        }
+
+        /**
+         * No separate state object is used; the count is kept in the state tuple itself.
+         *
+         * @return always {@code null}
+         */
+        @Override
+        public AggregateState createAggregateStates() {
+            return null;
+        }
+
+        /**
+         * Writes the initial count (1) as a new field of {@code tupleBuilder}.
+         *
+         * @throws HyracksDataException
+         *             if writing the count fails; the I/O failure is kept as the cause
+         */
+        @Override
+        public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex, AggregateState state)
+                throws HyracksDataException {
+            writeCount(tupleBuilder, 1, "I/O exception when initializing the aggregator.");
+        }
+
+        @Override
+        public void reset() {
+
+        }
+
+        /**
+         * Increments, in place, the count stored in the state tuple.
+         */
+        @Override
+        public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
+                int stateTupleIndex, AggregateState state) throws HyracksDataException {
+            int countOffset = countOffset(stateAccessor, stateTupleIndex);
+            byte[] data = stateAccessor.getBuffer().array();
+            int count = 1 + IntegerSerializerDeserializer.getInt(data, countOffset);
+            IntegerSerializerDeserializer.putInt(count, data, countOffset);
+        }
+
+        /**
+         * Copies the count of tuple {@code tIndex} into a new field of {@code tupleBuilder}.
+         *
+         * @throws HyracksDataException
+         *             if writing the count fails; the I/O failure is kept as the cause
+         */
+        @Override
+        public void outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
+                AggregateState state) throws HyracksDataException {
+            writeCount(tupleBuilder, getCount(accessor, tIndex),
+                    "I/O exception when writing aggregation to the output buffer.");
+        }
+
+        /**
+         * Reads the count stored in tuple {@code tIndex} of {@code accessor}.
+         */
+        protected int getCount(IFrameTupleAccessor accessor, int tIndex) {
+            byte[] data = accessor.getBuffer().array();
+            return IntegerSerializerDeserializer.getInt(data, countOffset(accessor, tIndex));
+        }
+
+        /**
+         * The final result is the same as the partial result: the stored count.
+         */
+        @Override
+        public void outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
+                AggregateState state) throws HyracksDataException {
+            outputPartialResult(tupleBuilder, accessor, tIndex, state);
+        }
+
+        @Override
+        public void close() {
+
+        }
+
+        /**
+         * Computes the buffer offset of the count field: tuple start + field-slot area + start of
+         * the field at index {@code keyFields.length}.
+         */
+        private int countOffset(IFrameTupleAccessor accessor, int tIndex) {
+            int tupleOffset = accessor.getTupleStartOffset(tIndex);
+            int fieldStart = accessor.getFieldStartOffset(tIndex, keyFields.length);
+            return tupleOffset + accessor.getFieldSlotsLength() + fieldStart;
+        }
+
+        /**
+         * Appends {@code count} as one int field, wrapping an I/O failure with {@code failureMessage}.
+         */
+        private void writeCount(ArrayTupleBuilder tupleBuilder, int count, String failureMessage)
+                throws HyracksDataException {
+            DataOutput fieldOutput = tupleBuilder.getDataOutput();
+            try {
+                fieldOutput.writeInt(count);
+                tupleBuilder.addFieldEndOffset();
+            } catch (IOException e) {
+                // Keep the original exception as the cause so the real failure is not lost.
+                throw new HyracksDataException(failureMessage, e);
+            }
+        }
+
+    }
+
+    /**
+     * Creates a {@link CountAggregator} for the given key fields; the other arguments are unused.
+     */
+    @Override
+    public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
+            RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults)
+            throws HyracksDataException {
+        return new CountAggregator(keyFields);
+    }
+
+}
diff --git a/genomix/genomix-hyracks/src/main/resources/conf/cluster.properties b/genomix/genomix-hyracks/src/main/resources/conf/cluster.properties
new file mode 100644
index 0000000..66251be
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/conf/cluster.properties
@@ -0,0 +1,41 @@
+#The CC port for Hyracks clients
+CC_CLIENTPORT=3099
+
+#The CC port for Hyracks cluster management
+CC_CLUSTERPORT=1099
+
+#The directory of hyracks binaries
+HYRACKS_HOME="../../../../hyracks"
+
+WORKPATH=""
+#The tmp directory for cc to install jars
+CCTMP_DIR=${WORKPATH}/tmp/t1
+
+#The tmp directory for nc to install jars
+NCTMP_DIR=${WORKPATH}/tmp/t2
+
+#The directory to put cc logs
+CCLOGS_DIR=$CCTMP_DIR/logs
+
+#The directory to put nc logs
+NCLOGS_DIR=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS="${WORKPATH}/tmp/t3"
+
+#The JAVA_HOME
+JAVA_HOME=$JAVA_HOME
+
+#The CLASSPATH: HADOOP_HOME first, then the inherited CLASSPATH, then the current directory
+CLASSPATH="${HADOOP_HOME}:${CLASSPATH}:."
+
+#The frame size of the internal dataflow engine
+FRAME_SIZE=65536
+
+#CC JAVA_OPTS
+CCJAVA_OPTS="-Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS="-Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx10g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/genomix/genomix-hyracks/src/main/resources/conf/debugnc.properties b/genomix/genomix-hyracks/src/main/resources/conf/debugnc.properties
new file mode 100644
index 0000000..27afa26
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/conf/debugnc.properties
@@ -0,0 +1,12 @@
+#The tmp directory for nc to install jars
+NCTMP_DIR2=/tmp/t-1
+
+#The directory to put nc logs
+NCLOGS_DIR2=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS2="/tmp/t-2,/tmp/t-3"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS2="-Xdebug -Xrunjdwp:transport=dt_socket,address=7003,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/genomix/genomix-hyracks/src/main/resources/conf/master b/genomix/genomix-hyracks/src/main/resources/conf/master
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/conf/master
@@ -0,0 +1 @@
+localhost
diff --git a/genomix/genomix-hyracks/src/main/resources/conf/slaves b/genomix/genomix-hyracks/src/main/resources/conf/slaves
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/conf/slaves
@@ -0,0 +1 @@
+localhost
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/genomix b/genomix/genomix-hyracks/src/main/resources/scripts/genomix
new file mode 100644
index 0000000..239a46c
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/genomix
@@ -0,0 +1,113 @@
+#!/bin/sh
+# ----------------------------------------------------------------------------
+# Copyright 2001-2006 The Apache Software Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+#
+# Copyright (c) 2001-2006 The Apache Software Foundation. All rights
+# reserved.
+
+
+# resolve links - $0 may be a softlink
+PRG="$0"
+
+while [ -h "$PRG" ]; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`/"$link"
+ fi
+done
+
+PRGDIR=`dirname "$PRG"`
+BASEDIR=`cd "$PRGDIR/.." >/dev/null; pwd`
+
+
+
+# OS specific support. $var _must_ be set to either true or false.
+cygwin=false;
+darwin=false;
+case "`uname`" in
+ CYGWIN*) cygwin=true ;;
+ Darwin*) darwin=true
+ if [ -z "$JAVA_VERSION" ] ; then
+ JAVA_VERSION="CurrentJDK"
+ else
+ echo "Using Java version: $JAVA_VERSION"
+ fi
+ if [ -z "$JAVA_HOME" ] ; then
+ JAVA_HOME=/System/Library/Frameworks/JavaVM.framework/Versions/${JAVA_VERSION}/Home
+ fi
+ ;;
+esac
+
+if [ -z "$JAVA_HOME" ] ; then
+ if [ -r /etc/gentoo-release ] ; then
+ JAVA_HOME=`java-config --jre-home`
+ fi
+fi
+
+# For Cygwin, ensure paths are in UNIX format before anything is touched
+if $cygwin ; then
+ [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
+ [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
+fi
+
+# If a specific java binary isn't specified search for the standard 'java' binary
+if [ -z "$JAVACMD" ] ; then
+ if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ else
+ JAVACMD=`which java`
+ fi
+fi
+
+if [ ! -x "$JAVACMD" ] ; then
+ echo "Error: JAVA_HOME is not defined correctly." 1>&2
+ echo " We cannot execute $JAVACMD" 1>&2
+ exit 1
+fi
+
+if [ -z "$REPO" ]
+then
+ REPO="$BASEDIR"/lib
+fi
+
+CLASSPATH=$CLASSPATH_PREFIX:"$BASEDIR"/etc:"$REPO"/hyracks-dataflow-std-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-api-0.2.3-SNAPSHOT.jar:"$REPO"/json-20090211.jar:"$REPO"/httpclient-4.1-alpha2.jar:"$REPO"/httpcore-4.1-beta1.jar:"$REPO"/commons-logging-1.1.1.jar:"$REPO"/args4j-2.0.12.jar:"$REPO"/commons-lang3-3.1.jar:"$REPO"/hyracks-dataflow-common-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-data-std-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-control-cc-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-control-common-0.2.3-SNAPSHOT.jar:"$REPO"/jetty-server-8.0.0.RC0.jar:"$REPO"/servlet-api-3.0.20100224.jar:"$REPO"/jetty-continuation-8.0.0.RC0.jar:"$REPO"/jetty-http-8.0.0.RC0.jar:"$REPO"/jetty-io-8.0.0.RC0.jar:"$REPO"/jetty-webapp-8.0.0.RC0.jar:"$REPO"/jetty-xml-8.0.0.RC0.jar:"$REPO"/jetty-util-8.0.0.RC0.jar:"$REPO"/jetty-servlet-8.0.0.RC0.jar:"$REPO"/jetty-security-8.0.0.RC0.jar:"$REPO"/wicket-core-1.5.2.jar:"$REPO"/wicket-util-1.5.2.jar:"$REPO"/wicket-request-1.5.2.jar:"$REPO"/slf4j-api-1.6.1.jar:"$REPO"/slf4j-jcl-1.6.3.jar:"$REPO"/hyracks-control-nc-0.2.3-SNAPSHOT.jar:"$REPO"/dcache-client-0.0.1.jar:"$REPO"/jetty-client-8.0.0.M0.jar:"$REPO"/hyracks-net-0.2.3-SNAPSHOT.jar:"$REPO"/commons-io-1.3.1.jar:"$REPO"/hyracks-ipc-0.2.3-SNAPSHOT.jar:"$REPO"/hadoop-core-0.20.2.jar:"$REPO"/commons-cli-1.2.jar:"$REPO"/xmlenc-0.52.jar:"$REPO"/commons-httpclient-3.0.1.jar:"$REPO"/commons-codec-1.3.jar:"$REPO"/commons-net-1.4.1.jar:"$REPO"/jetty-6.1.14.jar:"$REPO"/jetty-util-6.1.14.jar:"$REPO"/jasper-runtime-5.5.12.jar:"$REPO"/jasper-compiler-5.5.12.jar:"$REPO"/jsp-api-2.1-6.1.14.jar:"$REPO"/jsp-2.1-6.1.14.jar:"$REPO"/ant-1.6.5.jar:"$REPO"/commons-el-1.0.jar:"$REPO"/jets3t-0.7.1.jar:"$REPO"/servlet-api-2.5-6.1.14.jar:"$REPO"/kfs-0.3.jar:"$REPO"/hsqldb-1.8.0.10.jar:"$REPO"/oro-2.0.8.jar:"$REPO"/core-3.1.1.jar:"$REPO"/hadoop-test-0.20.2.jar:"$REPO"/ftplet-api-1.0.0.jar:"$REPO"/mina-core-2.0.0-M5.jar:"$REPO"/ftpserver-core-1.0.0.jar:"$REPO"/ftpserver-deprecated-1.0.0-M2.jar:"$REPO"/hyracks-hdfs-core-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-hdfs-0.20.2-0.2.3-SNAPSHOT.jar:"$REPO"/genomix-data-0.2.3-SNAPSHOT.jar:"$REPO"/genomix-hyracks-0.2.3-SNAPSHOT.jar
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin; then
+ [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
+ [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
+ [ -n "$HOME" ] && HOME=`cygpath --path --windows "$HOME"`
+ [ -n "$BASEDIR" ] && BASEDIR=`cygpath --path --windows "$BASEDIR"`
+ [ -n "$REPO" ] && REPO=`cygpath --path --windows "$REPO"`
+fi
+
+exec "$JAVACMD" $JAVA_OPTS \
+ -classpath "$CLASSPATH" \
+ -Dapp.name="genomix" \
+ -Dapp.pid="$$" \
+ -Dapp.repo="$REPO" \
+ -Dapp.home="$BASEDIR" \
+ -Dbasedir="$BASEDIR" \
+ edu.uci.ics.genomix.driver.Driver \
+ "$@"
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/genomix.bat b/genomix/genomix-hyracks/src/main/resources/scripts/genomix.bat
new file mode 100644
index 0000000..abcafaf
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/genomix.bat
@@ -0,0 +1,108 @@
+@REM ----------------------------------------------------------------------------
+@REM Copyright 2001-2006 The Apache Software Foundation.
+@REM
+@REM Licensed under the Apache License, Version 2.0 (the "License");
+@REM you may not use this file except in compliance with the License.
+@REM You may obtain a copy of the License at
+@REM
+@REM http://www.apache.org/licenses/LICENSE-2.0
+@REM
+@REM Unless required by applicable law or agreed to in writing, software
+@REM distributed under the License is distributed on an "AS IS" BASIS,
+@REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@REM See the License for the specific language governing permissions and
+@REM limitations under the License.
+@REM ----------------------------------------------------------------------------
+@REM
+@REM Copyright (c) 2001-2006 The Apache Software Foundation. All rights
+@REM reserved.
+
+@echo off
+
+set ERROR_CODE=0
+
+:init
+@REM Decide how to startup depending on the version of windows
+
+@REM -- Win98ME
+if NOT "%OS%"=="Windows_NT" goto Win9xArg
+
+@REM set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" @setlocal
+
+@REM -- 4NT shell
+if "%eval[2+2]" == "4" goto 4NTArgs
+
+@REM -- Regular WinNT shell
+set CMD_LINE_ARGS=%*
+goto WinNTGetScriptDir
+
+@REM The 4NT Shell from jp software
+:4NTArgs
+set CMD_LINE_ARGS=%$
+goto WinNTGetScriptDir
+
+:Win9xArg
+@REM Slurp the command line arguments. This loop allows for an unlimited number
+@REM of arguments (up to the command line limit, anyway).
+set CMD_LINE_ARGS=
+:Win9xApp
+if %1a==a goto Win9xGetScriptDir
+set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1
+shift
+goto Win9xApp
+
+:Win9xGetScriptDir
+set SAVEDIR=%CD%
+%0\
+cd %0\..\..
+set BASEDIR=%CD%
+cd %SAVEDIR%
+set SAVE_DIR=
+goto repoSetup
+
+:WinNTGetScriptDir
+set BASEDIR=%~dp0\..
+
+:repoSetup
+
+
+if "%JAVACMD%"=="" set JAVACMD=java
+
+if "%REPO%"=="" set REPO=%BASEDIR%\lib
+
+set CLASSPATH="%BASEDIR%"\etc;"%REPO%"\hyracks-dataflow-std-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-api-0.2.3-SNAPSHOT.jar;"%REPO%"\json-20090211.jar;"%REPO%"\httpclient-4.1-alpha2.jar;"%REPO%"\httpcore-4.1-beta1.jar;"%REPO%"\commons-logging-1.1.1.jar;"%REPO%"\args4j-2.0.12.jar;"%REPO%"\commons-lang3-3.1.jar;"%REPO%"\hyracks-dataflow-common-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-data-std-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-control-cc-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-control-common-0.2.3-SNAPSHOT.jar;"%REPO%"\jetty-server-8.0.0.RC0.jar;"%REPO%"\servlet-api-3.0.20100224.jar;"%REPO%"\jetty-continuation-8.0.0.RC0.jar;"%REPO%"\jetty-http-8.0.0.RC0.jar;"%REPO%"\jetty-io-8.0.0.RC0.jar;"%REPO%"\jetty-webapp-8.0.0.RC0.jar;"%REPO%"\jetty-xml-8.0.0.RC0.jar;"%REPO%"\jetty-util-8.0.0.RC0.jar;"%REPO%"\jetty-servlet-8.0.0.RC0.jar;"%REPO%"\jetty-security-8.0.0.RC0.jar;"%REPO%"\wicket-core-1.5.2.jar;"%REPO%"\wicket-util-1.5.2.jar;"%REPO%"\wicket-request-1.5.2.jar;"%REPO%"\slf4j-api-1.6.1.jar;"%REPO%"\slf4j-jcl-1.6.3.jar;"%REPO%"\hyracks-control-nc-0.2.3-SNAPSHOT.jar;"%REPO%"\dcache-client-0.0.1.jar;"%REPO%"\jetty-client-8.0.0.M0.jar;"%REPO%"\hyracks-net-0.2.3-SNAPSHOT.jar;"%REPO%"\commons-io-1.3.1.jar;"%REPO%"\hyracks-ipc-0.2.3-SNAPSHOT.jar;"%REPO%"\hadoop-core-0.20.2.jar;"%REPO%"\commons-cli-1.2.jar;"%REPO%"\xmlenc-0.52.jar;"%REPO%"\commons-httpclient-3.0.1.jar;"%REPO%"\commons-codec-1.3.jar;"%REPO%"\commons-net-1.4.1.jar;"%REPO%"\jetty-6.1.14.jar;"%REPO%"\jetty-util-6.1.14.jar;"%REPO%"\jasper-runtime-5.5.12.jar;"%REPO%"\jasper-compiler-5.5.12.jar;"%REPO%"\jsp-api-2.1-6.1.14.jar;"%REPO%"\jsp-2.1-6.1.14.jar;"%REPO%"\ant-1.6.5.jar;"%REPO%"\commons-el-1.0.jar;"%REPO%"\jets3t-0.7.1.jar;"%REPO%"\servlet-api-2.5-6.1.14.jar;"%REPO%"\kfs-0.3.jar;"%REPO%"\hsqldb-1.8.0.10.jar;"%REPO%"\oro-2.0.8.jar;"%REPO%"\core-3.1.1.jar;"%REPO%"\hadoop-test-0.20.2.jar;"%REPO%"\ftplet-api-1.0.0.jar;"%REPO%"\mina-core-2.0.0-M5.jar;"%REPO%"\ftpserver-core-1.0.0.jar;"%REPO%"\ftpserver-deprecated-1.0.0-M2.jar;"%REPO%"\hyracks-hdfs-core-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-hdfs-0.20.2-0.2.3-SNAPSHOT.jar;"%REPO%"\genomix-data-0.2.3-SNAPSHOT.jar;"%REPO%"\genomix-hyracks-0.2.3-SNAPSHOT.jar
+goto endInit
+
+@REM Reaching here means variables are defined and arguments have been captured
+:endInit
+
+%JAVACMD% %JAVA_OPTS% -classpath %CLASSPATH_PREFIX%;%CLASSPATH% -Dapp.name="genomix" -Dapp.repo="%REPO%" -Dapp.home="%BASEDIR%" -Dbasedir="%BASEDIR%" edu.uci.ics.genomix.driver.Driver %CMD_LINE_ARGS%
+if ERRORLEVEL 1 goto error
+goto end
+
+:error
+if "%OS%"=="Windows_NT" @endlocal
+set ERROR_CODE=%ERRORLEVEL%
+
+:end
+@REM set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" goto endNT
+
+@REM For old DOS remove the set variables from ENV - we assume they were not set
+@REM before we started - at least we don't leave any baggage around
+set CMD_LINE_ARGS=
+goto postExec
+
+:endNT
+@REM If error code is set to 1 then the endlocal was done already in :error.
+if %ERROR_CODE% EQU 0 @endlocal
+
+
+:postExec
+
+if "%FORCE_EXIT_ON_ERROR%" == "on" (
+ if %ERROR_CODE% NEQ 0 exit %ERROR_CODE%
+)
+
+exit /B %ERROR_CODE%
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/getip.sh b/genomix/genomix-hyracks/src/main/resources/scripts/getip.sh
new file mode 100644
index 0000000..e0cdf73
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/getip.sh
@@ -0,0 +1,21 @@
+#get the OS
+OS_NAME=`uname -a|awk '{print $1}'`
+LINUX_OS='Linux'
+
+if [ $OS_NAME = $LINUX_OS ];
+then
+ #Get IP Address
+ IPADDR=`/sbin/ifconfig eth0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ if [ "$IPADDR" = "" ]
+ then
+ IPADDR=`/sbin/ifconfig lo | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ fi
+else
+ IPADDR=`/sbin/ifconfig en1 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ if [ "$IPADDR" = "" ]
+ then
+ IPADDR=`/sbin/ifconfig lo0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ fi
+
+fi
+echo $IPADDR
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/startAllNCs.sh b/genomix/genomix-hyracks/src/main/resources/scripts/startAllNCs.sh
new file mode 100644
index 0000000..28fcb84
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/startAllNCs.sh
@@ -0,0 +1,6 @@
+GENOMIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+ ssh $i "cd ${GENOMIX_PATH}; export JAVA_HOME=${JAVA_HOME}; bin/startnc.sh"
+done
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/startCluster.sh b/genomix/genomix-hyracks/src/main/resources/scripts/startCluster.sh
new file mode 100755
index 0000000..843a6b1
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/startCluster.sh
@@ -0,0 +1,4 @@
+bin/startcc.sh
+sleep 5
+bin/startAllNCs.sh
+
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/startDebugNc.sh b/genomix/genomix-hyracks/src/main/resources/scripts/startDebugNc.sh
new file mode 100644
index 0000000..5f89bcc
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/startDebugNc.sh
@@ -0,0 +1,48 @@
+hostname
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+. conf/debugnc.properties
+
+#Clean up temp dir
+
+#rm -rf $NCTMP_DIR2
+mkdir -p $NCTMP_DIR2
+
+#Clean up log dir
+#rm -rf $NCLOGS_DIR2
+mkdir -p $NCLOGS_DIR2
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS2 | tr "," "\n")
+for io_dir in $io_dirs
+do
+ #rm -rf $io_dir
+ mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+#Get OS
+IPADDR=`bin/getip.sh`
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS2
+
+GENOMIX_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR2
+
+#Launch hyracks nc
+${GENOMIX_HOME}/bin/genomixnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR -data-ip-address $IPADDR -result-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS}" &> $NCLOGS_DIR/$NODEID.log &
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/startcc.sh b/genomix/genomix-hyracks/src/main/resources/scripts/startcc.sh
new file mode 100644
index 0000000..67023c1
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/startcc.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+hostname
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CCHOST=`bin/getip.sh`
+
+#Remove the temp dir
+#rm -rf $CCTMP_DIR
+mkdir -p $CCTMP_DIR
+
+#Remove the logs dir
+#rm -rf $CCLOGS_DIR
+mkdir -p $CCLOGS_DIR
+
+#Export JAVA_HOME and JAVA_OPTS
+export JAVA_HOME=$JAVA_HOME
+export JAVA_OPTS=$CCJAVA_OPTS
+
+GENOMIX_HOME=`pwd`
+cd $CCTMP_DIR
+#Launch hyracks cc script
+${GENOMIX_HOME}/bin/genomixcc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 3 &> $CCLOGS_DIR/cc.log &
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/startnc.sh b/genomix/genomix-hyracks/src/main/resources/scripts/startnc.sh
new file mode 100644
index 0000000..bfcc1d4
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/startnc.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+#Start a genomix node controller (NC) on this host; must be run from the directory that contains conf/ and bin/.
+hostname
+
+MY_NAME=$(hostname)
+#Get the IP address of the cc by running bin/getip.sh on the master host
+CCHOST_NAME=$(cat conf/master)
+CURRENT_PATH=$(pwd)
+CCHOST=$(ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh")
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Make sure the temp dir exists (cleanup is disabled)
+#rm -rf $NCTMP_DIR
+mkdir -p "$NCTMP_DIR"
+
+#Make sure the log dir exists (cleanup is disabled)
+#rm -rf $NCLOGS_DIR
+mkdir -p "$NCLOGS_DIR"
+
+#Make sure every I/O working dir exists; IO_DIRS is split on commas
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+    #rm -rf $io_dir
+    mkdir -p "$io_dir"
+done
+
+#Set JAVA_HOME
+export JAVA_HOME="$JAVA_HOME"
+
+IPADDR=$(bin/getip.sh)
+#echo $IPADDR
+
+#Get node ID (host name up to the first dot)
+NODEID=$(hostname | cut -d '.' -f 1)
+
+#Set JAVA_OPTS
+export JAVA_OPTS="$NCJAVA_OPTS"
+
+GENOMIX_HOME=$(pwd)
+
+#Enter the temp dir
+cd "$NCTMP_DIR"
+
+#Launch hyracks nc in the background; "&>" (stdout and stderr to the log) is bash syntax, hence the shebang
+"${GENOMIX_HOME}"/bin/genomixnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR -data-ip-address $IPADDR -result-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS}" &> "$NCLOGS_DIR/$NODEID.log" &
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/stopAllNCs.sh b/genomix/genomix-hyracks/src/main/resources/scripts/stopAllNCs.sh
new file mode 100644
index 0000000..66ed866
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/stopAllNCs.sh
@@ -0,0 +1,6 @@
+#Run bin/stopnc.sh over ssh, from this same directory, on every host listed in conf/slaves
+GENOMIX_PATH=$(pwd)
+for slave in $(cat conf/slaves)
+do
+    ssh $slave "cd ${GENOMIX_PATH}; bin/stopnc.sh"
+done
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/stopCluster.sh b/genomix/genomix-hyracks/src/main/resources/scripts/stopCluster.sh
new file mode 100644
index 0000000..4889934
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/stopCluster.sh
@@ -0,0 +1,3 @@
+bin/stopAllNCs.sh   # stop the node controllers first
+sleep 2             # pause two seconds before stopping the cluster controller
+bin/stopcc.sh       # then stop the cluster controller
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/stopcc.sh b/genomix/genomix-hyracks/src/main/resources/scripts/stopcc.sh
new file mode 100644
index 0000000..6d1b2d2
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/stopcc.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#Stop the genomix cluster controller on this host and empty its temp dir.
+hostname
+. conf/cluster.properties
+
+#Find the CC java process of ${USER}; fall back to any 'hyracks' java process, then to a match on the numeric uid
+PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=genomixcc'|awk '{print $2}'`
+if [ "$PID" == "" ]; then
+    PID=`ps -ef|grep ${USER}|grep java|grep 'hyracks'|awk '{print $2}'`
+fi
+if [ "$PID" == "" ]; then
+    USERID=`id | sed 's/^uid=//;s/(.*$//'`
+    PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=genomixcc'|awk '{print $2}'`
+fi
+
+echo $PID
+#Skip the kill when nothing was found (a bare "kill -9" only prints a usage error)
+[ -n "$PID" ] && kill -9 $PID
+#Clean up CC temp dir; ":?" aborts instead of expanding to "rm -rf /*" when CCTMP_DIR is unset or empty
+rm -rf "${CCTMP_DIR:?}"/*
diff --git a/genomix/genomix-hyracks/src/main/resources/scripts/stopnc.sh b/genomix/genomix-hyracks/src/main/resources/scripts/stopnc.sh
new file mode 100644
index 0000000..092e232
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/resources/scripts/stopnc.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+#Stop the genomix node controller on this host and empty its I/O and temp dirs.
+hostname
+. conf/cluster.properties
+
+#Find the NC java process owned by ${USER}
+PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=genomixnc'|awk '{print $2}'`
+
+#Fall back to any java process of ${USER} whose command line mentions 'hyracks'
+if [ "$PID" == "" ]; then
+    PID=`ps -ef|grep ${USER}|grep java|grep 'hyracks'|awk '{print $2}'`
+fi
+
+#Last resort: match on the numeric uid instead of the user name
+if [ "$PID" == "" ]; then
+    USERID=`id | sed 's/^uid=//;s/(.*$//'`
+    PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=genomixnc'|awk '{print $2}'`
+fi
+
+echo $PID
+#Skip the kill when nothing was found (a bare "kill -9" only prints a usage error)
+[ -n "$PID" ] && kill -9 $PID
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+    rm -rf "$io_dir"/*
+done
+#Clean up NC temp dir; ":?" aborts instead of expanding to "rm -rf /*" when NCTMP_DIR is unset or empty
+rm -rf "${NCTMP_DIR:?}"/*
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
new file mode 100644
index 0000000..3a98573
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
@@ -0,0 +1,166 @@
+package edu.uci.ics.genomix.hyracks.newgraph.test;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import junit.framework.Assert;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.hyracks.newgraph.job.GenomixJobConf;
+import edu.uci.ics.genomix.hyracks.newgraph.driver.Driver;
+import edu.uci.ics.genomix.hyracks.newgraph.driver.Driver.Plan;
+//import edu.uci.ics.genomix.hyracks.test.TestUtils;
+//import edu.uci.ics.genomix.oldtype.NodeWritable;
+
+/**
+ * JUnit test that starts a MiniDFSCluster, runs a genomix {@link Driver} plan on the uploaded input file and
+ * merges the text output into a local file under {@code actual/}.
+ */
+@SuppressWarnings("deprecation")
+public class JobRun {
+    private static final int KmerSize = 3;
+    private static final int ReadLength = 6;
+    private static final String ACTUAL_RESULT_DIR = "actual";
+    private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+
+    private static final String DATA_INPUT_PATH = "src/test/resources/data/lastesttest/Tips4.txt";
+    private static final String HDFS_INPUT_PATH = "/webmap";
+    private static final String HDFS_OUTPUT_PATH = "/webmap_result";
+
+    private static final String DUMPED_RESULT = ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH + "/Tips4.txt";
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private MiniDFSCluster dfsCluster;
+
+    private JobConf conf = new JobConf();
+    private int numberOfNC = 2;
+    private int numPartitionPerMachine = 2;
+
+    private Driver driver;
+
+    /** JUnit entry point; the reader check is commented out, so only {@link #TestGroupby()} runs. */
+    @Test
+    public void TestAll() throws Exception {
+        // TestReader();
+        TestGroupby();
+    }
+
+    /** Runs {@code Plan.CHECK_KMERREADER} with text output and dumps the result locally. */
+    public void TestReader() throws Exception {
+        cleanUpReEntry();
+        conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
+        driver.runJob(new GenomixJobConf(conf), Plan.CHECK_KMERREADER, true);
+        dumpResult();
+    }
+
+    /** Runs {@code Plan.BUILD_DEBRUJIN_GRAPH} with the pre-cluster group-by and text output, then dumps the result. */
+    public void TestGroupby() throws Exception {
+        conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
+        cleanUpReEntry();
+        conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
+        driver.runJob(new GenomixJobConf(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
+        dumpResult();
+    }
+
+    /** Resets the local working directories, starts HDFS and creates the driver before each test. */
+    @Before
+    public void setUp() throws Exception {
+        cleanupStores();
+        edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.init();
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        startHDFS();
+
+        FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
+        FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
+
+        conf.setInt(GenomixJobConf.KMER_LENGTH, KmerSize);
+        conf.setInt(GenomixJobConf.READ_LENGTH, ReadLength);
+        driver = new Driver(edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.CC_HOST,
+                edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT, numPartitionPerMachine);
+    }
+
+    /** Creates the "teststore" and "build" directories if needed and empties them. */
+    private void cleanupStores() throws IOException {
+        FileUtils.forceMkdir(new File("teststore"));
+        FileUtils.forceMkdir(new File("build"));
+        FileUtils.cleanDirectory(new File("teststore"));
+        FileUtils.cleanDirectory(new File("build"));
+    }
+
+    /** Starts a MiniDFSCluster, copies the input file into HDFS and writes the configuration to HADOOP_CONF_PATH. */
+    private void startHDFS() throws IOException {
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+        FileSystem dfs = FileSystem.get(conf);
+        Path src = new Path(DATA_INPUT_PATH);
+        Path dest = new Path(HDFS_INPUT_PATH);
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
+
+        // Close the stream even if writeXml() fails so the file handle is not leaked.
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+            confOutput.flush();
+        } finally {
+            confOutput.close();
+        }
+    }
+
+    /** Deletes the previous local dump and the HDFS output directory if they exist. */
+    private void cleanUpReEntry() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        if (lfs.exists(new Path(DUMPED_RESULT))) {
+            lfs.delete(new Path(DUMPED_RESULT), true);
+        }
+        FileSystem dfs = FileSystem.get(conf);
+        if (dfs.exists(new Path(HDFS_OUTPUT_PATH))) {
+            dfs.delete(new Path(HDFS_OUTPUT_PATH), true);
+        }
+    }
+
+    /** Merges the HDFS output directory into the local DUMPED_RESULT file when the output format is text. */
+    private void dumpResult() throws Exception {
+        String format = conf.get(GenomixJobConf.OUTPUT_FORMAT);
+        if (GenomixJobConf.OUTPUT_FORMAT_TEXT.equalsIgnoreCase(format)) {
+            FileUtil.copyMerge(FileSystem.get(conf), new Path(HDFS_OUTPUT_PATH),
+                    FileSystem.getLocal(new Configuration()), new Path(DUMPED_RESULT), false, conf, null);
+        }
+    }
+
+    /** Calls HyracksUtils.deinit() and shuts the mini DFS cluster down after each test, even if deinit() fails. */
+    @After
+    public void tearDown() throws Exception {
+        try {
+            edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.deinit();
+        } finally {
+            // dfsCluster is still null when setUp() failed before startHDFS() created it.
+            if (dfsCluster != null) {
+                dfsCluster.shutdown();
+            }
+        }
+    }
+}
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/TestUtils.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/TestUtils.java
new file mode 100644
index 0000000..9fcbca3
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/TestUtils.java
@@ -0,0 +1,258 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.newgraph.test;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.ArrayList;
+import java.util.Collections;
+
+public class TestUtils {
+    /**
+     * Compare with the sorted expected file.
+     * The actual file may not be sorted;
+     *
+     * @param expectedFile
+     *            file holding the expected lines, already sorted
+     * @param actualFile
+     *            file holding the produced lines, in any order
+     * @throws Exception
+     *             on the first mismatching, missing or extra line, or when a file cannot be read
+     */
+    public static void compareWithSortedResult(File expectedFile, File actualFile) throws Exception {
+        compareSorted(expectedFile, actualFile, false, null);
+    }
+
+    /**
+     * Same as {@link #compareWithSortedResult(File, File)}, except that the tab-separated fields whose indexes
+     * are listed in {@code poslistField} are compared as unordered lists of parenthesised entries.
+     *
+     * @param expectedFile
+     *            file holding the expected lines, already sorted
+     * @param actualFile
+     *            file holding the produced lines, in any order
+     * @param poslistField
+     *            indexes of the tab-separated fields that hold position lists
+     * @throws Exception
+     *             on the first mismatching, missing or extra line, or when a file cannot be read
+     */
+    public static void compareWithUnSortedPosition(File expectedFile, File actualFile, int[] poslistField)
+            throws Exception {
+        compareSorted(expectedFile, actualFile, true, poslistField);
+    }
+
+    /**
+     * Compares the two files line by line, in file order.
+     *
+     * @param expectedFile
+     *            file holding the expected lines
+     * @param actualFile
+     *            file holding the produced lines
+     * @throws Exception
+     *             on the first mismatching, missing or extra line, or when a file cannot be read
+     */
+    public static void compareWithResult(File expectedFile, File actualFile) throws Exception {
+        BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
+        try {
+            // Opened inside the outer try so the first reader is closed even if this file cannot be opened.
+            BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
+            try {
+                String lineExpected, lineActual;
+                int num = 1;
+                while ((lineExpected = readerExpected.readLine()) != null) {
+                    lineActual = readerActual.readLine();
+                    if (lineActual == null) {
+                        throw new Exception("Actual result changed at line " + num + ":\n< " + lineExpected
+                                + "\n> ");
+                    }
+                    if (!equalStrings(lineExpected, lineActual)) {
+                        throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected
+                                + "\n> " + lineActual);
+                    }
+                    ++num;
+                }
+                lineActual = readerActual.readLine();
+                if (lineActual != null) {
+                    throw new Exception("Actual result changed at line " + num + ":\n< \n> " + lineActual);
+                }
+            } finally {
+                readerActual.close();
+            }
+        } finally {
+            readerExpected.close();
+        }
+    }
+
+    /** Shared body of the two sorted comparisons; {@code positional} selects containStrings over equalStrings. */
+    private static void compareSorted(File expectedFile, File actualFile, boolean positional, int[] poslistField)
+            throws Exception {
+        ArrayList<String> actualLines = readSortedLines(actualFile);
+        BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
+        try {
+            int num = 1;
+            for (String actualLine : actualLines) {
+                String lineExpected = readerExpected.readLine();
+                if (lineExpected == null) {
+                    throw new Exception("Actual result changed at line " + num + ":\n< " + actualLine + "\n> ");
+                }
+                boolean same = positional ? containStrings(lineExpected, actualLine, poslistField)
+                        : equalStrings(lineExpected, actualLine);
+                if (!same) {
+                    throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected + "\n> "
+                            + actualLine);
+                }
+                ++num;
+            }
+            String extraExpected = readerExpected.readLine();
+            if (extraExpected != null) {
+                throw new Exception("Actual result changed at line " + num + ":\n< \n> " + extraExpected);
+            }
+        } finally {
+            readerExpected.close();
+        }
+    }
+
+    /** Reads every line of {@code file} and returns them sorted in natural String order. */
+    private static ArrayList<String> readSortedLines(File file) throws Exception {
+        ArrayList<String> lines = new ArrayList<String>();
+        BufferedReader reader = new BufferedReader(new FileReader(file));
+        try {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                lines.add(line);
+            }
+        } finally {
+            reader.close();
+        }
+        Collections.sort(lines);
+        return lines;
+    }
+
+    /**
+     * Tells whether two result lines are equal. Comma-separated fields that differ textually still count as
+     * equal when the expected field contains a '.' and the parts after '=' are the same after narrowing to float.
+     */
+    private static boolean equalStrings(String s1, String s2) {
+        String[] rowsOne = s1.split("\n");
+        String[] rowsTwo = s2.split("\n");
+        if (rowsOne.length != rowsTwo.length) {
+            return false;
+        }
+        for (int i = 0; i < rowsOne.length; i++) {
+            String row1 = rowsOne[i];
+            String row2 = rowsTwo[i];
+            if (row1.equals(row2)) {
+                continue;
+            }
+            // Limit -1 keeps trailing empty fields so that "a,b" and "a,b," are not taken for equal.
+            String[] fields1 = row1.split(",", -1);
+            String[] fields2 = row2.split(",", -1);
+            if (fields1.length != fields2.length) {
+                return false;
+            }
+            for (int j = 0; j < fields1.length; j++) {
+                if (fields1[j].equals(fields2[j])) {
+                    continue;
+                }
+                if (fields1[j].indexOf('.') < 0 || !sameFloatValue(fields1[j], fields2[j])) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    /** Compares the parts after '=' of two fields as floats; fields without '=' or a parsable number never match. */
+    private static boolean sameFloatValue(String field1, String field2) {
+        String[] parts1 = field1.split("=");
+        String[] parts2 = field2.split("=");
+        if (parts1.length < 2 || parts2.length < 2) {
+            return false;
+        }
+        try {
+            float float1 = (float) Double.parseDouble(parts1[1]);
+            float float2 = (float) Double.parseDouble(parts2[1]);
+            return Math.abs(float1 - float2) == 0;
+        } catch (NumberFormatException e) {
+            return false;
+        }
+    }
+
+    /**
+     * Tells whether two tab-separated lines match: fields outside {@code poslistField} must be identical, fields
+     * listed in it must contain the same parenthesised entries in any order.
+     */
+    private static boolean containStrings(String lineExpected, String actualLine, int[] poslistField) {
+        if (lineExpected.equals(actualLine)) {
+            return true;
+        }
+        String[] fieldsExp = lineExpected.split("\\\t");
+        String[] fieldsAct = actualLine.split("\\\t");
+        if (fieldsAct.length != fieldsExp.length) {
+            return false;
+        }
+        for (int i = 0; i < fieldsAct.length; i++) {
+            if (!isListed(i, poslistField) && !fieldsAct[i].equals(fieldsExp[i])) {
+                return false;
+            }
+        }
+
+        ArrayList<String> posExp = new ArrayList<String>();
+        ArrayList<String> posAct = new ArrayList<String>();
+        for (int x : poslistField) {
+            collectEntries(fieldsExp[x], posExp);
+            collectEntries(fieldsAct[x], posAct);
+            Collections.sort(posExp);
+            Collections.sort(posAct);
+            if (!posExp.equals(posAct)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /** Returns true when {@code index} occurs in {@code indexes}. */
+    private static boolean isListed(int index, int[] indexes) {
+        for (int candidate : indexes) {
+            if (candidate == index) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Appends to {@code out} the text of every "(...)" group found in {@code value}; the first and the last
+     * character of {@code value} are never inspected.
+     */
+    private static void collectEntries(String value, ArrayList<String> out) {
+        int end = value.length() - 1;
+        int i = 1;
+        while (i < end) {
+            if (value.charAt(i) == '(') {
+                StringBuilder entry = new StringBuilder();
+                i++;
+                while (i < end && value.charAt(i) != ')') {
+                    entry.append(value.charAt(i));
+                    i++;
+                }
+                out.add(entry.toString());
+            }
+            i++;
+        }
+    }
+}
diff --git a/genomix/genomix-hyracks/src/test/resources/HighSplitRepeatResult.txt b/genomix/genomix-hyracks/src/test/resources/HighSplitRepeatResult.txt
new file mode 100755
index 0000000..dc4395d
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/HighSplitRepeatResult.txt
@@ -0,0 +1,6 @@
+{[(1-0_0)] [] [] [] [GCC] CCA 1.0x}
+{[(2-0_0)] [GCC] [] [] [] CGC 1.0x}
+{[(3-0_0)] [] [CCG] [] [GCC] CCG 2.0x}
+{[(2-0_0),(3-0_0),(1-0_0)] [CCG,CCA] [AGG] [] [AGC,CGC] GCC 3.0x}
+{[(1-0_0)] [GCC] [] [] [] AGC 1.0x}
+{[(2-0_0)] [] [GCC] [] [] AGG 1.0x}
diff --git a/genomix/genomix-hyracks/src/test/resources/LowSplitRepeatResult.txt b/genomix/genomix-hyracks/src/test/resources/LowSplitRepeatResult.txt
new file mode 100755
index 0000000..f8df8bb
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/LowSplitRepeatResult.txt
@@ -0,0 +1,6 @@
+{[(1-0_0)] [] [] [] [GCC] CCA 1.0x}
+{[(2-0_0)] [] [] [] [GCC] CCG 1.0x}
+{[(2-0_0),(3-0_0),(1-0_0)] [CCG,CCA] [AGG] [] [AGC] GCC 3.0x}
+{[(2-0_0),(1-0_0)] [GCC] [] [] [] AGC 2.0x}
+{[(3-0_0)] [AGG] [] [] [] AAG 1.0x}
+{[(3-0_0)] [] [GCC] [] [AAG] AGG 1.0x}
diff --git a/genomix/genomix-hyracks/src/test/resources/MidSplitRepeatResult.txt b/genomix/genomix-hyracks/src/test/resources/MidSplitRepeatResult.txt
new file mode 100755
index 0000000..dc4395d
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/MidSplitRepeatResult.txt
@@ -0,0 +1,6 @@
+{[(1-0_0)] [] [] [] [GCC] CCA 1.0x}
+{[(2-0_0)] [GCC] [] [] [] CGC 1.0x}
+{[(3-0_0)] [] [CCG] [] [GCC] CCG 2.0x}
+{[(2-0_0),(3-0_0),(1-0_0)] [CCG,CCA] [AGG] [] [AGC,CGC] GCC 3.0x}
+{[(1-0_0)] [GCC] [] [] [] AGC 1.0x}
+{[(2-0_0)] [] [GCC] [] [] AGG 1.0x}
diff --git a/genomix/genomix-hyracks/src/test/resources/Tips1Result.txt b/genomix/genomix-hyracks/src/test/resources/Tips1Result.txt
new file mode 100755
index 0000000..3f04844
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/Tips1Result.txt
@@ -0,0 +1,7 @@
+{[(1-0_0)] [] [] [] [GCC] CCA 1.0x}
+{[(1-0_0)] [AGC] [] [] [] CAG 1.0x}
+{[(2-0_0)] [] [ACG] [] [GCC] CCG 1.0x}
+{[(2-0_0)] [] [] [ACG] [] GTA 1.0x}
+{[(2-0_0),(1-0_0)] [CCG,CCA] [] [] [AGC] GCC 2.0x}
+{[(1-0_0)] [GCC] [] [] [CAG] AGC 1.0x}
+{[(2-0_0)] [] [CCG] [GTA] [] ACG 1.0x}
diff --git a/genomix/genomix-hyracks/src/test/resources/Tips2Result.txt b/genomix/genomix-hyracks/src/test/resources/Tips2Result.txt
new file mode 100755
index 0000000..322cc70
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/Tips2Result.txt
@@ -0,0 +1,7 @@
+{[(2-0_0)] [GAA] [] [CGC] [] CGA 1.0x}
+{[(2-0_0),(1-0_0)] [GCC] [AGC] [CGA] [] CGC 2.0x}
+{[(1-0_0)] [AGC] [] [] [ACA] CAG 1.0x}
+{[(2-0_0)] [] [] [] [CGA] GAA 1.0x}
+{[(2-0_0)] [] [] [] [CGC] GCC 1.0x}
+{[(1-0_0)] [CAG] [] [] [] ACA 1.0x}
+{[(1-0_0)] [] [CGC] [] [CAG] AGC 1.0x}
diff --git a/genomix/genomix-hyracks/src/test/resources/Tips3Result.txt b/genomix/genomix-hyracks/src/test/resources/Tips3Result.txt
new file mode 100755
index 0000000..d37d70c
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/Tips3Result.txt
@@ -0,0 +1,5 @@
+{[(2-0_0)] [] [] [] [GCC] CCA 1.0x}
+{[(2-0_0),(1-0_0)] [AGC] [] [] [] CAG 2.0x}
+{[(2-0_0),(1-0_0)] [CCA] [AGG] [] [AGC] GCC 2.0x}
+{[(2-0_0),(1-0_0)] [GCC] [] [] [CAG] AGC 2.0x}
+{[(1-0_0)] [] [GCC] [] [] AGG 1.0x}
diff --git a/genomix/genomix-hyracks/src/test/resources/Tips4Result.txt b/genomix/genomix-hyracks/src/test/resources/Tips4Result.txt
new file mode 100755
index 0000000..de3a226
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/Tips4Result.txt
@@ -0,0 +1,4 @@
+{[(2-0_0),(1-0_0)] [AGG] [] [] [] CAG 2.0x}
+{[(1-0_0)] [] [] [GCC] [] GCA 1.0x}
+{[(2-0_0),(1-0_0)] [] [AGG] [GCC,GCA] [] GCC 3.0x}
+{[(2-0_0),(1-0_0)] [] [GCC] [] [CAG] AGG 2.0x}
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/HighSplitRepeat.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/HighSplitRepeat.txt
new file mode 100644
index 0000000..eca0a13
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/HighSplitRepeat.txt
@@ -0,0 +1,3 @@
+1 AGCCACA
+2 GCACTTT
+3 CGCCGTC
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/LowSplitRepeat.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/LowSplitRepeat.txt
new file mode 100644
index 0000000..259fd80
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/LowSplitRepeat.txt
@@ -0,0 +1,3 @@
+1 AGCCA
+2 AGCCG
+3 GCCTT
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/MidSplitRepeat.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/MidSplitRepeat.txt
new file mode 100644
index 0000000..e934e54
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/MidSplitRepeat.txt
@@ -0,0 +1,3 @@
+1 AGCCA
+2 CGCCT
+3 GCCGG
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips1.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips1.txt
new file mode 100644
index 0000000..1e16d68
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips1.txt
@@ -0,0 +1,2 @@
+1 CAGCCA
+2 GCCGTA
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips2.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips2.txt
new file mode 100644
index 0000000..8109730
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips2.txt
@@ -0,0 +1,2 @@
+1 ACAGCG
+2 GGCGAA
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips3.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips3.txt
new file mode 100644
index 0000000..a672034
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips3.txt
@@ -0,0 +1,2 @@
+1 CAGCCT
+2 CAGCCA
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips4.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips4.txt
new file mode 100644
index 0000000..499e8e6
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips4.txt
@@ -0,0 +1,2 @@
+1 CAGGCA
+2 CAGGCC
diff --git a/genomix/genomix-hyracks/src/test/resources/data/webmap/SplitRepeat.txt b/genomix/genomix-hyracks/src/test/resources/data/webmap/SplitRepeat.txt
new file mode 100644
index 0000000..bb03d70
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/webmap/SplitRepeat.txt
@@ -0,0 +1,2 @@
+1 AATAG
+2 CATAC
diff --git a/genomix/genomix-hyracks/src/test/resources/data/webmap/test1.txt b/genomix/genomix-hyracks/src/test/resources/data/webmap/test1.txt
new file mode 100644
index 0000000..a720dc4
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/webmap/test1.txt
@@ -0,0 +1 @@
+1 AATAGAA
diff --git a/genomix/genomix-hyracks/src/test/resources/data/webmap/text.txt b/genomix/genomix-hyracks/src/test/resources/data/webmap/text.txt
new file mode 100755
index 0000000..01c49e5
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/webmap/text.txt
@@ -0,0 +1,6 @@
+1 AATAGAAG
+2 AATAGCTT
+3 AATAGAAG
+4 AATAGCTT
+5 AATAGAAG
+6 AGAAGAAG
diff --git a/genomix/genomix-hyracks/src/test/resources/hadoop/conf/core-site.xml b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/core-site.xml
new file mode 100644
index 0000000..3e5bacb
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/core-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+ <property>
+ <name>fs.default.name</name>
+ <value>hdfs://127.0.0.1:31888</value>
+ </property>
+ <property>
+ <name>hadoop.tmp.dir</name>
+ <value>/tmp/hadoop</value>
+ </property>
+
+
+</configuration>
diff --git a/genomix/genomix-hyracks/src/test/resources/hadoop/conf/hdfs-site.xml b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/hdfs-site.xml
new file mode 100644
index 0000000..b1b1902
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/hdfs-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+ <property>
+ <name>dfs.replication</name>
+ <value>1</value>
+ </property>
+
+ <property>
+ <name>dfs.block.size</name>
+ <value>65536</value>
+ </property>
+
+</configuration>
diff --git a/genomix/genomix-hyracks/src/test/resources/hadoop/conf/log4j.properties b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/log4j.properties
new file mode 100755
index 0000000..d5e6004
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/log4j.properties
@@ -0,0 +1,94 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=FATAL,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=FATAL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
diff --git a/genomix/genomix-hyracks/src/test/resources/hadoop/conf/mapred-site.xml b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/mapred-site.xml
new file mode 100644
index 0000000..525e7d5
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/hadoop/conf/mapred-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+ <property>
+ <name>mapred.job.tracker</name>
+ <value>localhost:29007</value>
+ </property>
+ <property>
+ <name>mapred.tasktracker.map.tasks.maximum</name>
+ <value>20</value>
+ </property>
+ <property>
+ <name>mapred.tasktracker.reduce.tasks.maximum</name>
+ <value>20</value>
+ </property>
+ <property>
+ <name>mapred.max.split.size</name>
+ <value>2048</value>
+ </property>
+
+</configuration>
diff --git a/genomix/genomix-pregelix/data/AddBridge/SimpleTest/part-00000 b/genomix/genomix-pregelix/data/AddBridge/SimpleTest/part-00000
new file mode 100755
index 0000000..22508c2
--- /dev/null
+++ b/genomix/genomix-pregelix/data/AddBridge/SimpleTest/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/2/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/2/part-00000
new file mode 100755
index 0000000..dc6250b
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/2/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/3/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/3/part-00000
new file mode 100755
index 0000000..a672f70
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/3/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/4/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/4/part-00000
new file mode 100755
index 0000000..b47312f
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/4/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/5/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/5/part-00000
new file mode 100755
index 0000000..3b06e4c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/5/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/6/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/6/part-00000
new file mode 100755
index 0000000..330821c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/6/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/7/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/7/part-00000
new file mode 100755
index 0000000..a7be3db
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/7/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/8/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/8/part-00000
new file mode 100755
index 0000000..50bf862
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/8/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/9/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/9/part-00000
new file mode 100755
index 0000000..1b8024d
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/9/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/2/part-0 b/genomix/genomix-pregelix/data/PathTestSet/2/part-0
new file mode 100755
index 0000000..f7f0186
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/2/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/2/part-1 b/genomix/genomix-pregelix/data/PathTestSet/2/part-1
new file mode 100755
index 0000000..7b40564
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/2/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/3/part-0 b/genomix/genomix-pregelix/data/PathTestSet/3/part-0
new file mode 100755
index 0000000..055931c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/3/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/3/part-1 b/genomix/genomix-pregelix/data/PathTestSet/3/part-1
new file mode 100755
index 0000000..4487006
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/3/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/4/part-0 b/genomix/genomix-pregelix/data/PathTestSet/4/part-0
new file mode 100755
index 0000000..d3baf9f
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/4/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/4/part-1 b/genomix/genomix-pregelix/data/PathTestSet/4/part-1
new file mode 100755
index 0000000..877d4f3
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/4/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/5/part-0 b/genomix/genomix-pregelix/data/PathTestSet/5/part-0
new file mode 100755
index 0000000..9a38afb
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/5/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/5/part-1 b/genomix/genomix-pregelix/data/PathTestSet/5/part-1
new file mode 100755
index 0000000..36a623e
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/5/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/6/part-0 b/genomix/genomix-pregelix/data/PathTestSet/6/part-0
new file mode 100755
index 0000000..fee737e
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/6/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/6/part-1 b/genomix/genomix-pregelix/data/PathTestSet/6/part-1
new file mode 100755
index 0000000..07f3452
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/6/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/7/part-0 b/genomix/genomix-pregelix/data/PathTestSet/7/part-0
new file mode 100755
index 0000000..3d4cc9b
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/7/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/7/part-1 b/genomix/genomix-pregelix/data/PathTestSet/7/part-1
new file mode 100755
index 0000000..b7fb670
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/7/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/8/part-0 b/genomix/genomix-pregelix/data/PathTestSet/8/part-0
new file mode 100755
index 0000000..778fbe8
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/8/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/8/part-1 b/genomix/genomix-pregelix/data/PathTestSet/8/part-1
new file mode 100755
index 0000000..a045391
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/8/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/9/part-0 b/genomix/genomix-pregelix/data/PathTestSet/9/part-0
new file mode 100755
index 0000000..b2d3f50
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/9/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/9/part-1 b/genomix/genomix-pregelix/data/PathTestSet/9/part-1
new file mode 100755
index 0000000..dacf119
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/9/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/BridgePath/BridgePath b/genomix/genomix-pregelix/data/PathTestSet/BridgePath/BridgePath
new file mode 100755
index 0000000..afb242c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/BridgePath/BridgePath
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/CyclePath/part-0 b/genomix/genomix-pregelix/data/PathTestSet/CyclePath/part-0
new file mode 100755
index 0000000..b986370
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/CyclePath/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/CyclePath/part-1 b/genomix/genomix-pregelix/data/PathTestSet/CyclePath/part-1
new file mode 100755
index 0000000..729417d
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/CyclePath/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/LongPath/LongPath b/genomix/genomix-pregelix/data/PathTestSet/LongPath/LongPath
new file mode 100755
index 0000000..dbbb21a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/LongPath/LongPath
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/Path/Path b/genomix/genomix-pregelix/data/PathTestSet/Path/Path
new file mode 100755
index 0000000..7a3c370
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/Path/Path
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/RingPath/CyclePath b/genomix/genomix-pregelix/data/PathTestSet/RingPath/CyclePath
new file mode 100755
index 0000000..2387139
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/RingPath/CyclePath
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/SimplePath/SimplePath b/genomix/genomix-pregelix/data/PathTestSet/SimplePath/SimplePath
new file mode 100755
index 0000000..bd6b4f0
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/SimplePath/SimplePath
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/SinglePath/SinglePath b/genomix/genomix-pregelix/data/PathTestSet/SinglePath/SinglePath
new file mode 100755
index 0000000..e5fc081
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/SinglePath/SinglePath
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/ThreeKmer/part-0 b/genomix/genomix-pregelix/data/PathTestSet/ThreeKmer/part-0
new file mode 100755
index 0000000..b02d917
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/ThreeKmer/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/ThreeKmer/part-1 b/genomix/genomix-pregelix/data/PathTestSet/ThreeKmer/part-1
new file mode 100755
index 0000000..37e9986
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/ThreeKmer/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/TreePath/TreePath b/genomix/genomix-pregelix/data/PathTestSet/TreePath/TreePath
new file mode 100755
index 0000000..bb83023
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/TreePath/TreePath
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/TwoKmer/part-0 b/genomix/genomix-pregelix/data/PathTestSet/TwoKmer/part-0
new file mode 100755
index 0000000..7973858
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/TwoKmer/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathTestSet/TwoKmer/part-1 b/genomix/genomix-pregelix/data/PathTestSet/TwoKmer/part-1
new file mode 100755
index 0000000..02d893d
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathTestSet/TwoKmer/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/SplitRepeat/AdjSplitRepeat/part-00000 b/genomix/genomix-pregelix/data/SplitRepeat/AdjSplitRepeat/part-00000
new file mode 100755
index 0000000..665db46
--- /dev/null
+++ b/genomix/genomix-pregelix/data/SplitRepeat/AdjSplitRepeat/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/SplitRepeat/SplitOnce/part-00000 b/genomix/genomix-pregelix/data/SplitRepeat/SplitOnce/part-00000
new file mode 100755
index 0000000..cd574bb
--- /dev/null
+++ b/genomix/genomix-pregelix/data/SplitRepeat/SplitOnce/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/SplitRepeat/SplitTwice/part-00000 b/genomix/genomix-pregelix/data/SplitRepeat/SplitTwice/part-00000
new file mode 100755
index 0000000..e56b813
--- /dev/null
+++ b/genomix/genomix-pregelix/data/SplitRepeat/SplitTwice/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/2 b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/2
new file mode 100644
index 0000000..0f501fe
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/2
@@ -0,0 +1 @@
+1 AATA
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/2~ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/2~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/2~
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/3 b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/3
new file mode 100644
index 0000000..b90246c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/3
@@ -0,0 +1 @@
+1 AATAG
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/3~ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/3~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/3~
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/4 b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/4
new file mode 100644
index 0000000..3f1cd5c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/4
@@ -0,0 +1 @@
+1 AATAGA
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/4~ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/4~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/4~
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/5 b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/5
new file mode 100644
index 0000000..a720dc4
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/5
@@ -0,0 +1 @@
+1 AATAGAA
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/5~ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/5~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/5~
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/6 b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/6
new file mode 100644
index 0000000..7a95b7c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/6
@@ -0,0 +1 @@
+1 AATAGAAC
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/6~ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/6~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/6~
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/7 b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/7
new file mode 100644
index 0000000..ce4b8a8
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/7
@@ -0,0 +1 @@
+1 AATAGAACT
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/7~ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/7~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/7~
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/8 b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/8
new file mode 100644
index 0000000..3959d4d
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/8
@@ -0,0 +1 @@
+1 AATAGAACTT
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/8~ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/8~
new file mode 100644
index 0000000..89ead1e
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/8~
@@ -0,0 +1 @@
+1 AATAGAACTTA
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/9 b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/9
new file mode 100644
index 0000000..89ead1e
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/9
@@ -0,0 +1 @@
+1 AATAGAACTTA
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/9~ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/9~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/pathmerge_TestSet/9~
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/tworead3.txt b/genomix/genomix-pregelix/data/graphbuild.test/tworead3.txt
new file mode 100644
index 0000000..0ba6a10
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/tworead3.txt
@@ -0,0 +1,2 @@
+1 AATAGAAC
+2 GCGCATGT
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/tworead3.txt~ b/genomix/genomix-pregelix/data/graphbuild.test/tworead3.txt~
new file mode 100644
index 0000000..ff13d81
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/tworead3.txt~
@@ -0,0 +1,2 @@
+1 AATAGAAC
+2 CCGTATGT
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/tworeads.txt b/genomix/genomix-pregelix/data/graphbuild.test/tworeads.txt
new file mode 100644
index 0000000..55c1c45
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/tworeads.txt
@@ -0,0 +1,2 @@
+1 AATAGAAC
+2 GCCTACTG
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/tworeads.txt~ b/genomix/genomix-pregelix/data/graphbuild.test/tworeads.txt~
new file mode 100644
index 0000000..6e9444d
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/tworeads.txt~
@@ -0,0 +1,2 @@
+1 AATAGAAGC
+2 GCCTACTG
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/tworeads2.txt b/genomix/genomix-pregelix/data/graphbuild.test/tworeads2.txt
new file mode 100644
index 0000000..b35ef52
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/tworeads2.txt
@@ -0,0 +1,2 @@
+1 AATAGAAC
+2 GCCTGTCA
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/tworeads2.txt~ b/genomix/genomix-pregelix/data/graphbuild.test/tworeads2.txt~
new file mode 100644
index 0000000..55c1c45
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/tworeads2.txt~
@@ -0,0 +1,2 @@
+1 AATAGAAC
+2 GCCTACTG
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles.txt b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles.txt
new file mode 100644
index 0000000..2896107
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles.txt
@@ -0,0 +1,16 @@
+((2,1) [(3,1),(4,1),(2,2)] [] [] [(1,1)] CGTCC) (null)
+((2,2) [(2,3)] [] [] [(2,1)] GTCCT) (null)
+((2,3) [(2,4)] [] [] [(2,2)] TCCTT) (null)
+((2,4) [(4,4),(3,4)] [] [] [(1,4),(2,3)] CCTTA) (null)
+((4,1) [(4,2)] [] [] [(1,2),(2,1)] GTCCT) (null)
+((4,2) [(4,3)] [] [] [(4,1)] TCCTT) (null)
+((4,3) [(4,4)] [] [] [(1,4),(4,2)] CCTTA) (null)
+((4,4) [] [] [] [(2,4),(4,3)] CTTAG) (null)
+((1,1) [(2,1),(1,2)] [] [] [] ACGTC) (null)
+((1,2) [(3,1),(4,1),(1,3)] [] [] [(1,1)] CGTCC) (null)
+((1,3) [(1,4)] [] [] [(1,2)] GTCCT) (null)
+((1,4) [(3,3),(4,3),(2,4)] [] [] [(1,3)] TCCTT) (null)
+((3,1) [(3,2)] [] [] [(1,2),(2,1)] GTCCT) (null)
+((3,2) [(3,3)] [] [] [(3,1)] TCCTT) (null)
+((3,3) [(3,4)] [] [] [(1,4),(3,2)] CCTTA) (null)
+((3,4) [] [] [] [(2,4),(3,3)] CTTAG) (null)
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles.txt.svg b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles.txt.svg
new file mode 100644
index 0000000..fef64c2
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles.txt.svg
@@ -0,0 +1,449 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: five_ff_bubbles_txt Pages: 1 -->
+<svg width="764pt" height="520pt"
+ viewBox="0.00 0.00 764.00 520.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 516)">
+<title>five_ff_bubbles_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-516 761,-516 761,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 500,-305 500,-217 8,-217"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_3</title>
+<polygon fill="none" stroke="black" points="274,-416 274,-504 748,-504 748,-416 274,-416"/>
+</g>
+<g id="graph5" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="150,-313 150,-401 624,-401 624,-313 150,-313"/>
+</g>
+<g id="graph6" class="cluster"><title>cluster_4</title>
+<polygon fill="none" stroke="black" points="274,-121 274,-209 748,-209 748,-121 274,-121"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="43.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ACGTC</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="42.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">GACGT</text>
+</g>
+<!-- 1,2 -->
+<g id="node16" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="185" y="-257.667" font-family="Times Roman,serif" font-size="10.00">CGTCC</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="183.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">GGACG</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge61" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 2,1 -->
+<g id="node25" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-349 179,-363 224,-363 224,-349 179,-349"/>
+<text text-anchor="start" x="185" y="-353.667" font-family="Times Roman,serif" font-size="10.00">CGTCC</text>
+<polygon fill="#218559" stroke="#218559" points="179,-335 179,-349 224,-349 224,-335 179,-335"/>
+<text text-anchor="start" x="183.5" y="-339.667" font-family="Times Roman,serif" font-size="10.00">GGACG</text>
+</g>
+<!-- 1,1->2,1 -->
+<g id="edge59" class="edge"><title>1,1->2,1</title>
+<path fill="none" stroke="#dd1e2f" d="M92.652,-283.751C112.403,-297.103 137.544,-314.1 158.635,-328.359"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="156.853,-331.379 167.097,-334.08 160.773,-325.58 156.853,-331.379"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge69" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 1,3 -->
+<g id="node17" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="310" y="-257.667" font-family="Times Roman,serif" font-size="10.00">GTCCT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="308" y="-243.667" font-family="Times Roman,serif" font-size="10.00">AGGAC</text>
+</g>
+<!-- 1,2->1,3 -->
+<g id="edge67" class="edge"><title>1,2->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 3,1 -->
+<g id="node20" class="node"><title>3,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-460" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-471.167" font-family="Times Roman,serif" font-size="10.00">3,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-452 303,-466 348,-466 348,-452 303,-452"/>
+<text text-anchor="start" x="310" y="-456.667" font-family="Times Roman,serif" font-size="10.00">GTCCT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-438 303,-452 348,-452 348,-438 303,-438"/>
+<text text-anchor="start" x="308" y="-442.667" font-family="Times Roman,serif" font-size="10.00">AGGAC</text>
+</g>
+<!-- 1,2->3,1 -->
+<g id="edge63" class="edge"><title>1,2->3,1</title>
+<path fill="none" stroke="#dd1e2f" d="M227.005,-290.029C236.061,-300.138 244,-309 244,-309 244,-309 274,-405 274,-405 274,-405 282.027,-413.657 291.712,-424.101"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="289.238,-426.58 298.604,-431.533 294.371,-421.821 289.238,-426.58"/>
+</g>
+<!-- 4,1 -->
+<g id="node30" class="node"><title>4,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">4,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-157 303,-171 348,-171 348,-157 303,-157"/>
+<text text-anchor="start" x="310" y="-161.667" font-family="Times Roman,serif" font-size="10.00">GTCCT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-143 303,-157 348,-157 348,-143 303,-143"/>
+<text text-anchor="start" x="308" y="-147.667" font-family="Times Roman,serif" font-size="10.00">AGGAC</text>
+</g>
+<!-- 1,2->4,1 -->
+<g id="edge65" class="edge"><title>1,2->4,1</title>
+<path fill="none" stroke="#dd1e2f" d="M227.005,-231.971C236.061,-221.862 244,-213 244,-213 244,-213 262,-184 262,-184 262,-184 267.269,-182.411 274.758,-180.152"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="275.825,-183.486 284.389,-177.248 273.804,-176.784 275.825,-183.486"/>
+</g>
+<!-- 1,3->1,2 -->
+<g id="edge73" class="edge"><title>1,3->1,2</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+<!-- 1,4 -->
+<g id="node18" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-253 427,-267 472,-267 472,-253 427,-253"/>
+<text text-anchor="start" x="435" y="-257.667" font-family="Times Roman,serif" font-size="10.00">TCCTT</text>
+<polygon fill="#218559" stroke="#218559" points="427,-239 427,-253 472,-253 472,-239 427,-239"/>
+<text text-anchor="start" x="431.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">AAGGA</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge71" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-254.562C376.601,-254.258 386.531,-254.176 396.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-257.822 406.443,-254.554 396.527,-250.823 396.364,-257.822"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge81" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M406.443,-267.446C397.158,-267.745 387.225,-267.824 377.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-264.175 367.327,-267.438 377.242,-271.173 377.407,-264.175"/>
+</g>
+<!-- 3,3 -->
+<g id="node22" class="node"><title>3,3</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-460" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-471.167" font-family="Times Roman,serif" font-size="10.00">3,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-452 551,-466 596,-466 596,-452 551,-452"/>
+<text text-anchor="start" x="558.5" y="-456.667" font-family="Times Roman,serif" font-size="10.00">CCTTA</text>
+<polygon fill="#218559" stroke="#218559" points="551,-438 551,-452 596,-452 596,-438 551,-438"/>
+<text text-anchor="start" x="556.5" y="-442.667" font-family="Times Roman,serif" font-size="10.00">TAAGG</text>
+</g>
+<!-- 1,4->3,3 -->
+<g id="edge75" class="edge"><title>1,4->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M477.779,-288.086C489.347,-298.974 500,-309 500,-309 500,-309 530,-405 530,-405 530,-405 535.671,-412.254 542.894,-421.492"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="540.32,-423.882 549.236,-429.604 545.834,-419.571 540.32,-423.882"/>
+</g>
+<!-- 2,4 -->
+<g id="node28" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-349 551,-363 596,-363 596,-349 551,-349"/>
+<text text-anchor="start" x="558.5" y="-353.667" font-family="Times Roman,serif" font-size="10.00">CCTTA</text>
+<polygon fill="#218559" stroke="#218559" points="551,-335 551,-349 596,-349 596,-335 551,-335"/>
+<text text-anchor="start" x="556.5" y="-339.667" font-family="Times Roman,serif" font-size="10.00">TAAGG</text>
+</g>
+<!-- 1,4->2,4 -->
+<g id="edge79" class="edge"><title>1,4->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M479.857,-286.293C494.987,-298.695 510,-311 510,-311 510,-311 520.107,-318.38 532.225,-327.228"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="530.253,-330.121 540.393,-333.192 534.38,-324.468 530.253,-330.121"/>
+</g>
+<!-- 4,3 -->
+<g id="node32" class="node"><title>4,3</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">4,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-157 551,-171 596,-171 596,-157 551,-157"/>
+<text text-anchor="start" x="558.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">CCTTA</text>
+<polygon fill="#218559" stroke="#218559" points="551,-143 551,-157 596,-157 596,-143 551,-143"/>
+<text text-anchor="start" x="556.5" y="-147.667" font-family="Times Roman,serif" font-size="10.00">TAAGG</text>
+</g>
+<!-- 1,4->4,3 -->
+<g id="edge77" class="edge"><title>1,4->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M479.143,-235.305C494.511,-222.204 510,-209 510,-209 510,-209 519.601,-202.295 531.307,-194.119"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="533.461,-196.884 539.655,-188.288 529.453,-191.145 533.461,-196.884"/>
+</g>
+<!-- 3,1->1,2 -->
+<g id="edge85" class="edge"><title>3,1->1,2</title>
+<path fill="none" stroke="#218559" d="M298.604,-431.533C286.159,-418.113 274,-405 274,-405 274,-405 262,-384 262,-384 262,-384 244,-309 244,-309 244,-309 239.718,-304.221 233.857,-297.678"/>
+<polygon fill="#218559" stroke="#218559" points="236.285,-295.142 227.005,-290.029 231.071,-299.813 236.285,-295.142"/>
+</g>
+<!-- 3,2 -->
+<g id="node21" class="node"><title>3,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-460" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-471.167" font-family="Times Roman,serif" font-size="10.00">3,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-452 427,-466 472,-466 472,-452 427,-452"/>
+<text text-anchor="start" x="435" y="-456.667" font-family="Times Roman,serif" font-size="10.00">TCCTT</text>
+<polygon fill="#218559" stroke="#218559" points="427,-438 427,-452 472,-452 472,-438 427,-438"/>
+<text text-anchor="start" x="431.5" y="-442.667" font-family="Times Roman,serif" font-size="10.00">AAGGA</text>
+</g>
+<!-- 3,1->3,2 -->
+<g id="edge83" class="edge"><title>3,1->3,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-453.562C376.601,-453.258 386.531,-453.176 396.159,-453.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-456.822 406.443,-453.554 396.527,-449.823 396.364,-456.822"/>
+</g>
+<!-- 3,1->2,1 -->
+<g id="edge87" class="edge"><title>3,1->2,1</title>
+<path fill="none" stroke="#218559" d="M283.329,-450.078C271.613,-447.289 262,-445 262,-445 262,-445 244.717,-420.067 228.55,-396.744"/>
+<polygon fill="#218559" stroke="#218559" points="231.252,-394.498 222.678,-388.273 225.499,-398.486 231.252,-394.498"/>
+</g>
+<!-- 3,2->3,1 -->
+<g id="edge91" class="edge"><title>3,2->3,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-466.446C397.158,-466.745 387.225,-466.824 377.602,-466.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-463.175 367.327,-466.438 377.242,-470.173 377.407,-463.175"/>
+</g>
+<!-- 3,2->3,3 -->
+<g id="edge89" class="edge"><title>3,2->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-453.562C500.601,-453.258 510.531,-453.176 520.159,-453.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-456.822 530.443,-453.554 520.527,-449.823 520.364,-456.822"/>
+</g>
+<!-- 3,3->1,4 -->
+<g id="edge95" class="edge"><title>3,3->1,4</title>
+<path fill="none" stroke="#218559" d="M549.236,-429.604C539.291,-416.884 530,-405 530,-405 530,-405 510,-359 510,-359 510,-359 500,-309 500,-309 500,-309 493.498,-302.881 485.137,-295.012"/>
+<polygon fill="#218559" stroke="#218559" points="487.46,-292.391 477.779,-288.086 482.662,-297.489 487.46,-292.391"/>
+</g>
+<!-- 3,3->3,2 -->
+<g id="edge97" class="edge"><title>3,3->3,2</title>
+<path fill="none" stroke="#218559" d="M530.443,-466.446C521.158,-466.745 511.225,-466.824 501.602,-466.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-463.175 491.327,-466.438 501.242,-470.173 501.407,-463.175"/>
+</g>
+<!-- 3,4 -->
+<g id="node23" class="node"><title>3,4</title>
+<ellipse fill="none" stroke="black" cx="697" cy="-460" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="677.5" y="-471.167" font-family="Times Roman,serif" font-size="10.00">3,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="675,-452 675,-466 720,-466 720,-452 675,-452"/>
+<text text-anchor="start" x="682" y="-456.667" font-family="Times Roman,serif" font-size="10.00">CTTAG</text>
+<polygon fill="#218559" stroke="#218559" points="675,-438 675,-452 720,-452 720,-438 675,-438"/>
+<text text-anchor="start" x="681" y="-442.667" font-family="Times Roman,serif" font-size="10.00">CTAAG</text>
+</g>
+<!-- 3,3->3,4 -->
+<g id="edge93" class="edge"><title>3,3->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M615.327,-453.562C624.601,-453.258 634.531,-453.176 644.159,-453.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="644.364,-456.822 654.443,-453.554 644.527,-449.823 644.364,-456.822"/>
+</g>
+<!-- 3,4->3,3 -->
+<g id="edge101" class="edge"><title>3,4->3,3</title>
+<path fill="none" stroke="#218559" d="M654.443,-466.446C645.158,-466.745 635.225,-466.824 625.602,-466.681"/>
+<polygon fill="#218559" stroke="#218559" points="625.407,-463.175 615.327,-466.438 625.242,-470.173 625.407,-463.175"/>
+</g>
+<!-- 3,4->2,4 -->
+<g id="edge99" class="edge"><title>3,4->2,4</title>
+<path fill="none" stroke="#218559" d="M667.572,-433.467C651.973,-419.402 636,-405 636,-405 636,-405 625.367,-396.899 612.837,-387.352"/>
+<polygon fill="#218559" stroke="#218559" points="614.945,-384.558 604.869,-381.281 610.702,-390.126 614.945,-384.558"/>
+</g>
+<!-- 2,1->1,1 -->
+<g id="edge21" class="edge"><title>2,1->1,1</title>
+<path fill="none" stroke="#218559" d="M163.346,-338.87C142.193,-328.685 120,-318 120,-318 120,-318 108.279,-307.047 95.112,-294.744"/>
+<polygon fill="#218559" stroke="#218559" points="97.41,-292.101 87.7138,-287.831 92.6308,-297.216 97.41,-292.101"/>
+</g>
+<!-- 2,1->3,1 -->
+<g id="edge15" class="edge"><title>2,1->3,1</title>
+<path fill="none" stroke="#dd1e2f" d="M225.811,-386.692C242.699,-406.902 262,-430 262,-430 262,-430 268.943,-433.306 278.219,-437.723"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="276.728,-440.89 287.262,-442.029 279.738,-434.57 276.728,-440.89"/>
+</g>
+<!-- 2,2 -->
+<g id="node26" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-349 303,-363 348,-363 348,-349 303,-349"/>
+<text text-anchor="start" x="310" y="-353.667" font-family="Times Roman,serif" font-size="10.00">GTCCT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-335 303,-349 348,-349 348,-335 303,-335"/>
+<text text-anchor="start" x="308" y="-339.667" font-family="Times Roman,serif" font-size="10.00">AGGAC</text>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge19" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-350.562C252.601,-350.258 262.531,-350.176 272.159,-350.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-353.822 282.443,-350.554 272.527,-346.823 272.364,-353.822"/>
+</g>
+<!-- 2,1->4,1 -->
+<g id="edge17" class="edge"><title>2,1->4,1</title>
+<path fill="none" stroke="#dd1e2f" d="M227.005,-327.971C236.061,-317.862 244,-309 244,-309 244,-309 274,-213 274,-213 274,-213 280.502,-206.881 288.863,-199.012"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="291.338,-201.489 296.221,-192.086 286.54,-196.391 291.338,-201.489"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge25" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M282.443,-363.446C273.158,-363.745 263.225,-363.824 253.602,-363.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-360.175 243.327,-363.438 253.242,-367.173 253.407,-360.175"/>
+</g>
+<!-- 2,3 -->
+<g id="node27" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-349 427,-363 472,-363 472,-349 427,-349"/>
+<text text-anchor="start" x="435" y="-353.667" font-family="Times Roman,serif" font-size="10.00">TCCTT</text>
+<polygon fill="#218559" stroke="#218559" points="427,-335 427,-349 472,-349 472,-335 427,-335"/>
+<text text-anchor="start" x="431.5" y="-339.667" font-family="Times Roman,serif" font-size="10.00">AAGGA</text>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge23" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-350.562C376.601,-350.258 386.531,-350.176 396.159,-350.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-353.822 406.443,-350.554 396.527,-346.823 396.364,-353.822"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge29" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M406.443,-363.446C397.158,-363.745 387.225,-363.824 377.602,-363.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-360.175 367.327,-363.438 377.242,-367.173 377.407,-360.175"/>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge27" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-350.562C500.601,-350.258 510.531,-350.176 520.159,-350.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-353.822 530.443,-350.554 520.527,-346.823 520.364,-353.822"/>
+</g>
+<!-- 2,4->1,4 -->
+<g id="edge35" class="edge"><title>2,4->1,4</title>
+<path fill="none" stroke="#218559" d="M541.015,-332.238C525.051,-319.878 505.587,-304.81 488.68,-291.72"/>
+<polygon fill="#218559" stroke="#218559" points="490.661,-288.827 480.611,-285.473 486.376,-294.363 490.661,-288.827"/>
+</g>
+<!-- 2,4->3,4 -->
+<g id="edge33" class="edge"><title>2,4->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M603.652,-382.461C620.195,-396.202 640.78,-413.301 658.406,-427.942"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="656.342,-430.778 666.271,-434.475 660.815,-425.393 656.342,-430.778"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge37" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M530.443,-363.446C521.158,-363.745 511.225,-363.824 501.602,-363.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-360.175 491.327,-363.438 501.242,-367.173 501.407,-360.175"/>
+</g>
+<!-- 4,4 -->
+<g id="node33" class="node"><title>4,4</title>
+<ellipse fill="none" stroke="black" cx="697" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="677.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">4,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="675,-157 675,-171 720,-171 720,-157 675,-157"/>
+<text text-anchor="start" x="682" y="-161.667" font-family="Times Roman,serif" font-size="10.00">CTTAG</text>
+<polygon fill="#218559" stroke="#218559" points="675,-143 675,-157 720,-157 720,-143 675,-143"/>
+<text text-anchor="start" x="681" y="-147.667" font-family="Times Roman,serif" font-size="10.00">CTAAG</text>
+</g>
+<!-- 2,4->4,4 -->
+<g id="edge31" class="edge"><title>2,4->4,4</title>
+<path fill="none" stroke="#dd1e2f" d="M593.476,-325.295C614.656,-292.5 647.802,-241.178 670.862,-205.472"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="674.008,-207.051 676.494,-196.752 668.128,-203.254 674.008,-207.051"/>
+</g>
+<!-- 4,1->1,2 -->
+<g id="edge41" class="edge"><title>4,1->1,2</title>
+<path fill="none" stroke="#218559" d="M285.459,-179.436C272.801,-184.057 262,-188 262,-188 262,-188 244,-213 244,-213 244,-213 239.718,-217.779 233.857,-224.322"/>
+<polygon fill="#218559" stroke="#218559" points="231.071,-222.187 227.005,-231.971 236.285,-226.858 231.071,-222.187"/>
+</g>
+<!-- 4,1->2,1 -->
+<g id="edge43" class="edge"><title>4,1->2,1</title>
+<path fill="none" stroke="#218559" d="M296.221,-192.086C284.653,-202.974 274,-213 274,-213 274,-213 262,-243 262,-243 262,-243 244,-309 244,-309 244,-309 239.718,-313.779 233.857,-320.322"/>
+<polygon fill="#218559" stroke="#218559" points="231.071,-318.187 227.005,-327.971 236.285,-322.858 231.071,-318.187"/>
+</g>
+<!-- 4,2 -->
+<g id="node31" class="node"><title>4,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">4,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-157 427,-171 472,-171 472,-157 427,-157"/>
+<text text-anchor="start" x="435" y="-161.667" font-family="Times Roman,serif" font-size="10.00">TCCTT</text>
+<polygon fill="#218559" stroke="#218559" points="427,-143 427,-157 472,-157 472,-143 427,-143"/>
+<text text-anchor="start" x="431.5" y="-147.667" font-family="Times Roman,serif" font-size="10.00">AAGGA</text>
+</g>
+<!-- 4,1->4,2 -->
+<g id="edge39" class="edge"><title>4,1->4,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-158.562C376.601,-158.258 386.531,-158.176 396.159,-158.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-161.822 406.443,-158.554 396.527,-154.823 396.364,-161.822"/>
+</g>
+<!-- 4,2->4,1 -->
+<g id="edge47" class="edge"><title>4,2->4,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-171.446C397.158,-171.745 387.225,-171.824 377.602,-171.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-168.175 367.327,-171.438 377.242,-175.173 377.407,-168.175"/>
+</g>
+<!-- 4,2->4,3 -->
+<g id="edge45" class="edge"><title>4,2->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-158.562C500.601,-158.258 510.531,-158.176 520.159,-158.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-161.822 530.443,-158.554 520.527,-154.823 520.364,-161.822"/>
+</g>
+<!-- 4,3->1,4 -->
+<g id="edge51" class="edge"><title>4,3->1,4</title>
+<path fill="none" stroke="#218559" d="M541.015,-189.762C525.051,-202.122 505.587,-217.19 488.68,-230.28"/>
+<polygon fill="#218559" stroke="#218559" points="486.376,-227.637 480.611,-236.527 490.661,-233.173 486.376,-227.637"/>
+</g>
+<!-- 4,3->4,2 -->
+<g id="edge53" class="edge"><title>4,3->4,2</title>
+<path fill="none" stroke="#218559" d="M530.443,-171.446C521.158,-171.745 511.225,-171.824 501.602,-171.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-168.175 491.327,-171.438 501.242,-175.173 501.407,-168.175"/>
+</g>
+<!-- 4,3->4,4 -->
+<g id="edge49" class="edge"><title>4,3->4,4</title>
+<path fill="none" stroke="#dd1e2f" d="M615.327,-158.562C624.601,-158.258 634.531,-158.176 644.159,-158.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="644.364,-161.822 654.443,-158.554 644.527,-154.823 644.364,-161.822"/>
+</g>
+<!-- 4,4->2,4 -->
+<g id="edge55" class="edge"><title>4,4->2,4</title>
+<path fill="none" stroke="#218559" d="M678.863,-197.706C660.882,-230.131 636,-275 636,-275 636,-275 618.994,-297.135 602.652,-318.405"/>
+<polygon fill="#218559" stroke="#218559" points="599.776,-316.404 596.459,-326.466 605.327,-320.669 599.776,-316.404"/>
+</g>
+<!-- 4,4->4,3 -->
+<g id="edge57" class="edge"><title>4,4->4,3</title>
+<path fill="none" stroke="#218559" d="M654.443,-171.446C645.158,-171.745 635.225,-171.824 625.602,-171.681"/>
+<polygon fill="#218559" stroke="#218559" points="625.407,-168.175 615.327,-171.438 625.242,-175.173 625.407,-168.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles/.part-0.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles/.part-0.crc
new file mode 100644
index 0000000..2085368
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles/.part-1.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles/.part-1.crc
new file mode 100644
index 0000000..69dcf7d
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles/part-0 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles/part-0
new file mode 100755
index 0000000..36702f5
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles/part-1 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles/part-1
new file mode 100755
index 0000000..9045432
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_ff_bubbles/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles.txt b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles.txt
new file mode 100644
index 0000000..a41ce56
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles.txt
@@ -0,0 +1,18 @@
+((2,1) [(2,3)] [] [] [] AATAGA) (null)
+((2,3) [(6,1),(2,4)] [] [] [(2,1)] TAGAA) (null)
+((2,4) [(6,2)] [] [] [(2,3)] AGAAG) (null)
+((4,1) [(4,3)] [] [] [] AATAGA) (null)
+((4,3) [(6,1),(4,4)] [] [] [(4,1)] TAGAA) (null)
+((4,4) [(6,2)] [] [] [(4,3)] AGAAG) (null)
+((6,1) [(6,2)] [] [] [(2,3),(1,3),(3,3),(4,3),(5,3)] AGAAG) (null)
+((6,2) [(6,3)] [] [] [(2,4),(3,4),(1,4),(4,4),(5,4),(6,1)] GAAGA) (null)
+((6,3) [] [] [] [(6,2)] AAGAAG) (null)
+((1,1) [(1,3)] [] [] [] AATAGA) (null)
+((1,3) [(6,1),(1,4)] [] [] [(1,1)] TAGAA) (null)
+((1,4) [(6,2)] [] [] [(1,3)] AGAAG) (null)
+((3,1) [(3,3)] [] [] [] AATAGA) (null)
+((3,3) [(6,1),(3,4)] [] [] [(3,1)] TAGAA) (null)
+((3,4) [(6,2)] [] [] [(3,3)] AGAAG) (null)
+((5,1) [(5,3)] [] [] [] AATAGA) (null)
+((5,3) [(6,1),(5,4)] [] [] [(5,1)] TAGAA) (null)
+((5,4) [(6,2)] [] [] [(5,3)] AGAAG) (null)
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles.txt.svg b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles.txt.svg
new file mode 100644
index 0000000..ff38f7d
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles.txt.svg
@@ -0,0 +1,473 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: five_length1_bubbles_txt Pages: 1 -->
+<svg width="646pt" height="844pt"
+ viewBox="0.00 0.00 646.00 844.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 840)">
+<title>five_length1_bubbles_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-840 643,-840 643,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-548 8,-636 376,-636 376,-548 8,-548"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_3</title>
+<polygon fill="none" stroke="black" points="8,-320 8,-408 376,-408 376,-320 8,-320"/>
+</g>
+<g id="graph5" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="8,-740 8,-828 376,-828 376,-740 8,-740"/>
+</g>
+<g id="graph6" class="cluster"><title>cluster_5</title>
+<polygon fill="none" stroke="black" points="8,-644 8,-732 376,-732 376,-644 8,-644"/>
+</g>
+<g id="graph7" class="cluster"><title>cluster_4</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 376,-305 376,-217 8,-217"/>
+</g>
+<g id="graph8" class="cluster"><title>cluster_6</title>
+<polygon fill="none" stroke="black" points="274,-426 274,-514 630,-514 630,-426 274,-426"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-592" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-603.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-584 37,-598 82,-598 82,-584 37,-584"/>
+<text text-anchor="start" x="40" y="-588.667" font-family="Times Roman,serif" font-size="10.00">AATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-570 37,-584 82,-584 82,-570 37,-570"/>
+<text text-anchor="start" x="43" y="-574.667" font-family="Times Roman,serif" font-size="10.00">TCTATT</text>
+</g>
+<!-- 1,3 -->
+<g id="node16" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-592" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-603.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-584 179,-598 224,-598 224,-584 179,-584"/>
+<text text-anchor="start" x="185" y="-588.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-570 179,-584 224,-584 224,-570 179,-570"/>
+<text text-anchor="start" x="187.5" y="-574.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 1,1->1,3 -->
+<g id="edge69" class="edge"><title>1,1->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-585.755C116.207,-585.208 132.729,-585.105 148.049,-585.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-588.957 158.425,-585.756 148.534,-581.96 148.326,-588.957"/>
+</g>
+<!-- 1,3->1,1 -->
+<g id="edge75" class="edge"><title>1,3->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-598.244C143.825,-598.792 127.305,-598.895 111.982,-598.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-595.043 101.605,-598.245 111.497,-602.04 111.704,-595.043"/>
+</g>
+<!-- 1,4 -->
+<g id="node17" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-592" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-603.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-584 303,-598 348,-598 348,-584 303,-584"/>
+<text text-anchor="start" x="307.5" y="-588.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-570 303,-584 348,-584 348,-570 303,-570"/>
+<text text-anchor="start" x="311" y="-574.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge73" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-585.562C252.601,-585.258 262.531,-585.176 272.159,-585.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-588.822 282.443,-585.554 272.527,-581.823 272.364,-588.822"/>
+</g>
+<!-- 6,1 -->
+<g id="node35" class="node"><title>6,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-470" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-481.167" font-family="Times Roman,serif" font-size="10.00">6,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-462 303,-476 348,-476 348,-462 303,-462"/>
+<text text-anchor="start" x="307.5" y="-466.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-448 303,-462 348,-462 348,-448 303,-448"/>
+<text text-anchor="start" x="311" y="-452.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 1,3->6,1 -->
+<g id="edge71" class="edge"><title>1,3->6,1</title>
+<path fill="none" stroke="#dd1e2f" d="M217.507,-558.714C235.573,-522.287 262,-469 262,-469 262,-469 264,-469 264,-469 264,-469 267.077,-469.05 271.85,-469.129"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="271.916,-472.63 281.972,-469.295 272.031,-465.631 271.916,-472.63"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge79" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-598.446C273.158,-598.745 263.225,-598.824 253.602,-598.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-595.175 243.327,-598.438 253.242,-602.173 253.407,-595.175"/>
+</g>
+<!-- 6,2 -->
+<g id="node36" class="node"><title>6,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-470" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-481.167" font-family="Times Roman,serif" font-size="10.00">6,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-462 427,-476 472,-476 472,-462 427,-462"/>
+<text text-anchor="start" x="431.5" y="-466.667" font-family="Times Roman,serif" font-size="10.00">GAAGA</text>
+<polygon fill="#218559" stroke="#218559" points="427,-448 427,-462 472,-462 472,-448 427,-448"/>
+<text text-anchor="start" x="435.5" y="-452.667" font-family="Times Roman,serif" font-size="10.00">TCTTC</text>
+</g>
+<!-- 1,4->6,2 -->
+<g id="edge77" class="edge"><title>1,4->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M353.138,-564.316C371.132,-546.612 394.616,-523.507 413.837,-504.596"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="416.3,-507.082 420.974,-497.574 411.391,-502.093 416.3,-507.082"/>
+</g>
+<!-- 3,1 -->
+<g id="node19" class="node"><title>3,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-364" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-375.167" font-family="Times Roman,serif" font-size="10.00">3,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-356 37,-370 82,-370 82,-356 37,-356"/>
+<text text-anchor="start" x="40" y="-360.667" font-family="Times Roman,serif" font-size="10.00">AATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-342 37,-356 82,-356 82,-342 37,-342"/>
+<text text-anchor="start" x="43" y="-346.667" font-family="Times Roman,serif" font-size="10.00">TCTATT</text>
+</g>
+<!-- 3,3 -->
+<g id="node20" class="node"><title>3,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-364" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-375.167" font-family="Times Roman,serif" font-size="10.00">3,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-356 179,-370 224,-370 224,-356 179,-356"/>
+<text text-anchor="start" x="185" y="-360.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-342 179,-356 224,-356 224,-342 179,-342"/>
+<text text-anchor="start" x="187.5" y="-346.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 3,1->3,3 -->
+<g id="edge81" class="edge"><title>3,1->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-357.755C116.207,-357.208 132.729,-357.105 148.049,-357.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-360.957 158.425,-357.756 148.534,-353.96 148.326,-360.957"/>
+</g>
+<!-- 3,3->3,1 -->
+<g id="edge87" class="edge"><title>3,3->3,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-370.244C143.825,-370.792 127.305,-370.895 111.982,-370.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-367.043 101.605,-370.245 111.497,-374.04 111.704,-367.043"/>
+</g>
+<!-- 3,4 -->
+<g id="node21" class="node"><title>3,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-364" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-375.167" font-family="Times Roman,serif" font-size="10.00">3,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-356 303,-370 348,-370 348,-356 303,-356"/>
+<text text-anchor="start" x="307.5" y="-360.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-342 303,-356 348,-356 348,-342 303,-342"/>
+<text text-anchor="start" x="311" y="-346.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 3,3->3,4 -->
+<g id="edge85" class="edge"><title>3,3->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-357.562C252.601,-357.258 262.531,-357.176 272.159,-357.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-360.822 282.443,-357.554 272.527,-353.823 272.364,-360.822"/>
+</g>
+<!-- 3,3->6,1 -->
+<g id="edge83" class="edge"><title>3,3->6,1</title>
+<path fill="none" stroke="#dd1e2f" d="M231.334,-389.931C247.997,-404.175 268.832,-421.985 286.618,-437.19"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="284.674,-440.132 294.55,-443.97 289.223,-434.812 284.674,-440.132"/>
+</g>
+<!-- 3,4->3,3 -->
+<g id="edge91" class="edge"><title>3,4->3,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-370.446C273.158,-370.745 263.225,-370.824 253.602,-370.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-367.175 243.327,-370.438 253.242,-374.173 253.407,-367.175"/>
+</g>
+<!-- 3,4->6,2 -->
+<g id="edge89" class="edge"><title>3,4->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M356.869,-388.787C372.495,-400.941 388,-413 388,-413 388,-413 399.721,-423.953 412.888,-436.256"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="410.59,-438.899 420.286,-443.169 415.369,-433.784 410.59,-438.899"/>
+</g>
+<!-- 2,1 -->
+<g id="node23" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-784" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-795.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-776 37,-790 82,-790 82,-776 37,-776"/>
+<text text-anchor="start" x="40" y="-780.667" font-family="Times Roman,serif" font-size="10.00">AATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-762 37,-776 82,-776 82,-762 37,-762"/>
+<text text-anchor="start" x="43" y="-766.667" font-family="Times Roman,serif" font-size="10.00">TCTATT</text>
+</g>
+<!-- 2,3 -->
+<g id="node24" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-784" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-795.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-776 179,-790 224,-790 224,-776 179,-776"/>
+<text text-anchor="start" x="185" y="-780.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-762 179,-776 224,-776 224,-762 179,-762"/>
+<text text-anchor="start" x="187.5" y="-766.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 2,1->2,3 -->
+<g id="edge17" class="edge"><title>2,1->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-777.755C116.207,-777.208 132.729,-777.105 148.049,-777.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-780.957 158.425,-777.756 148.534,-773.96 148.326,-780.957"/>
+</g>
+<!-- 2,3->2,1 -->
+<g id="edge23" class="edge"><title>2,3->2,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-790.244C143.825,-790.792 127.305,-790.895 111.982,-790.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-787.043 101.605,-790.245 111.497,-794.04 111.704,-787.043"/>
+</g>
+<!-- 2,4 -->
+<g id="node25" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-784" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-795.167" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-776 303,-790 348,-790 348,-776 303,-776"/>
+<text text-anchor="start" x="307.5" y="-780.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-762 303,-776 348,-776 348,-762 303,-762"/>
+<text text-anchor="start" x="311" y="-766.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge21" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-777.562C252.601,-777.258 262.531,-777.176 272.159,-777.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-780.822 282.443,-777.554 272.527,-773.823 272.364,-780.822"/>
+</g>
+<!-- 2,3->6,1 -->
+<g id="edge19" class="edge"><title>2,3->6,1</title>
+<path fill="none" stroke="#dd1e2f" d="M227.005,-754.971C236.061,-744.862 244,-736 244,-736 244,-736 274,-544 274,-544 274,-544 285.426,-527.421 297.461,-509.958"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="300.579,-511.603 303.371,-501.383 294.815,-507.63 300.579,-511.603"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge27" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-790.446C273.158,-790.745 263.225,-790.824 253.602,-790.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-787.175 243.327,-790.438 253.242,-794.173 253.407,-787.175"/>
+</g>
+<!-- 2,4->6,2 -->
+<g id="edge25" class="edge"><title>2,4->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M353.779,-756.914C365.347,-746.026 376,-736 376,-736 376,-736 415.795,-590.993 436.621,-515.107"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="440.035,-515.892 439.306,-505.323 433.284,-514.04 440.035,-515.892"/>
+</g>
+<!-- 5,1 -->
+<g id="node27" class="node"><title>5,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-688" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-699.167" font-family="Times Roman,serif" font-size="10.00">5,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-680 37,-694 82,-694 82,-680 37,-680"/>
+<text text-anchor="start" x="40" y="-684.667" font-family="Times Roman,serif" font-size="10.00">AATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-666 37,-680 82,-680 82,-666 37,-666"/>
+<text text-anchor="start" x="43" y="-670.667" font-family="Times Roman,serif" font-size="10.00">TCTATT</text>
+</g>
+<!-- 5,3 -->
+<g id="node28" class="node"><title>5,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-688" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-699.167" font-family="Times Roman,serif" font-size="10.00">5,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-680 179,-694 224,-694 224,-680 179,-680"/>
+<text text-anchor="start" x="185" y="-684.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-666 179,-680 224,-680 224,-666 179,-666"/>
+<text text-anchor="start" x="187.5" y="-670.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 5,1->5,3 -->
+<g id="edge93" class="edge"><title>5,1->5,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-681.755C116.207,-681.208 132.729,-681.105 148.049,-681.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-684.957 158.425,-681.756 148.534,-677.96 148.326,-684.957"/>
+</g>
+<!-- 5,3->5,1 -->
+<g id="edge99" class="edge"><title>5,3->5,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-694.244C143.825,-694.792 127.305,-694.895 111.982,-694.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-691.043 101.605,-694.245 111.497,-698.04 111.704,-691.043"/>
+</g>
+<!-- 5,4 -->
+<g id="node29" class="node"><title>5,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-688" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-699.167" font-family="Times Roman,serif" font-size="10.00">5,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-680 303,-694 348,-694 348,-680 303,-680"/>
+<text text-anchor="start" x="307.5" y="-684.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-666 303,-680 348,-680 348,-666 303,-666"/>
+<text text-anchor="start" x="311" y="-670.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 5,3->5,4 -->
+<g id="edge97" class="edge"><title>5,3->5,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-681.562C252.601,-681.258 262.531,-681.176 272.159,-681.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-684.822 282.443,-681.554 272.527,-677.823 272.364,-684.822"/>
+</g>
+<!-- 5,3->6,1 -->
+<g id="edge95" class="edge"><title>5,3->6,1</title>
+<path fill="none" stroke="#dd1e2f" d="M227.005,-658.971C236.061,-648.862 244,-640 244,-640 244,-640 262,-514 262,-514 262,-514 271.601,-507.295 283.307,-499.119"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="285.461,-501.884 291.655,-493.288 281.453,-496.145 285.461,-501.884"/>
+</g>
+<!-- 5,4->5,3 -->
+<g id="edge103" class="edge"><title>5,4->5,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-694.446C273.158,-694.745 263.225,-694.824 253.602,-694.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-691.175 243.327,-694.438 253.242,-698.173 253.407,-691.175"/>
+</g>
+<!-- 5,4->6,2 -->
+<g id="edge101" class="edge"><title>5,4->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M353.779,-660.914C365.347,-650.026 376,-640 376,-640 376,-640 408.743,-563.749 430.371,-513.382"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="433.673,-514.563 434.403,-503.993 427.241,-511.801 433.673,-514.563"/>
+</g>
+<!-- 4,1 -->
+<g id="node31" class="node"><title>4,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">4,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="40" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="43" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TCTATT</text>
+</g>
+<!-- 4,3 -->
+<g id="node32" class="node"><title>4,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">4,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="185" y="-257.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="187.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 4,1->4,3 -->
+<g id="edge29" class="edge"><title>4,1->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 4,3->4,1 -->
+<g id="edge35" class="edge"><title>4,3->4,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 4,4 -->
+<g id="node33" class="node"><title>4,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">4,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="307.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="311" y="-243.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 4,3->4,4 -->
+<g id="edge33" class="edge"><title>4,3->4,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 4,3->6,1 -->
+<g id="edge31" class="edge"><title>4,3->6,1</title>
+<path fill="none" stroke="#dd1e2f" d="M238.738,-278.971C252.156,-285.36 264,-291 264,-291 264,-291 274,-412 274,-412 274,-412 282.58,-421.757 292.701,-433.268"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="290.268,-435.801 299.5,-441 295.525,-431.179 290.268,-435.801"/>
+</g>
+<!-- 4,4->4,3 -->
+<g id="edge39" class="edge"><title>4,4->4,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+<!-- 4,4->6,2 -->
+<g id="edge37" class="edge"><title>4,4->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M351.718,-289.414C368.888,-307.675 388,-328 388,-328 388,-328 412.394,-384.787 430.33,-426.538"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427.226,-428.181 434.389,-435.987 433.658,-425.418 427.226,-428.181"/>
+</g>
+<!-- 6,1->1,3 -->
+<g id="edge45" class="edge"><title>6,1->1,3</title>
+<path fill="none" stroke="#218559" d="M286.368,-453.534C274.262,-448.374 264,-444 264,-444 264,-444 262,-444 262,-444 262,-444 236.975,-504.716 218.982,-548.371"/>
+<polygon fill="#218559" stroke="#218559" points="215.723,-547.095 215.148,-557.674 222.195,-549.762 215.723,-547.095"/>
+</g>
+<!-- 6,1->3,3 -->
+<g id="edge47" class="edge"><title>6,1->3,3</title>
+<path fill="none" stroke="#218559" d="M299.5,-441C286.75,-426.5 274,-412 274,-412 274,-412 264,-394 264,-394 264,-394 257.057,-390.694 247.781,-386.277"/>
+<polygon fill="#218559" stroke="#218559" points="249.272,-383.11 238.738,-381.971 246.262,-389.43 249.272,-383.11"/>
+</g>
+<!-- 6,1->2,3 -->
+<g id="edge43" class="edge"><title>6,1->2,3</title>
+<path fill="none" stroke="#218559" d="M303.371,-501.383C289.471,-521.551 274,-544 274,-544 274,-544 264,-629 264,-629 264,-629 244,-736 244,-736 244,-736 239.718,-740.779 233.857,-747.322"/>
+<polygon fill="#218559" stroke="#218559" points="231.071,-745.187 227.005,-754.971 236.285,-749.858 231.071,-745.187"/>
+</g>
+<!-- 6,1->5,3 -->
+<g id="edge51" class="edge"><title>6,1->5,3</title>
+<path fill="none" stroke="#218559" d="M293.131,-494.787C277.505,-506.941 262,-519 262,-519 262,-519 244,-640 244,-640 244,-640 239.718,-644.779 233.857,-651.322"/>
+<polygon fill="#218559" stroke="#218559" points="231.071,-649.187 227.005,-658.971 236.285,-653.858 231.071,-649.187"/>
+</g>
+<!-- 6,1->4,3 -->
+<g id="edge49" class="edge"><title>6,1->4,3</title>
+<path fill="none" stroke="#218559" d="M299.5,-441C286.75,-426.5 274,-412 274,-412 274,-412 264,-316 264,-316 264,-316 252.076,-305.59 238.602,-293.828"/>
+<polygon fill="#218559" stroke="#218559" points="240.859,-291.151 231.024,-287.211 236.255,-296.425 240.859,-291.151"/>
+</g>
+<!-- 6,1->6,2 -->
+<g id="edge41" class="edge"><title>6,1->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-463.562C376.601,-463.258 386.531,-463.176 396.159,-463.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-466.822 406.443,-463.554 396.527,-459.823 396.364,-466.822"/>
+</g>
+<!-- 6,2->1,4 -->
+<g id="edge59" class="edge"><title>6,2->1,4</title>
+<path fill="none" stroke="#218559" d="M423.484,-499.281C406.768,-518.463 388,-540 388,-540 388,-540 376.647,-549.37 363.581,-560.156"/>
+<polygon fill="#218559" stroke="#218559" points="361.246,-557.544 355.762,-566.609 365.702,-562.943 361.246,-557.544"/>
+</g>
+<!-- 6,2->3,4 -->
+<g id="edge57" class="edge"><title>6,2->3,4</title>
+<path fill="none" stroke="#218559" d="M418.55,-443.97C401.867,-429.709 381.029,-411.896 363.258,-396.704"/>
+<polygon fill="#218559" stroke="#218559" points="365.209,-393.768 355.334,-389.931 360.661,-399.089 365.209,-393.768"/>
+</g>
+<!-- 6,2->2,4 -->
+<g id="edge55" class="edge"><title>6,2->2,4</title>
+<path fill="none" stroke="#218559" d="M440.065,-505.448C423.121,-572.666 388,-712 388,-712 388,-712 376,-736 376,-736 376,-736 369.498,-742.119 361.137,-749.988"/>
+<polygon fill="#218559" stroke="#218559" points="358.662,-747.511 353.779,-756.914 363.46,-752.609 358.662,-747.511"/>
+</g>
+<!-- 6,2->5,4 -->
+<g id="edge63" class="edge"><title>6,2->5,4</title>
+<path fill="none" stroke="#218559" d="M436.802,-504.593C419.001,-555.079 388,-643 388,-643 388,-643 378.242,-649.97 366.407,-658.423"/>
+<polygon fill="#218559" stroke="#218559" points="364.079,-655.785 357.976,-664.446 368.148,-661.481 364.079,-655.785"/>
+</g>
+<!-- 6,2->4,4 -->
+<g id="edge61" class="edge"><title>6,2->4,4</title>
+<path fill="none" stroke="#218559" d="M432.98,-436.204C411.491,-390.872 376,-316 376,-316 376,-316 367.973,-307.343 358.288,-296.899"/>
+<polygon fill="#218559" stroke="#218559" points="360.762,-294.42 351.396,-289.467 355.629,-299.179 360.762,-294.42"/>
+</g>
+<!-- 6,2->6,1 -->
+<g id="edge65" class="edge"><title>6,2->6,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-476.446C397.158,-476.745 387.225,-476.824 377.602,-476.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-473.175 367.327,-476.438 377.242,-480.173 377.407,-473.175"/>
+</g>
+<!-- 6,3 -->
+<g id="node37" class="node"><title>6,3</title>
+<ellipse fill="none" stroke="black" cx="576" cy="-470" rx="44.7575" ry="36.0624"/>
+<text text-anchor="start" x="556" y="-481.167" font-family="Times Roman,serif" font-size="10.00">6,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="552,-462 552,-476 600,-476 600,-462 552,-462"/>
+<text text-anchor="start" x="554.5" y="-466.667" font-family="Times Roman,serif" font-size="10.00">AAGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="552,-448 552,-462 600,-462 600,-448 552,-448"/>
+<text text-anchor="start" x="559" y="-452.667" font-family="Times Roman,serif" font-size="10.00">CTTCTT</text>
+</g>
+<!-- 6,2->6,3 -->
+<g id="edge53" class="edge"><title>6,2->6,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.642,-463.585C501.055,-463.272 511.156,-463.178 520.981,-463.304"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="521.009,-466.805 531.079,-463.514 521.154,-459.807 521.009,-466.805"/>
+</g>
+<!-- 6,3->6,2 -->
+<g id="edge67" class="edge"><title>6,3->6,2</title>
+<path fill="none" stroke="#218559" d="M531.079,-476.486C521.562,-476.764 511.435,-476.822 501.66,-476.662"/>
+<polygon fill="#218559" stroke="#218559" points="501.726,-473.162 491.642,-476.415 501.553,-480.16 501.726,-473.162"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles/.part-0.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles/.part-0.crc
new file mode 100644
index 0000000..53d1b45
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles/.part-1.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles/.part-1.crc
new file mode 100644
index 0000000..e76bd36
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles/part-0 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles/part-0
new file mode 100755
index 0000000..a9f8b84
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles/part-1 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles/part-1
new file mode 100755
index 0000000..f1c5899
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/five_length1_bubbles/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble.txt b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble.txt
new file mode 100644
index 0000000..51cc418
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble.txt
@@ -0,0 +1,6 @@
+((2,1) [(2,3)] [] [] [] GGAATA) (null)
+((2,3) [(2,4)] [(1,4)] [] [(2,1)] AATAC) (null)
+((2,4) [] [(1,3)] [] [(2,3)] ATACG) (null)
+((1,1) [(1,3)] [] [] [] AAACGT) (null)
+((1,3) [(1,4)] [(2,4)] [] [(1,1)] ACGTA) (null)
+((1,4) [] [(2,3)] [] [(1,3)] CGTAT) (null)
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble.txt.svg b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble.txt.svg
new file mode 100644
index 0000000..cb1b291
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble.txt.svg
@@ -0,0 +1,193 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: fr_bubble_txt Pages: 1 -->
+<svg width="392pt" height="428pt"
+ viewBox="0.00 0.00 392.00 428.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 424)">
+<title>fr_bubble_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-424 389,-424 389,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 376,-305 376,-217 8,-217"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="8,-324 8,-412 376,-412 376,-324 8,-324"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="39.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AAACGT</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="41.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">ACGTTT</text>
+</g>
+<!-- 1,3 -->
+<g id="node16" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="185.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="186" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+</g>
+<!-- 1,1->1,3 -->
+<g id="edge25" class="edge"><title>1,1->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 1,3->1,1 -->
+<g id="edge31" class="edge"><title>1,3->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 1,4 -->
+<g id="node17" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="310.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="309.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge27" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 2,4 -->
+<g id="node21" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-360 303,-374 348,-374 348,-360 303,-360"/>
+<text text-anchor="start" x="309.5" y="-364.667" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-346 303,-360 348,-360 348,-346 303,-346"/>
+<text text-anchor="start" x="310.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+</g>
+<!-- 1,3->2,4 -->
+<g id="edge29" class="edge"><title>1,3->2,4</title>
+<path fill="none" stroke="#ebb035" d="M231.017,-286.902C247.799,-301.383 268.887,-319.58 286.832,-335.065"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="284.972,-338.083 294.83,-341.966 289.545,-332.783 284.972,-338.083"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge35" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+<!-- 2,3 -->
+<g id="node20" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-360 179,-374 224,-374 224,-360 179,-360"/>
+<text text-anchor="start" x="186" y="-364.667" font-family="Times Roman,serif" font-size="10.00">AATAC</text>
+<polygon fill="#218559" stroke="#218559" points="179,-346 179,-360 224,-360 224,-346 179,-346"/>
+<text text-anchor="start" x="187.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">GTATT</text>
+</g>
+<!-- 1,4->2,3 -->
+<g id="edge33" class="edge"><title>1,4->2,3</title>
+<path fill="none" stroke="#ebb035" d="M299.131,-289.838C282.505,-308.372 264,-329 264,-329 264,-329 255.356,-334.351 244.471,-341.089"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="242.477,-338.207 235.817,-346.447 246.162,-344.159 242.477,-338.207"/>
+</g>
+<!-- 2,1 -->
+<g id="node19" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-360 37,-374 82,-374 82,-360 37,-360"/>
+<text text-anchor="start" x="39.5" y="-364.667" font-family="Times Roman,serif" font-size="10.00">GGAATA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-346 37,-360 82,-360 82,-346 37,-346"/>
+<text text-anchor="start" x="42.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">TATTCC</text>
+</g>
+<!-- 2,1->2,3 -->
+<g id="edge13" class="edge"><title>2,1->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-361.755C116.207,-361.208 132.729,-361.105 148.049,-361.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-364.957 158.425,-361.756 148.534,-357.96 148.326,-364.957"/>
+</g>
+<!-- 2,3->1,4 -->
+<g id="edge17" class="edge"><title>2,3->1,4</title>
+<path fill="none" stroke="#ebb035" d="M231.017,-342.098C247.799,-327.617 268.887,-309.42 286.832,-293.935"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="289.545,-296.217 294.83,-287.034 284.972,-290.917 289.545,-296.217"/>
+</g>
+<!-- 2,3->2,1 -->
+<g id="edge19" class="edge"><title>2,3->2,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-374.244C143.825,-374.792 127.305,-374.895 111.982,-374.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-371.043 101.605,-374.245 111.497,-378.04 111.704,-371.043"/>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge15" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-361.562C252.601,-361.258 262.531,-361.176 272.159,-361.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-364.822 282.443,-361.554 272.527,-357.823 272.364,-364.822"/>
+</g>
+<!-- 2,4->1,3 -->
+<g id="edge21" class="edge"><title>2,4->1,3</title>
+<path fill="none" stroke="#ebb035" d="M294.83,-341.966C278.021,-327.462 256.93,-309.262 239.004,-293.794"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="240.875,-290.785 231.017,-286.902 236.301,-296.085 240.875,-290.785"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge23" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-374.446C273.158,-374.745 263.225,-374.824 253.602,-374.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-371.175 243.327,-374.438 253.242,-378.173 253.407,-371.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble/.part-0.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble/.part-0.crc
new file mode 100644
index 0000000..d40cb4d
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble/.part-1.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble/.part-1.crc
new file mode 100644
index 0000000..bc50c79
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble/part-0 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble/part-0
new file mode 100755
index 0000000..f757027
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble/part-1 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble/part-1
new file mode 100755
index 0000000..96978fb
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble.txt b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble.txt
new file mode 100644
index 0000000..b53f352
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble.txt
@@ -0,0 +1,9 @@
+((2,1) [(2,2)] [] [] [(1,3)] CGTAT) (null)
+((2,2) [(2,3)] [] [] [(1,4),(2,1)] GTATT) (null)
+((2,3) [] [] [] [(2,2)] TATTCC) (null)
+((1,1) [(1,3)] [] [] [] AAACGT) (null)
+((1,3) [(2,1),(1,4)] [(3,4)] [] [(1,1)] ACGTA) (null)
+((1,4) [(2,2)] [(3,3)] [] [(1,3)] CGTAT) (null)
+((3,1) [(3,3)] [] [] [] GGAATA) (null)
+((3,3) [(3,4)] [(1,4)] [] [(3,1)] AATAC) (null)
+((3,4) [] [(1,3)] [] [(3,3)] ATACG) (null)
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble.txt.svg b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble.txt.svg
new file mode 100644
index 0000000..aaaab66
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble.txt.svg
@@ -0,0 +1,263 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: fr_bubble_and_ff_bubble_txt Pages: 1 -->
+<svg width="640pt" height="428pt"
+ viewBox="0.00 0.00 640.00 428.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 424)">
+<title>fr_bubble_and_ff_bubble_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-424 637,-424 637,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 376,-305 376,-217 8,-217"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_3</title>
+<polygon fill="none" stroke="black" points="8,-324 8,-412 376,-412 376,-324 8,-324"/>
+</g>
+<g id="graph5" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="274,-121 274,-209 624,-209 624,-121 274,-121"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="39.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AAACGT</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="41.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">ACGTTT</text>
+</g>
+<!-- 1,3 -->
+<g id="node16" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="185.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="186" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+</g>
+<!-- 1,1->1,3 -->
+<g id="edge26" class="edge"><title>1,1->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 1,3->1,1 -->
+<g id="edge34" class="edge"><title>1,3->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 1,4 -->
+<g id="node17" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="310.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="309.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge30" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 3,4 -->
+<g id="node21" class="node"><title>3,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">3,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-360 303,-374 348,-374 348,-360 303,-360"/>
+<text text-anchor="start" x="309.5" y="-364.667" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-346 303,-360 348,-360 348,-346 303,-346"/>
+<text text-anchor="start" x="310.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+</g>
+<!-- 1,3->3,4 -->
+<g id="edge32" class="edge"><title>1,3->3,4</title>
+<path fill="none" stroke="#ebb035" d="M231.017,-286.902C247.799,-301.383 268.887,-319.58 286.832,-335.065"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="284.972,-338.083 294.83,-341.966 289.545,-332.783 284.972,-338.083"/>
+</g>
+<!-- 2,1 -->
+<g id="node23" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-157 303,-171 348,-171 348,-157 303,-157"/>
+<text text-anchor="start" x="310.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-143 303,-157 348,-157 348,-143 303,-143"/>
+<text text-anchor="start" x="309.5" y="-147.667" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+</g>
+<!-- 1,3->2,1 -->
+<g id="edge28" class="edge"><title>1,3->2,1</title>
+<path fill="none" stroke="#dd1e2f" d="M234.345,-237.712C249.456,-227.157 264,-217 264,-217 264,-217 274.622,-207.945 287.002,-197.392"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="289.518,-199.846 294.857,-190.695 284.977,-194.519 289.518,-199.846"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge40" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+<!-- 3,3 -->
+<g id="node20" class="node"><title>3,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">3,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-360 179,-374 224,-374 224,-360 179,-360"/>
+<text text-anchor="start" x="186" y="-364.667" font-family="Times Roman,serif" font-size="10.00">AATAC</text>
+<polygon fill="#218559" stroke="#218559" points="179,-346 179,-360 224,-360 224,-346 179,-346"/>
+<text text-anchor="start" x="187.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">GTATT</text>
+</g>
+<!-- 1,4->3,3 -->
+<g id="edge38" class="edge"><title>1,4->3,3</title>
+<path fill="none" stroke="#ebb035" d="M294.83,-287.034C278.021,-301.538 256.93,-319.738 239.004,-335.206"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="236.301,-332.915 231.017,-342.098 240.875,-338.215 236.301,-332.915"/>
+</g>
+<!-- 2,2 -->
+<g id="node24" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-157 427,-171 472,-171 472,-157 427,-157"/>
+<text text-anchor="start" x="435.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">GTATT</text>
+<polygon fill="#218559" stroke="#218559" points="427,-143 427,-157 472,-157 472,-143 427,-143"/>
+<text text-anchor="start" x="434" y="-147.667" font-family="Times Roman,serif" font-size="10.00">AATAC</text>
+</g>
+<!-- 1,4->2,2 -->
+<g id="edge36" class="edge"><title>1,4->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M356.611,-236.527C372.521,-224.21 391.976,-209.147 408.924,-196.027"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="411.251,-198.652 417.015,-189.762 406.965,-193.117 411.251,-198.652"/>
+</g>
+<!-- 3,1 -->
+<g id="node19" class="node"><title>3,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">3,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-360 37,-374 82,-374 82,-360 37,-360"/>
+<text text-anchor="start" x="39.5" y="-364.667" font-family="Times Roman,serif" font-size="10.00">GGAATA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-346 37,-360 82,-360 82,-346 37,-346"/>
+<text text-anchor="start" x="42.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">TATTCC</text>
+</g>
+<!-- 3,1->3,3 -->
+<g id="edge42" class="edge"><title>3,1->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-361.755C116.207,-361.208 132.729,-361.105 148.049,-361.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-364.957 158.425,-361.756 148.534,-357.96 148.326,-364.957"/>
+</g>
+<!-- 3,3->1,4 -->
+<g id="edge46" class="edge"><title>3,3->1,4</title>
+<path fill="none" stroke="#ebb035" d="M229.714,-341.169C245.535,-326.385 262,-311 262,-311 262,-311 272.801,-302.428 285.459,-292.382"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="287.843,-294.958 293.5,-286 283.491,-289.475 287.843,-294.958"/>
+</g>
+<!-- 3,3->3,1 -->
+<g id="edge48" class="edge"><title>3,3->3,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-374.244C143.825,-374.792 127.305,-374.895 111.982,-374.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-371.043 101.605,-374.245 111.497,-378.04 111.704,-371.043"/>
+</g>
+<!-- 3,3->3,4 -->
+<g id="edge44" class="edge"><title>3,3->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-361.562C252.601,-361.258 262.531,-361.176 272.159,-361.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-364.822 282.443,-361.554 272.527,-357.823 272.364,-364.822"/>
+</g>
+<!-- 3,4->1,3 -->
+<g id="edge50" class="edge"><title>3,4->1,3</title>
+<path fill="none" stroke="#ebb035" d="M294.83,-341.966C278.021,-327.462 256.93,-309.262 239.004,-293.794"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="240.875,-290.785 231.017,-286.902 236.301,-296.085 240.875,-290.785"/>
+</g>
+<!-- 3,4->3,3 -->
+<g id="edge52" class="edge"><title>3,4->3,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-374.446C273.158,-374.745 263.225,-374.824 253.602,-374.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-371.175 243.327,-374.438 253.242,-378.173 253.407,-371.175"/>
+</g>
+<!-- 2,1->1,3 -->
+<g id="edge16" class="edge"><title>2,1->1,3</title>
+<path fill="none" stroke="#218559" d="M293.015,-189.762C277.051,-202.122 257.587,-217.19 240.68,-230.28"/>
+<polygon fill="#218559" stroke="#218559" points="238.376,-227.637 232.611,-236.527 242.661,-233.173 238.376,-227.637"/>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge14" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-158.562C376.601,-158.258 386.531,-158.176 396.159,-158.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-161.822 406.443,-158.554 396.527,-154.823 396.364,-161.822"/>
+</g>
+<!-- 2,2->1,4 -->
+<g id="edge20" class="edge"><title>2,2->1,4</title>
+<path fill="none" stroke="#218559" d="M420.286,-191.831C404.465,-206.615 388,-222 388,-222 388,-222 379.356,-227.351 368.471,-234.089"/>
+<polygon fill="#218559" stroke="#218559" points="366.477,-231.207 359.817,-239.447 370.162,-237.159 366.477,-231.207"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge22" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-171.446C397.158,-171.745 387.225,-171.824 377.602,-171.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-168.175 367.327,-171.438 377.242,-175.173 377.407,-168.175"/>
+</g>
+<!-- 2,3 -->
+<g id="node25" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-157 551,-171 596,-171 596,-157 551,-157"/>
+<text text-anchor="start" x="556.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">TATTCC</text>
+<polygon fill="#218559" stroke="#218559" points="551,-143 551,-157 596,-157 596,-143 551,-143"/>
+<text text-anchor="start" x="553.5" y="-147.667" font-family="Times Roman,serif" font-size="10.00">GGAATA</text>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge18" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-158.562C500.601,-158.258 510.531,-158.176 520.159,-158.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-161.822 530.443,-158.554 520.527,-154.823 520.364,-161.822"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge24" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M530.443,-171.446C521.158,-171.745 511.225,-171.824 501.602,-171.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-168.175 491.327,-171.438 501.242,-175.173 501.407,-168.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/.part-0.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/.part-0.crc
new file mode 100644
index 0000000..528de91
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/.part-1.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/.part-1.crc
new file mode 100644
index 0000000..247ccc7
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/part-0 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/part-0
new file mode 100755
index 0000000..795d0c7
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/part-1 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/part-1
new file mode 100755
index 0000000..8a0aa72
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/fr_bubble_and_ff_bubble/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble.txt b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble.txt
new file mode 100644
index 0000000..8b0bfdc
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble.txt
@@ -0,0 +1,6 @@
+((2,1) [(2,2)] [] [(1,2)] [] ACCGT) (null)
+((2,2) [(2,3)] [] [(1,1)] [(2,1)] CCGTG) (null)
+((2,3) [] [] [] [(2,2)] CGTGGT) (null)
+((1,1) [(1,2)] [] [(2,2)] [] ACGGT) (null)
+((1,2) [(1,3)] [] [(2,1)] [(1,1)] CGGTG) (null)
+((1,3) [] [] [] [(1,2)] GGTGTA) (null)
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble.txt.svg b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble.txt.svg
new file mode 100644
index 0000000..80a9d65
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble.txt.svg
@@ -0,0 +1,193 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: rf_bubble_txt Pages: 1 -->
+<svg width="394pt" height="428pt"
+ viewBox="0.00 0.00 394.00 428.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 424)">
+<title>rf_bubble_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-424 391,-424 391,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-324 8,-412 377,-412 377,-324 8,-324"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 378,-305 378,-217 8,-217"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-360 37,-374 82,-374 82,-360 37,-360"/>
+<text text-anchor="start" x="42.5" y="-364.667" font-family="Times Roman,serif" font-size="10.00">ACGGT</text>
+<polygon fill="#218559" stroke="#218559" points="37,-346 37,-360 82,-360 82,-346 37,-346"/>
+<text text-anchor="start" x="43" y="-350.667" font-family="Times Roman,serif" font-size="10.00">ACCGT</text>
+</g>
+<!-- 1,2 -->
+<g id="node16" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-360 179,-374 224,-374 224,-360 179,-360"/>
+<text text-anchor="start" x="184" y="-364.667" font-family="Times Roman,serif" font-size="10.00">CGGTG</text>
+<polygon fill="#218559" stroke="#218559" points="179,-346 179,-360 224,-360 224,-346 179,-346"/>
+<text text-anchor="start" x="184" y="-350.667" font-family="Times Roman,serif" font-size="10.00">CACCG</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge25" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-361.755C116.207,-361.208 132.729,-361.105 148.049,-361.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-364.957 158.425,-361.756 148.534,-357.96 148.326,-364.957"/>
+</g>
+<!-- 2,2 -->
+<g id="node20" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="184.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">CCGTG</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="183.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">CACGG</text>
+</g>
+<!-- 1,1->2,2 -->
+<g id="edge27" class="edge"><title>1,1->2,2</title>
+<path fill="none" stroke="#06a2cb" d="M96.6544,-349.87C117.807,-339.685 140,-329 140,-329 140,-329 153.666,-313.765 168.062,-297.718"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="171.058,-299.619 175.131,-289.838 165.848,-294.945 171.058,-299.619"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge33" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-374.244C143.825,-374.792 127.305,-374.895 111.982,-374.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-371.043 101.605,-374.245 111.497,-378.04 111.704,-371.043"/>
+</g>
+<!-- 1,3 -->
+<g id="node17" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="326" cy="-368" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="306.5" y="-379.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="304,-360 304,-374 349,-374 349,-360 304,-360"/>
+<text text-anchor="start" x="307" y="-364.667" font-family="Times Roman,serif" font-size="10.00">GGTGTA</text>
+<polygon fill="#218559" stroke="#218559" points="304,-346 304,-360 349,-360 349,-346 304,-346"/>
+<text text-anchor="start" x="307.5" y="-350.667" font-family="Times Roman,serif" font-size="10.00">TACACC</text>
+</g>
+<!-- 1,2->1,3 -->
+<g id="edge29" class="edge"><title>1,2->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M243.319,-361.573C252.886,-361.256 263.16,-361.173 273.102,-361.321"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="273.22,-364.825 283.299,-361.561 273.384,-357.827 273.22,-364.825"/>
+</g>
+<!-- 2,1 -->
+<g id="node19" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="43" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ACCGT</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="42.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">ACGGT</text>
+</g>
+<!-- 1,2->2,1 -->
+<g id="edge31" class="edge"><title>1,2->2,1</title>
+<path fill="none" stroke="#06a2cb" d="M167.587,-345.312C145.274,-330.161 120,-313 120,-313 120,-313 109.378,-303.945 96.9983,-293.392"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="99.0233,-290.519 89.1426,-286.695 94.4822,-295.846 99.0233,-290.519"/>
+</g>
+<!-- 1,3->1,2 -->
+<g id="edge35" class="edge"><title>1,3->1,2</title>
+<path fill="none" stroke="#218559" d="M283.299,-374.439C273.713,-374.75 263.434,-374.828 253.502,-374.673"/>
+<polygon fill="#218559" stroke="#218559" points="253.401,-371.169 243.319,-374.427 253.232,-378.167 253.401,-371.169"/>
+</g>
+<!-- 2,1->1,2 -->
+<g id="edge15" class="edge"><title>2,1->1,2</title>
+<path fill="none" stroke="#06a2cb" d="M92.4134,-283.688C114.726,-298.839 140,-316 140,-316 140,-316 150.622,-325.055 163.002,-335.608"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="160.977,-338.481 170.857,-342.305 165.518,-333.154 160.977,-338.481"/>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge13" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 2,2->1,1 -->
+<g id="edge19" class="edge"><title>2,2->1,1</title>
+<path fill="none" stroke="#06a2cb" d="M168.906,-285.184C148.325,-300.692 121.435,-320.954 99.4042,-337.555"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="97.1022,-334.907 91.222,-343.72 101.315,-340.497 97.1022,-334.907"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge21" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 2,3 -->
+<g id="node21" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="326" cy="-261" rx="43.8406" ry="36.0624"/>
+<text text-anchor="start" x="306" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 349,-267 349,-253 303,-253"/>
+<text text-anchor="start" x="306" y="-257.667" font-family="Times Roman,serif" font-size="10.00">CGTGGT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 349,-253 349,-239 303,-239"/>
+<text text-anchor="start" x="305.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">ACCACG</text>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge17" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M243.319,-254.573C252.676,-254.263 262.709,-254.176 272.446,-254.312"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.37,-257.811 282.446,-254.534 272.526,-250.813 272.37,-257.811"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge23" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M282.446,-267.466C273.13,-267.753 263.195,-267.823 253.581,-267.674"/>
+<polygon fill="#218559" stroke="#218559" points="253.401,-264.169 243.319,-267.427 253.232,-271.167 253.401,-264.169"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble/.part-0.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble/.part-0.crc
new file mode 100644
index 0000000..e87ee4f
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble/.part-1.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble/.part-1.crc
new file mode 100644
index 0000000..2c9bb22
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble/part-0 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble/part-0
new file mode 100755
index 0000000..4388d34
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble/part-1 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble/part-1
new file mode 100755
index 0000000..fbd0d87
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/rf_bubble/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble.txt b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble.txt
new file mode 100644
index 0000000..14cf2e1
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble.txt
@@ -0,0 +1,6 @@
+((2,1) [(2,2)] [] [] [(1,3)] AGAAG) (null)
+((2,2) [(2,3)] [] [] [(1,4),(2,1)] GAAGC) (null)
+((2,3) [] [] [] [(2,2)] AAGCCC) (null)
+((1,1) [(1,3)] [] [] [] AATAGA) (null)
+((1,3) [(2,1),(1,4)] [] [] [(1,1)] TAGAA) (null)
+((1,4) [(2,2)] [] [] [(1,3)] AGAAG) (null)
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble.txt.svg b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble.txt.svg
new file mode 100644
index 0000000..550660b
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble.txt.svg
@@ -0,0 +1,193 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: small_bubble_txt Pages: 1 -->
+<svg width="644pt" height="321pt"
+ viewBox="0.00 0.00 644.00 321.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 317)">
+<title>small_bubble_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-317 641,-317 641,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 376,-305 376,-217 8,-217"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="274,-121 274,-209 628,-209 628,-121 274,-121"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="40" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="43" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TCTATT</text>
+</g>
+<!-- 1,3 -->
+<g id="node16" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="185" y="-257.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="187.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 1,1->1,3 -->
+<g id="edge25" class="edge"><title>1,1->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 1,3->1,1 -->
+<g id="edge31" class="edge"><title>1,3->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 1,4 -->
+<g id="node17" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="307.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="311" y="-243.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge29" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 2,1 -->
+<g id="node19" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-157 303,-171 348,-171 348,-157 303,-157"/>
+<text text-anchor="start" x="307.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-143 303,-157 348,-157 348,-143 303,-143"/>
+<text text-anchor="start" x="311" y="-147.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 1,3->2,1 -->
+<g id="edge27" class="edge"><title>1,3->2,1</title>
+<path fill="none" stroke="#dd1e2f" d="M234.345,-237.712C249.456,-227.157 264,-217 264,-217 264,-217 274.622,-207.945 287.002,-197.392"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="289.518,-199.846 294.857,-190.695 284.977,-194.519 289.518,-199.846"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge35" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+<!-- 2,2 -->
+<g id="node20" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-165" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-157 427,-171 472,-171 472,-157 427,-157"/>
+<text text-anchor="start" x="431.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">GAAGC</text>
+<polygon fill="#218559" stroke="#218559" points="427,-143 427,-157 472,-157 472,-143 427,-143"/>
+<text text-anchor="start" x="434" y="-147.667" font-family="Times Roman,serif" font-size="10.00">GCTTC</text>
+</g>
+<!-- 1,4->2,2 -->
+<g id="edge33" class="edge"><title>1,4->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M361.282,-241.419C375.287,-233.861 388,-227 388,-227 388,-227 400.484,-214.311 414.157,-200.414"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="416.837,-202.681 421.356,-193.098 411.847,-197.771 416.837,-202.681"/>
+</g>
+<!-- 2,1->1,3 -->
+<g id="edge15" class="edge"><title>2,1->1,3</title>
+<path fill="none" stroke="#218559" d="M293.015,-189.762C277.051,-202.122 257.587,-217.19 240.68,-230.28"/>
+<polygon fill="#218559" stroke="#218559" points="238.376,-227.637 232.611,-236.527 242.661,-233.173 238.376,-227.637"/>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge13" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-158.562C376.601,-158.258 386.531,-158.176 396.159,-158.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-161.822 406.443,-158.554 396.527,-154.823 396.364,-161.822"/>
+</g>
+<!-- 2,2->1,4 -->
+<g id="edge19" class="edge"><title>2,2->1,4</title>
+<path fill="none" stroke="#218559" d="M417.015,-189.762C401.051,-202.122 381.587,-217.19 364.68,-230.28"/>
+<polygon fill="#218559" stroke="#218559" points="362.376,-227.637 356.611,-236.527 366.661,-233.173 362.376,-227.637"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge21" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-171.446C397.158,-171.745 387.225,-171.824 377.602,-171.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-168.175 367.327,-171.438 377.242,-175.173 377.407,-168.175"/>
+</g>
+<!-- 2,3 -->
+<g id="node21" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="575" cy="-165" rx="44.0472" ry="36.0624"/>
+<text text-anchor="start" x="555.5" y="-176.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="552,-157 552,-171 599,-171 599,-157 552,-157"/>
+<text text-anchor="start" x="554.5" y="-161.667" font-family="Times Roman,serif" font-size="10.00">AAGCCC</text>
+<polygon fill="#218559" stroke="#218559" points="552,-143 552,-157 599,-157 599,-143 552,-143"/>
+<text text-anchor="start" x="555.5" y="-147.667" font-family="Times Roman,serif" font-size="10.00">GGGCTT</text>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge17" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.658,-158.573C501.09,-158.263 511.203,-158.176 521.018,-158.312"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="521.023,-161.813 531.097,-158.534 521.177,-154.814 521.023,-161.813"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge23" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M531.097,-171.466C521.607,-171.756 511.479,-171.824 501.692,-171.669"/>
+<polygon fill="#218559" stroke="#218559" points="501.739,-168.169 491.658,-171.427 501.571,-175.167 501.739,-168.169"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble/.part-0.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble/.part-0.crc
new file mode 100644
index 0000000..fc426bc
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble/.part-1.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble/.part-1.crc
new file mode 100644
index 0000000..611cf9d
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble/part-0 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble/part-0
new file mode 100755
index 0000000..de5dd64
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble/part-1 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble/part-1
new file mode 100755
index 0000000..d84c1d2
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/small_bubble/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble.txt b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble.txt
new file mode 100644
index 0000000..5fde720
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble.txt
@@ -0,0 +1,9 @@
+((2,1) [(3,1),(2,2)] [] [] [(1,1)] ATAGA) (null)
+((2,2) [(2,3)] [] [] [(2,1)] TAGAC) (null)
+((2,3) [(2,4)] [] [] [(2,2)] AGACT) (null)
+((2,4) [(3,4)] [] [] [(2,3)] GACTA) (null)
+((1,1) [(2,1),(1,2)] [] [] [] AATAG) (null)
+((1,2) [] [] [] [(1,1)] ATAGAAG) (null)
+((3,1) [(3,3)] [] [] [(2,1)] TAGACT) (null)
+((3,3) [(3,4)] [] [] [(3,1)] GACTA) (null)
+((3,4) [] [] [] [(2,4),(3,3)] ACTAC) (null)
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble.txt.svg b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble.txt.svg
new file mode 100644
index 0000000..1ae4c53
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble.txt.svg
@@ -0,0 +1,253 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: tip_and_bubble_txt Pages: 1 -->
+<svg width="650pt" height="417pt"
+ viewBox="0.00 0.00 650.00 417.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 413)">
+<title>tip_and_bubble_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-413 647,-413 647,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 216,-209 216,-8 49,-8"/>
+<text text-anchor="middle" x="132.5" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-313 8,-401 262,-401 262,-313 8,-313"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_3</title>
+<polygon fill="none" stroke="black" points="284,-109 284,-197 634,-197 634,-109 284,-109"/>
+</g>
+<g id="graph5" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="155,-217 155,-305 634,-305 634,-217 155,-217"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="206" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.078,-157C75.5476,-157 161.628,-157 193.842,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="194.198,-160.5 204.198,-157 194.198,-153.5 194.198,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="206" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.078,-116C75.5476,-116 161.628,-116 193.842,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="194.198,-119.5 204.198,-116 194.198,-112.5 194.198,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="206" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.078,-75C75.5476,-75 161.628,-75 193.842,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="194.198,-78.5001 204.198,-75 194.198,-71.5001 194.198,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="206" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.078,-34C75.5476,-34 161.628,-34 193.842,-34"/>
+<polygon fill="#218559" stroke="#218559" points="194.198,-37.5001 204.198,-34 194.198,-30.5001 194.198,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-349 37,-363 82,-363 82,-349 37,-349"/>
+<text text-anchor="start" x="43.5" y="-353.667" font-family="Times Roman,serif" font-size="10.00">AATAG</text>
+<polygon fill="#218559" stroke="#218559" points="37,-335 37,-349 82,-349 82,-335 37,-335"/>
+<text text-anchor="start" x="45.5" y="-339.667" font-family="Times Roman,serif" font-size="10.00">CTATT</text>
+</g>
+<!-- 1,2 -->
+<g id="node16" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="206" cy="-357" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="186.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="181,-349 181,-363 232,-363 232,-349 181,-349"/>
+<text text-anchor="start" x="183.5" y="-353.667" font-family="Times Roman,serif" font-size="10.00">ATAGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="181,-335 181,-349 232,-349 232,-335 181,-335"/>
+<text text-anchor="start" x="187" y="-339.667" font-family="Times Roman,serif" font-size="10.00">CTTCTAT</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge34" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M101.518,-350.814C116.334,-350.242 133.206,-350.105 149.005,-350.404"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="149.114,-353.907 159.199,-350.663 149.292,-346.91 149.114,-353.907"/>
+</g>
+<!-- 2,1 -->
+<g id="node22" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="206" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="186.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="184,-253 184,-267 229,-267 229,-253 184,-253"/>
+<text text-anchor="start" x="190.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="184,-239 184,-253 229,-253 229,-239 184,-239"/>
+<text text-anchor="start" x="192.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TCTAT</text>
+</g>
+<!-- 1,1->2,1 -->
+<g id="edge32" class="edge"><title>1,1->2,1</title>
+<path fill="none" stroke="#dd1e2f" d="M95.2356,-337.316C116.802,-325.601 140,-313 140,-313 140,-313 152.492,-303.158 166.607,-292.037"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="168.857,-294.72 174.546,-285.782 164.525,-289.221 168.857,-294.72"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge36" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M159.199,-363.337C144.021,-363.827 127.084,-363.881 111.518,-363.499"/>
+<polygon fill="#218559" stroke="#218559" points="111.623,-360.001 101.518,-363.186 111.404,-366.998 111.623,-360.001"/>
+</g>
+<!-- 3,1 -->
+<g id="node18" class="node"><title>3,1</title>
+<ellipse fill="none" stroke="black" cx="335" cy="-153" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="315.5" y="-164.167" font-family="Times Roman,serif" font-size="10.00">3,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="313,-145 313,-159 358,-159 358,-145 313,-145"/>
+<text text-anchor="start" x="316.5" y="-149.667" font-family="Times Roman,serif" font-size="10.00">TAGACT</text>
+<polygon fill="#218559" stroke="#218559" points="313,-131 313,-145 358,-145 358,-131 313,-131"/>
+<text text-anchor="start" x="317" y="-135.667" font-family="Times Roman,serif" font-size="10.00">AGTCTA</text>
+</g>
+<!-- 3,3 -->
+<g id="node19" class="node"><title>3,3</title>
+<ellipse fill="none" stroke="black" cx="459" cy="-153" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="439.5" y="-164.167" font-family="Times Roman,serif" font-size="10.00">3,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="437,-145 437,-159 482,-159 482,-145 437,-145"/>
+<text text-anchor="start" x="443" y="-149.667" font-family="Times Roman,serif" font-size="10.00">GACTA</text>
+<polygon fill="#218559" stroke="#218559" points="437,-131 437,-145 482,-145 482,-131 437,-131"/>
+<text text-anchor="start" x="444.5" y="-135.667" font-family="Times Roman,serif" font-size="10.00">TAGTC</text>
+</g>
+<!-- 3,1->3,3 -->
+<g id="edge38" class="edge"><title>3,1->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M377.327,-146.562C386.601,-146.258 396.531,-146.176 406.159,-146.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="406.364,-149.822 416.443,-146.554 406.527,-142.823 406.364,-149.822"/>
+</g>
+<!-- 3,1->2,1 -->
+<g id="edge40" class="edge"><title>3,1->2,1</title>
+<path fill="none" stroke="#218559" d="M307.356,-180.644C291.216,-196.784 274,-214 274,-214 274,-214 261.746,-222.469 247.643,-232.218"/>
+<polygon fill="#218559" stroke="#218559" points="245.439,-229.486 239.203,-238.051 249.42,-235.244 245.439,-229.486"/>
+</g>
+<!-- 3,3->3,1 -->
+<g id="edge44" class="edge"><title>3,3->3,1</title>
+<path fill="none" stroke="#218559" d="M416.443,-159.446C407.158,-159.745 397.225,-159.824 387.602,-159.681"/>
+<polygon fill="#218559" stroke="#218559" points="387.407,-156.175 377.327,-159.438 387.242,-163.173 387.407,-156.175"/>
+</g>
+<!-- 3,4 -->
+<g id="node20" class="node"><title>3,4</title>
+<ellipse fill="none" stroke="black" cx="583" cy="-153" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="563.5" y="-164.167" font-family="Times Roman,serif" font-size="10.00">3,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="561,-145 561,-159 606,-159 606,-145 561,-145"/>
+<text text-anchor="start" x="567.5" y="-149.667" font-family="Times Roman,serif" font-size="10.00">ACTAC</text>
+<polygon fill="#218559" stroke="#218559" points="561,-131 561,-145 606,-145 606,-131 561,-131"/>
+<text text-anchor="start" x="568" y="-135.667" font-family="Times Roman,serif" font-size="10.00">GTAGT</text>
+</g>
+<!-- 3,3->3,4 -->
+<g id="edge42" class="edge"><title>3,3->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M501.327,-146.562C510.601,-146.258 520.531,-146.176 530.159,-146.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="530.364,-149.822 540.443,-146.554 530.527,-142.823 530.364,-149.822"/>
+</g>
+<!-- 3,4->3,3 -->
+<g id="edge48" class="edge"><title>3,4->3,3</title>
+<path fill="none" stroke="#218559" d="M540.443,-159.446C531.158,-159.745 521.225,-159.824 511.602,-159.681"/>
+<polygon fill="#218559" stroke="#218559" points="511.407,-156.175 501.327,-159.438 511.242,-163.173 511.407,-156.175"/>
+</g>
+<!-- 2,4 -->
+<g id="node25" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="583" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="563.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="561,-253 561,-267 606,-267 606,-253 561,-253"/>
+<text text-anchor="start" x="567" y="-257.667" font-family="Times Roman,serif" font-size="10.00">GACTA</text>
+<polygon fill="#218559" stroke="#218559" points="561,-239 561,-253 606,-253 606,-239 561,-239"/>
+<text text-anchor="start" x="568.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TAGTC</text>
+</g>
+<!-- 3,4->2,4 -->
+<g id="edge46" class="edge"><title>3,4->2,4</title>
+<path fill="none" stroke="#218559" d="M583,-188.897C583,-188.965 583,-189 583,-189 583,-189 583,-204.137 583,-214.808"/>
+<polygon fill="#218559" stroke="#218559" points="579.5,-215.103 583,-225.103 586.5,-215.103 579.5,-215.103"/>
+</g>
+<!-- 2,1->1,1 -->
+<g id="edge18" class="edge"><title>2,1->1,1</title>
+<path fill="none" stroke="#218559" d="M171.021,-282.15C147.211,-296.547 120,-313 120,-313 120,-313 111.029,-319.471 99.9682,-327.449"/>
+<polygon fill="#218559" stroke="#218559" points="97.7057,-324.766 91.643,-333.454 101.801,-330.443 97.7057,-324.766"/>
+</g>
+<!-- 2,1->3,1 -->
+<g id="edge14" class="edge"><title>2,1->3,1</title>
+<path fill="none" stroke="#dd1e2f" d="M236.571,-235.406C254.514,-220.384 277.353,-201.262 296.514,-185.221"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="298.78,-187.888 304.201,-178.785 294.287,-182.521 298.78,-187.888"/>
+</g>
+<!-- 2,2 -->
+<g id="node23" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="335" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="315.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="313,-253 313,-267 358,-267 358,-253 313,-253"/>
+<text text-anchor="start" x="319.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">TAGAC</text>
+<polygon fill="#218559" stroke="#218559" points="313,-239 313,-253 358,-253 358,-239 313,-239"/>
+<text text-anchor="start" x="320.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">GTCTA</text>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge16" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M248.597,-254.609C259.385,-254.242 271.101,-254.157 282.322,-254.352"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="282.362,-257.854 292.448,-254.61 282.54,-250.856 282.362,-257.854"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge22" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M292.448,-267.39C281.662,-267.757 269.947,-267.843 258.724,-267.649"/>
+<polygon fill="#218559" stroke="#218559" points="258.683,-264.147 248.597,-267.391 258.505,-271.145 258.683,-264.147"/>
+</g>
+<!-- 2,3 -->
+<g id="node24" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="459" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="439.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="437,-253 437,-267 482,-267 482,-253 437,-253"/>
+<text text-anchor="start" x="443" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AGACT</text>
+<polygon fill="#218559" stroke="#218559" points="437,-239 437,-253 482,-253 482,-239 437,-239"/>
+<text text-anchor="start" x="444" y="-243.667" font-family="Times Roman,serif" font-size="10.00">AGTCT</text>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge20" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M377.327,-254.562C386.601,-254.258 396.531,-254.176 406.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="406.364,-257.822 416.443,-254.554 406.527,-250.823 406.364,-257.822"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge26" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M416.443,-267.446C407.158,-267.745 397.225,-267.824 387.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="387.407,-264.175 377.327,-267.438 387.242,-271.173 387.407,-264.175"/>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge24" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M501.327,-254.562C510.601,-254.258 520.531,-254.176 530.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="530.364,-257.822 540.443,-254.554 530.527,-250.823 530.364,-257.822"/>
+</g>
+<!-- 2,4->3,4 -->
+<g id="edge28" class="edge"><title>2,4->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M583,-225.103C583,-225.035 583,-225 583,-225 583,-225 583,-209.863 583,-199.192"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="586.5,-198.897 583,-188.897 579.5,-198.897 586.5,-198.897"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge30" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M540.443,-267.446C531.158,-267.745 521.225,-267.824 511.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="511.407,-264.175 501.327,-267.438 511.242,-271.173 511.407,-264.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble/.part-0.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble/.part-0.crc
new file mode 100644
index 0000000..3b5cec2
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble/.part-1.crc b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble/.part-1.crc
new file mode 100644
index 0000000..6621cf4
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble/part-0 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble/part-0
new file mode 100755
index 0000000..a6a5e83
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble/part-1 b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble/part-1
new file mode 100755
index 0000000..a65fe7e
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/bubblemerge/tip_and_bubble/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread.txt b/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread.txt
new file mode 100644
index 0000000..7a3e64f
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread.txt
@@ -0,0 +1,4 @@
+((1,1) [(1,2)] [] [] [] AATAG) (null)
+((1,2) [(1,3)] [] [] [(1,1)] ATAGA) (null)
+((1,3) [(1,4)] [] [] [(1,2)] TAGAA) (null)
+((1,4) [] [] [] [(1,3)] AGAAG) (null)
diff --git a/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread.txt.svg b/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread.txt.svg
new file mode 100644
index 0000000..efaf9e1
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread.txt.svg
@@ -0,0 +1,142 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: singleread_txt Pages: 1 -->
+<svg width="516pt" height="321pt"
+ viewBox="0.00 0.00 516.00 321.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 317)">
+<title>singleread_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-317 513,-317 513,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 500,-305 500,-217 8,-217"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="43.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AATAG</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="45.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">CTATT</text>
+</g>
+<!-- 1,2 -->
+<g id="node16" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="185.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ATAGA</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="187.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TCTAT</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge12" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge16" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 1,3 -->
+<g id="node17" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="309" y="-257.667" font-family="Times Roman,serif" font-size="10.00">TAGAA</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="311.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TTCTA</text>
+</g>
+<!-- 1,2->1,3 -->
+<g id="edge14" class="edge"><title>1,2->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 1,3->1,2 -->
+<g id="edge20" class="edge"><title>1,3->1,2</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+<!-- 1,4 -->
+<g id="node18" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-253 427,-267 472,-267 472,-253 427,-253"/>
+<text text-anchor="start" x="431.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AGAAG</text>
+<polygon fill="#218559" stroke="#218559" points="427,-239 427,-253 472,-253 472,-239 427,-239"/>
+<text text-anchor="start" x="435" y="-243.667" font-family="Times Roman,serif" font-size="10.00">CTTCT</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge18" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-254.562C376.601,-254.258 386.531,-254.176 396.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-257.822 406.443,-254.554 396.527,-250.823 396.364,-257.822"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge22" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M406.443,-267.446C397.158,-267.745 387.225,-267.824 377.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-264.175 367.327,-267.438 377.242,-271.173 377.407,-264.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread/.part-0.crc b/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread/.part-0.crc
new file mode 100644
index 0000000..b972cba
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread/.part-1.crc b/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread/.part-1.crc
new file mode 100644
index 0000000..23505c8
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread/part-0 b/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread/part-0
new file mode 100755
index 0000000..39d79bc
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread/part-1 b/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread/part-1
new file mode 100755
index 0000000..e49fcdd
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/pathmerge/singleread/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1.txt b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1.txt
new file mode 100644
index 0000000..bdeb586
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1.txt
@@ -0,0 +1,147 @@
+((2,1) [(2,2)] [] [] [] CCTCG) (null)
+((2,2) [(2,3)] [(1,4)] [] [(2,1)] CTCGC) (null)
+((2,3) [(2,4)] [] [] [(2,2)] TCGCA) (null)
+((2,4) [] [(1,2)] [] [(2,3)] CGCAC) (null)
+((4,1) [(4,2)] [] [] [] GAGGG) (null)
+((4,2) [(4,3)] [(5,4)] [] [(4,1)] AGGGT) (null)
+((4,3) [(4,4)] [] [] [(3,4),(4,2)] GGGTT) (null)
+((4,4) [] [(5,2)] [] [(4,3)] GGTTG) (null)
+((6,1) [(6,2)] [] [(5,3)] [] GTTGC) (null)
+((6,2) [(6,3)] [(7,4)] [] [(6,1)] TTGCT) (null)
+((6,3) [(6,4)] [] [(5,1)] [(6,2)] TGCTG) (null)
+((6,4) [] [(7,2)] [] [(6,3)] GCTGA) (null)
+((8,1) [(24,1),(8,2)] [] [(7,3)] [] CTGAA) (null)
+((8,2) [(8,3)] [(9,4)] [] [(8,1)] TGAAA) (null)
+((8,3) [(8,4)] [] [(7,1)] [(19,4),(8,2)] GAAAT) (null)
+((8,4) [] [(9,2)] [] [(8,3)] AAATC) (null)
+((10,1) [(10,2)] [] [] [(11,2)] GGCAG) (null)
+((10,2) [(9,1),(10,3)] [] [] [(10,1)] GCAGA) (null)
+((10,3) [(10,4)] [] [] [(11,4),(10,2)] CAGAT) (null)
+((10,4) [(9,3)] [] [] [(10,3)] AGATT) (null)
+((12,1) [(12,2)] [] [] [(13,2)] CTCTG) (null)
+((12,2) [(11,1),(12,3)] [] [] [(12,1)] TCTGG) (null)
+((12,3) [(12,4)] [] [] [(13,4),(12,2)] CTGGC) (null)
+((12,4) [(11,3)] [] [] [(12,3)] TGGCA) (null)
+((14,1) [(14,2)] [] [] [(15,2)] GCATC) (null)
+((14,2) [(13,1),(14,3)] [] [] [(14,1)] CATCT) (null)
+((14,3) [(14,4)] [] [] [(15,4),(14,2)] ATCTC) (null)
+((14,4) [(13,3)] [] [] [(14,3)] TCTCT) (null)
+((16,1) [(16,2)] [] [] [(17,2)] AACGG) (null)
+((16,2) [(15,1),(16,3)] [] [] [(16,1)] ACGGC) (null)
+((16,3) [(16,4)] [] [] [(17,4),(16,2)] CGGCA) (null)
+((16,4) [(15,3)] [] [] [(16,3)] GGCAT) (null)
+((18,1) [(23,1),(18,2)] [] [(17,3)] [] CGTTT) (null)
+((18,2) [(7,1),(18,3)] [(19,4)] [] [(18,1)] GTTTC) (null)
+((18,3) [(18,4)] [] [(17,1)] [(18,2)] TTTCA) (null)
+((18,4) [] [(19,2)] [(24,1)] [(18,3)] TTCAA) (null)
+((20,1) [(20,2)] [] [(19,3)] [] TCAAT) (null)
+((20,2) [(21,1),(20,3)] [] [] [(20,1)] CAATA) (null)
+((20,3) [(20,4)] [] [(19,1)] [(20,2)] AATAC) (null)
+((20,4) [(21,3)] [] [] [(20,3)] ATACG) (null)
+((22,1) [(22,2)] [] [] [(28,1),(27,3),(21,2)] TACGT) (null)
+((22,2) [(22,3)] [(23,4)] [] [(27,4),(22,1)] ACGTG) (null)
+((22,3) [(22,4)] [] [] [(21,4),(22,2)] CGTGA) (null)
+((22,4) [] [(23,2)] [] [(22,3)] GTGAA) (null)
+((24,1) [(17,1),(24,2)] [] [(23,3),(18,4),(7,2)] [(8,1),(19,3)] TGAAA) (null)
+((24,2) [(25,1),(24,3)] [] [(7,1)] [(19,4),(24,1)] GAAAC) (null)
+((24,3) [(24,4)] [] [(23,1)] [(24,2)] AAACT) (null)
+((24,4) [(25,3)] [] [] [(24,3)] AACTA) (null)
+((26,1) [(26,2)] [] [(27,3)] [] GTAAT) (null)
+((26,2) [(26,3)] [(25,4)] [] [(26,1)] TAATA) (null)
+((26,3) [(26,4)] [] [(27,1)] [(26,2)] AATAG) (null)
+((26,4) [] [(25,2)] [] [(26,3)] ATAGT) (null)
+((28,1) [(22,1),(28,2)] [] [] [(27,2)] TTACG) (null)
+((28,2) [(28,3)] [(29,4)] [] [(28,1)] TACGT) (null)
+((28,3) [(28,4)] [] [] [(27,4),(28,2)] ACGTC) (null)
+((28,4) [(31,3)] [(29,2)] [] [(28,3)] CGTCA) (null)
+((30,1) [(30,2)] [] [(29,3)] [(31,2)] GTCAT) (null)
+((30,2) [(29,1),(30,3)] [(31,4)] [] [(30,1)] TCATG) (null)
+((30,3) [(30,4)] [] [(29,1)] [(31,4),(30,2)] CATGA) (null)
+((30,4) [(29,3)] [(31,2)] [] [(30,3)] ATGAC) (null)
+((32,1) [(32,2)] [] [(33,3),(35,3)] [] AAGCG) (null)
+((32,2) [(31,1),(32,3)] [] [] [(32,1)] AGCGT) (null)
+((32,3) [(32,4)] [] [] [(32,2)] GCGTC) (null)
+((32,4) [(31,3)] [(29,2)] [] [(32,3)] CGTCA) (null)
+((34,1) [(34,2)] [] [] [(33,2),(35,2)] GCTTA) (null)
+((34,2) [(34,3)] [(35,4),(33,4)] [] [(34,1)] CTTAA) (null)
+((34,3) [(34,4)] [] [] [(35,4),(33,4),(34,2)] TTAAG) (null)
+((34,4) [] [(33,2),(35,2)] [] [(34,3)] TAAGC) (null)
+((36,1) [(36,2)] [] [(33,3),(35,3)] [] AAGCG) (null)
+((36,2) [(36,3)] [(37,4)] [] [(36,1)] AGCGT) (null)
+((36,3) [(36,4)] [] [] [(36,2)] GCGTG) (null)
+((36,4) [] [(37,2)] [] [(36,3)] CGTGT) (null)
+((1,1) [(1,2)] [] [] [] TAGTG) (null)
+((1,2) [(1,3)] [(2,4)] [] [(1,1)] AGTGC) (null)
+((1,3) [(1,4)] [] [] [(1,2)] GTGCG) (null)
+((1,4) [] [(2,2)] [] [(1,3)] TGCGA) (null)
+((3,1) [(3,3)] [] [] [] GCTAGG) (null)
+((3,3) [(3,4)] [] [] [(3,1)] TAGGG) (null)
+((3,4) [(4,3)] [] [] [(3,3)] AGGGT) (null)
+((5,1) [(5,2)] [] [(6,3)] [] AGCAA) (null)
+((5,2) [(5,3)] [(4,4)] [] [(5,1)] GCAAC) (null)
+((5,3) [(5,4)] [] [(6,1)] [(5,2)] CAACC) (null)
+((5,4) [] [(4,2)] [] [(5,3)] AACCC) (null)
+((7,1) [(7,2)] [] [(8,3),(24,2)] [(18,2),(23,1)] TTTCA) (null)
+((7,2) [(7,3)] [(6,4)] [(24,1)] [(7,1)] TTCAG) (null)
+((7,3) [(7,4)] [] [(8,1)] [(7,2)] TCAGC) (null)
+((7,4) [] [(6,2)] [] [(7,3)] CAGCA) (null)
+((9,1) [(9,2)] [] [] [(10,2)] CAGAT) (null)
+((9,2) [(9,3)] [(8,4)] [] [(9,1)] AGATT) (null)
+((9,3) [(9,4)] [] [] [(10,4),(9,2)] GATTT) (null)
+((9,4) [] [(8,2)] [] [(9,3)] ATTTC) (null)
+((11,1) [(11,2)] [] [] [(12,2)] CTGGC) (null)
+((11,2) [(10,1),(11,3)] [] [] [(11,1)] TGGCA) (null)
+((11,3) [(11,4)] [] [] [(12,4),(11,2)] GGCAG) (null)
+((11,4) [(10,3)] [] [] [(11,3)] GCAGA) (null)
+((13,1) [(13,2)] [] [] [(14,2)] ATCTC) (null)
+((13,2) [(12,1),(13,3)] [] [] [(13,1)] TCTCT) (null)
+((13,3) [(13,4)] [] [] [(14,4),(13,2)] CTCTG) (null)
+((13,4) [(12,3)] [] [] [(13,3)] TCTGG) (null)
+((15,1) [(15,2)] [] [] [(16,2)] CGGCA) (null)
+((15,2) [(14,1),(15,3)] [] [] [(15,1)] GGCAT) (null)
+((15,3) [(15,4)] [] [] [(16,4),(15,2)] GCATC) (null)
+((15,4) [(14,3)] [] [] [(15,3)] CATCT) (null)
+((17,1) [(17,2)] [] [(23,2),(18,3)] [(24,1)] GAAAC) (null)
+((17,2) [(16,1),(17,3)] [] [(23,1)] [(17,1)] AAACG) (null)
+((17,3) [(17,4)] [] [(18,1)] [(17,2)] AACGG) (null)
+((17,4) [(16,3)] [] [] [(17,3)] ACGGC) (null)
+((19,1) [(19,2)] [] [(20,3)] [] TATTG) (null)
+((19,2) [(19,3)] [(18,4)] [] [(19,1)] ATTGA) (null)
+((19,3) [(24,1),(19,4)] [] [(20,1)] [(19,2)] TTGAA) (null)
+((19,4) [(8,3),(24,2)] [(18,2),(23,1)] [] [(19,3)] TGAAA) (null)
+((21,1) [(21,2)] [] [] [(20,2)] AATAC) (null)
+((21,2) [(22,1),(21,3)] [] [] [(21,1)] ATACG) (null)
+((21,3) [(21,4)] [] [] [(20,4),(21,2)] TACGT) (null)
+((21,4) [(22,3)] [] [] [(27,4),(21,3)] ACGTG) (null)
+((23,1) [(7,1),(23,2)] [(19,4)] [(24,3),(17,2)] [(18,1)] GTTTC) (null)
+((23,2) [(23,3)] [(22,4)] [(17,1)] [(23,1)] TTTCA) (null)
+((23,3) [(23,4)] [] [(24,1)] [(23,2)] TTCAC) (null)
+((23,4) [] [(22,2)] [] [(23,3)] TCACG) (null)
+((25,1) [(25,2)] [] [] [(24,2)] AAACT) (null)
+((25,2) [(25,3)] [(26,4)] [] [(25,1)] AACTA) (null)
+((25,3) [(25,4)] [] [] [(24,4),(25,2)] ACTAT) (null)
+((25,4) [] [(26,2)] [] [(25,3)] CTATT) (null)
+((27,1) [(27,2)] [] [(26,3)] [] TATTA) (null)
+((27,2) [(28,1),(27,3)] [] [] [(27,1)] ATTAC) (null)
+((27,3) [(22,1),(27,4)] [] [(26,1)] [(27,2)] TTACG) (null)
+((27,4) [(28,3),(21,4),(22,2)] [] [] [(27,3)] TACGT) (null)
+((29,1) [(29,2)] [] [(30,3)] [(30,2)] CATGA) (null)
+((29,2) [(29,3)] [(32,4),(28,4)] [] [(29,1)] ATGAC) (null)
+((29,3) [(29,4)] [] [(30,1)] [(30,4),(29,2)] TGACG) (null)
+((29,4) [] [(28,2)] [] [(29,3)] GACGT) (null)
+((31,1) [(31,2)] [] [] [(32,2)] GCGTC) (null)
+((31,2) [(30,1),(31,3)] [(30,4)] [] [(31,1)] CGTCA) (null)
+((31,3) [(31,4)] [] [] [(32,4),(28,4),(31,2)] GTCAT) (null)
+((31,4) [(30,3)] [(30,2)] [] [(31,3)] TCATG) (null)
+((33,1) [(33,2)] [] [] [] TCGCT) (null)
+((33,2) [(34,1),(33,3)] [(34,4)] [] [(33,1)] CGCTT) (null)
+((33,3) [(33,4)] [] [(36,1),(32,1)] [(33,2)] GCTTA) (null)
+((33,4) [(34,3)] [(34,2)] [] [(33,3)] CTTAA) (null)
+((35,1) [(35,2)] [] [] [] TCGCT) (null)
+((35,2) [(34,1),(35,3)] [(34,4)] [] [(35,1)] CGCTT) (null)
+((35,3) [(35,4)] [] [(36,1),(32,1)] [(35,2)] GCTTA) (null)
+((35,4) [(34,3)] [(34,2)] [] [(35,3)] CTTAA) (null)
+((37,1) [(37,2)] [] [] [] CCACA) (null)
+((37,2) [(37,3)] [(36,4)] [] [(37,1)] CACAC) (null)
+((37,3) [(37,4)] [] [] [(37,2)] ACACG) (null)
+((37,4) [] [(36,2)] [] [(37,3)] CACGC) (null)
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1.txt.svg b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1.txt.svg
new file mode 100644
index 0000000..655df86
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1.txt.svg
@@ -0,0 +1,3577 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: walk_random_seq1_txt Pages: 1 -->
+<svg width="1494pt" height="5501pt"
+ viewBox="0.00 0.00 1494.00 5501.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 5497)">
+<title>walk_random_seq1_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-5497 1491,-5497 1491,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="54,-1935 54,-2136 226,-2136 226,-1935 54,-1935"/>
+<text text-anchor="middle" x="140" y="-2119.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_24</title>
+<polygon fill="none" stroke="black" points="428,-5076 428,-5164 942,-5164 942,-5076 428,-5076"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_25</title>
+<polygon fill="none" stroke="black" points="562,-1126 562,-1214 1076,-1214 1076,-1126 562,-1126"/>
+</g>
+<g id="graph5" class="cluster"><title>cluster_26</title>
+<polygon fill="none" stroke="black" points="428,-2358 428,-2446 942,-2446 942,-2358 428,-2358"/>
+</g>
+<g id="graph6" class="cluster"><title>cluster_27</title>
+<polygon fill="none" stroke="black" points="294,-2792 294,-2880 808,-2880 808,-2792 294,-2792"/>
+</g>
+<g id="graph7" class="cluster"><title>cluster_20</title>
+<polygon fill="none" stroke="black" points="160,-4070 160,-4158 674,-4158 674,-4070 160,-4070"/>
+</g>
+<g id="graph8" class="cluster"><title>cluster_21</title>
+<polygon fill="none" stroke="black" points="428,-3605 428,-3693 942,-3693 942,-3605 428,-3605"/>
+</g>
+<g id="graph9" class="cluster"><title>cluster_22</title>
+<polygon fill="none" stroke="black" points="428,-3190 428,-3278 942,-3278 942,-3190 428,-3190"/>
+</g>
+<g id="graph10" class="cluster"><title>cluster_23</title>
+<polygon fill="none" stroke="black" points="294,-4681 294,-4769 808,-4769 808,-4681 294,-4681"/>
+</g>
+<g id="graph11" class="cluster"><title>cluster_28</title>
+<polygon fill="none" stroke="black" points="562,-2696 562,-2784 1076,-2784 1076,-2696 562,-2696"/>
+</g>
+<g id="graph12" class="cluster"><title>cluster_29</title>
+<polygon fill="none" stroke="black" points="696,-2262 696,-2350 1210,-2350 1210,-2262 696,-2262"/>
+</g>
+<g id="graph13" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="13,-5371 13,-5459 535,-5459 535,-5371 13,-5371"/>
+</g>
+<g id="graph14" class="cluster"><title>cluster_3</title>
+<polygon fill="none" stroke="black" points="700,-8 700,-96 1071,-96 1071,-8 700,-8"/>
+</g>
+<g id="graph15" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="165,-5275 165,-5363 669,-5363 669,-5275 165,-5275"/>
+</g>
+<g id="graph16" class="cluster"><title>cluster_5</title>
+<polygon fill="none" stroke="black" points="701,-2454 701,-2542 1205,-2542 1205,-2454 701,-2454"/>
+</g>
+<g id="graph17" class="cluster"><title>cluster_4</title>
+<polygon fill="none" stroke="black" points="835,-1400 835,-1488 1339,-1488 1339,-1400 835,-1400"/>
+</g>
+<g id="graph18" class="cluster"><title>cluster_7</title>
+<polygon fill="none" stroke="black" points="433,-4585 433,-4673 937,-4673 937,-4585 433,-4585"/>
+</g>
+<g id="graph19" class="cluster"><title>cluster_6</title>
+<polygon fill="none" stroke="black" points="567,-3087 567,-3175 1071,-3175 1071,-3087 567,-3087"/>
+</g>
+<g id="graph20" class="cluster"><title>cluster_9</title>
+<polygon fill="none" stroke="black" points="433,-3439 433,-3527 937,-3527 937,-3439 433,-3439"/>
+</g>
+<g id="graph21" class="cluster"><title>cluster_8</title>
+<polygon fill="none" stroke="black" points="299,-4980 299,-5068 803,-5068 803,-4980 299,-4980"/>
+</g>
+<g id="graph22" class="cluster"><title>cluster_11</title>
+<polygon fill="none" stroke="black" points="294,-616 294,-704 808,-704 808,-616 294,-616"/>
+</g>
+<g id="graph23" class="cluster"><title>cluster_10</title>
+<polygon fill="none" stroke="black" points="428,-2991 428,-3079 942,-3079 942,-2991 428,-2991"/>
+</g>
+<g id="graph24" class="cluster"><title>cluster_13</title>
+<polygon fill="none" stroke="black" points="294,-424 294,-512 808,-512 808,-424 294,-424"/>
+</g>
+<g id="graph25" class="cluster"><title>cluster_12</title>
+<polygon fill="none" stroke="black" points="294,-520 294,-608 808,-608 808,-520 294,-520"/>
+</g>
+<g id="graph26" class="cluster"><title>cluster_15</title>
+<polygon fill="none" stroke="black" points="294,-232 294,-320 808,-320 808,-232 294,-232"/>
+</g>
+<g id="graph27" class="cluster"><title>cluster_14</title>
+<polygon fill="none" stroke="black" points="294,-328 294,-416 808,-416 808,-328 294,-328"/>
+</g>
+<g id="graph28" class="cluster"><title>cluster_17</title>
+<polygon fill="none" stroke="black" points="294,-3904 294,-3992 808,-3992 808,-3904 294,-3904"/>
+</g>
+<g id="graph29" class="cluster"><title>cluster_16</title>
+<polygon fill="none" stroke="black" points="294,-104 294,-192 808,-192 808,-104 294,-104"/>
+</g>
+<g id="graph30" class="cluster"><title>cluster_19</title>
+<polygon fill="none" stroke="black" points="8,-5172 8,-5260 540,-5260 540,-5172 8,-5172"/>
+</g>
+<g id="graph31" class="cluster"><title>cluster_18</title>
+<polygon fill="none" stroke="black" points="160,-4419 160,-4507 674,-4507 674,-4419 160,-4419"/>
+</g>
+<g id="graph32" class="cluster"><title>cluster_31</title>
+<polygon fill="none" stroke="black" points="562,-1688 562,-1776 1076,-1776 1076,-1688 562,-1688"/>
+</g>
+<g id="graph33" class="cluster"><title>cluster_30</title>
+<polygon fill="none" stroke="black" points="696,-2096 696,-2184 1210,-2184 1210,-2096 696,-2096"/>
+</g>
+<g id="graph34" class="cluster"><title>cluster_37</title>
+<polygon fill="none" stroke="black" points="964,-616 964,-704 1478,-704 1478,-616 964,-616"/>
+</g>
+<g id="graph35" class="cluster"><title>cluster_36</title>
+<polygon fill="none" stroke="black" points="830,-1030 830,-1118 1344,-1118 1344,-1030 830,-1030"/>
+</g>
+<g id="graph36" class="cluster"><title>cluster_35</title>
+<polygon fill="none" stroke="black" points="428,-1592 428,-1680 942,-1680 942,-1592 428,-1592"/>
+</g>
+<g id="graph37" class="cluster"><title>cluster_34</title>
+<polygon fill="none" stroke="black" points="562,-712 562,-800 1076,-800 1076,-712 562,-712"/>
+</g>
+<g id="graph38" class="cluster"><title>cluster_33</title>
+<polygon fill="none" stroke="black" points="428,-1496 428,-1584 942,-1584 942,-1496 428,-1496"/>
+</g>
+<g id="graph39" class="cluster"><title>cluster_32</title>
+<polygon fill="none" stroke="black" points="562,-1930 562,-2018 1076,-2018 1076,-1930 562,-1930"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="64" cy="-2084" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="216" cy="-2084" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M66.1487,-2084C81.1775,-2084 170.918,-2084 203.873,-2084"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="204.136,-2087.5 214.136,-2084 204.136,-2080.5 204.136,-2087.5"/>
+<text text-anchor="middle" x="140" y="-2089.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="64" cy="-2043" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="216" cy="-2043" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M66.1487,-2043C81.1775,-2043 170.918,-2043 203.873,-2043"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="204.136,-2046.5 214.136,-2043 204.136,-2039.5 204.136,-2046.5"/>
+<text text-anchor="middle" x="140" y="-2048.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="64" cy="-2002" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="216" cy="-2002" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M66.1487,-2002C81.1775,-2002 170.918,-2002 203.873,-2002"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="204.136,-2005.5 214.136,-2002 204.136,-1998.5 204.136,-2005.5"/>
+<text text-anchor="middle" x="140" y="-2007.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="64" cy="-1961" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="216" cy="-1961" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M66.1487,-1961C81.1775,-1961 170.918,-1961 203.873,-1961"/>
+<polygon fill="#218559" stroke="#218559" points="204.136,-1964.5 214.136,-1961 204.136,-1957.5 204.136,-1964.5"/>
+<text text-anchor="middle" x="140" y="-1966.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 24,1 -->
+<g id="node15" class="node"><title>24,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-5120" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-5131.17" font-family="Times Roman,serif" font-size="10.00">24,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-5112 459,-5126 510,-5126 510,-5112 459,-5112"/>
+<text text-anchor="start" x="467.5" y="-5116.67" font-family="Times Roman,serif" font-size="10.00">TGAAA</text>
+<polygon fill="#218559" stroke="#218559" points="459,-5098 459,-5112 510,-5112 510,-5098 459,-5098"/>
+<text text-anchor="start" x="470" y="-5102.67" font-family="Times Roman,serif" font-size="10.00">TTTCA</text>
+</g>
+<!-- 24,2 -->
+<g id="node16" class="node"><title>24,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-5120" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-5131.17" font-family="Times Roman,serif" font-size="10.00">24,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-5112 593,-5126 644,-5126 644,-5112 593,-5112"/>
+<text text-anchor="start" x="601" y="-5116.67" font-family="Times Roman,serif" font-size="10.00">GAAAC</text>
+<polygon fill="#218559" stroke="#218559" points="593,-5098 593,-5112 644,-5112 644,-5098 593,-5098"/>
+<text text-anchor="start" x="604" y="-5102.67" font-family="Times Roman,serif" font-size="10.00">GTTTC</text>
+</g>
+<!-- 24,1->24,2 -->
+<g id="edge280" class="edge"><title>24,1->24,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-5113.53C540.501,-5113.25 550.748,-5113.18 560.703,-5113.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-5116.82 570.933,-5113.52 561.007,-5109.82 560.864,-5116.82"/>
+</g>
+<!-- 23,3 -->
+<g id="node52" class="node"><title>23,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-4725" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-4736.17" font-family="Times Roman,serif" font-size="10.00">23,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-4717 593,-4731 644,-4731 644,-4717 593,-4717"/>
+<text text-anchor="start" x="603.5" y="-4721.67" font-family="Times Roman,serif" font-size="10.00">TTCAC</text>
+<polygon fill="#218559" stroke="#218559" points="593,-4703 593,-4717 644,-4717 644,-4703 593,-4703"/>
+<text text-anchor="start" x="601.5" y="-4707.67" font-family="Times Roman,serif" font-size="10.00">GTGAA</text>
+</g>
+<!-- 24,1->23,3 -->
+<g id="edge282" class="edge"><title>24,1->23,3</title>
+<path fill="none" stroke="#06a2cb" d="M515.6,-5092.91C528.303,-5082.03 540,-5072 540,-5072 540,-5072 586.676,-4864.35 607.814,-4770.32"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="611.241,-4771.03 610.019,-4760.5 604.411,-4769.49 611.241,-4771.03"/>
+</g>
+<!-- 7,2 -->
+<g id="node90" class="node"><title>7,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-4629" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="598.5" y="-4640.17" font-family="Times Roman,serif" font-size="10.00">7,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="596,-4621 596,-4635 641,-4635 641,-4621 596,-4621"/>
+<text text-anchor="start" x="603" y="-4625.67" font-family="Times Roman,serif" font-size="10.00">TTCAG</text>
+<polygon fill="#218559" stroke="#218559" points="596,-4607 596,-4621 641,-4621 641,-4607 596,-4607"/>
+<text text-anchor="start" x="601.5" y="-4611.67" font-family="Times Roman,serif" font-size="10.00">CTGAA</text>
+</g>
+<!-- 24,1->7,2 -->
+<g id="edge286" class="edge"><title>24,1->7,2</title>
+<path fill="none" stroke="#06a2cb" d="M515.6,-5092.91C528.303,-5082.03 540,-5072 540,-5072 540,-5072 552,-4898 552,-4898 552,-4898 562,-4677 562,-4677 562,-4677 569.944,-4670.19 579.849,-4661.7"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="582.391,-4664.13 587.705,-4654.97 577.835,-4658.82 582.391,-4664.13"/>
+</g>
+<!-- 8,1 -->
+<g id="node104" class="node"><title>8,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-5024" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="330.5" y="-5035.17" font-family="Times Roman,serif" font-size="10.00">8,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="328,-5016 328,-5030 373,-5030 373,-5016 328,-5016"/>
+<text text-anchor="start" x="333.5" y="-5020.67" font-family="Times Roman,serif" font-size="10.00">CTGAA</text>
+<polygon fill="#218559" stroke="#218559" points="328,-5002 328,-5016 373,-5016 373,-5002 328,-5002"/>
+<text text-anchor="start" x="335" y="-5006.67" font-family="Times Roman,serif" font-size="10.00">TTCAG</text>
+</g>
+<!-- 24,1->8,1 -->
+<g id="edge288" class="edge"><title>24,1->8,1</title>
+<path fill="none" stroke="#218559" d="M445.233,-5098.91C429.962,-5090.6 416,-5083 416,-5083 416,-5083 402.293,-5070.75 387.372,-5057.41"/>
+<polygon fill="#218559" stroke="#218559" points="389.313,-5054.45 379.525,-5050.39 384.648,-5059.67 389.313,-5054.45"/>
+</g>
+<!-- 17,1 -->
+<g id="node139" class="node"><title>17,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-3948" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-3959.17" font-family="Times Roman,serif" font-size="10.00">17,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-3940 325,-3954 376,-3954 376,-3940 325,-3940"/>
+<text text-anchor="start" x="333" y="-3944.67" font-family="Times Roman,serif" font-size="10.00">GAAAC</text>
+<polygon fill="#218559" stroke="#218559" points="325,-3926 325,-3940 376,-3940 376,-3926 325,-3926"/>
+<text text-anchor="start" x="336" y="-3930.67" font-family="Times Roman,serif" font-size="10.00">GTTTC</text>
+</g>
+<!-- 24,1->17,1 -->
+<g id="edge278" class="edge"><title>24,1->17,1</title>
+<path fill="none" stroke="#dd1e2f" d="M452.4,-5092.91C439.697,-5082.03 428,-5072 428,-5072 428,-5072 418,-3850 418,-3850 418,-3850 416,-3850 416,-3850 416,-3850 395.53,-3880.4 377.39,-3907.33"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="374.294,-3905.66 371.611,-3915.91 380.1,-3909.57 374.294,-3905.66"/>
+</g>
+<!-- 19,3 -->
+<g id="node151" class="node"><title>19,3</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-5216" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-5227.17" font-family="Times Roman,serif" font-size="10.00">19,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-5208 325,-5222 376,-5222 376,-5208 325,-5208"/>
+<text text-anchor="start" x="334.5" y="-5212.67" font-family="Times Roman,serif" font-size="10.00">TTGAA</text>
+<polygon fill="#218559" stroke="#218559" points="325,-5194 325,-5208 376,-5208 376,-5194 325,-5194"/>
+<text text-anchor="start" x="335" y="-5198.67" font-family="Times Roman,serif" font-size="10.00">TTCAA</text>
+</g>
+<!-- 24,1->19,3 -->
+<g id="edge290" class="edge"><title>24,1->19,3</title>
+<path fill="none" stroke="#218559" d="M449.317,-5144.85C432.127,-5157.16 411.205,-5172.15 393.005,-5185.19"/>
+<polygon fill="#218559" stroke="#218559" points="390.948,-5182.36 384.857,-5191.03 395.025,-5188.05 390.948,-5182.36"/>
+</g>
+<!-- 18,4 -->
+<g id="node157" class="node"><title>18,4</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-4463" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-4474.17" font-family="Times Roman,serif" font-size="10.00">18,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-4455 593,-4469 644,-4469 644,-4455 593,-4455"/>
+<text text-anchor="start" x="603" y="-4459.67" font-family="Times Roman,serif" font-size="10.00">TTCAA</text>
+<polygon fill="#218559" stroke="#218559" points="593,-4441 593,-4455 644,-4455 644,-4441 593,-4441"/>
+<text text-anchor="start" x="602.5" y="-4445.67" font-family="Times Roman,serif" font-size="10.00">TTGAA</text>
+</g>
+<!-- 24,1->18,4 -->
+<g id="edge284" class="edge"><title>24,1->18,4</title>
+<path fill="none" stroke="#06a2cb" d="M515.6,-5092.91C528.303,-5082.03 540,-5072 540,-5072 540,-5072 550,-4876 550,-4876 550,-4876 552,-4843 552,-4843 552,-4843 562,-4581 562,-4581 562,-4581 581.437,-4540.04 597.409,-4506.39"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="600.677,-4507.67 601.802,-4497.13 594.353,-4504.66 600.677,-4507.67"/>
+</g>
+<!-- 24,2->24,1 -->
+<g id="edge300" class="edge"><title>24,2->24,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-5126.48C561.29,-5126.75 551.041,-5126.82 541.091,-5126.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-5123.18 530.867,-5126.47 540.792,-5130.18 540.937,-5123.18"/>
+</g>
+<!-- 24,3 -->
+<g id="node17" class="node"><title>24,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-5120" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-5131.17" font-family="Times Roman,serif" font-size="10.00">24,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-5112 727,-5126 778,-5126 778,-5112 727,-5112"/>
+<text text-anchor="start" x="736" y="-5116.67" font-family="Times Roman,serif" font-size="10.00">AAACT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-5098 727,-5112 778,-5112 778,-5098 727,-5098"/>
+<text text-anchor="start" x="737.5" y="-5102.67" font-family="Times Roman,serif" font-size="10.00">AGTTT</text>
+</g>
+<!-- 24,2->24,3 -->
+<g id="edge294" class="edge"><title>24,2->24,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-5113.53C674.501,-5113.25 684.748,-5113.18 694.703,-5113.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-5116.82 704.933,-5113.52 695.007,-5109.82 694.864,-5116.82"/>
+</g>
+<!-- 25,1 -->
+<g id="node20" class="node"><title>25,1</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-1170" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-1181.17" font-family="Times Roman,serif" font-size="10.00">25,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-1162 593,-1176 644,-1176 644,-1162 593,-1162"/>
+<text text-anchor="start" x="602" y="-1166.67" font-family="Times Roman,serif" font-size="10.00">AAACT</text>
+<polygon fill="#218559" stroke="#218559" points="593,-1148 593,-1162 644,-1162 644,-1148 593,-1148"/>
+<text text-anchor="start" x="603.5" y="-1152.67" font-family="Times Roman,serif" font-size="10.00">AGTTT</text>
+</g>
+<!-- 24,2->25,1 -->
+<g id="edge292" class="edge"><title>24,2->25,1</title>
+<path fill="none" stroke="#dd1e2f" d="M586.07,-5092.84C568.813,-5078.15 551,-5063 551,-5063 551,-5063 551,-1501 551,-1501 551,-1501 590.35,-1306.6 608.737,-1215.76"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="612.213,-1216.23 610.766,-1205.74 605.352,-1214.84 612.213,-1216.23"/>
+</g>
+<!-- 7,1 -->
+<g id="node89" class="node"><title>7,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-4629" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="464.5" y="-4640.17" font-family="Times Roman,serif" font-size="10.00">7,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="462,-4621 462,-4635 507,-4635 507,-4621 462,-4621"/>
+<text text-anchor="start" x="470" y="-4625.67" font-family="Times Roman,serif" font-size="10.00">TTTCA</text>
+<polygon fill="#218559" stroke="#218559" points="462,-4607 462,-4621 507,-4621 507,-4607 462,-4607"/>
+<text text-anchor="start" x="467.5" y="-4611.67" font-family="Times Roman,serif" font-size="10.00">TGAAA</text>
+</g>
+<!-- 24,2->7,1 -->
+<g id="edge296" class="edge"><title>24,2->7,1</title>
+<path fill="none" stroke="#06a2cb" d="M586.4,-5092.91C573.697,-5082.03 562,-5072 562,-5072 562,-5072 552,-4365 552,-4365 552,-4365 550,-4365 550,-4365 550,-4365 540,-4579 540,-4579 540,-4579 531.629,-4586.47 521.36,-4595.64"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="518.768,-4593.26 513.64,-4602.54 523.43,-4598.49 518.768,-4593.26"/>
+</g>
+<!-- 19,4 -->
+<g id="node152" class="node"><title>19,4</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-5216" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-5227.17" font-family="Times Roman,serif" font-size="10.00">19,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-5208 459,-5222 510,-5222 510,-5208 459,-5208"/>
+<text text-anchor="start" x="467.5" y="-5212.67" font-family="Times Roman,serif" font-size="10.00">TGAAA</text>
+<polygon fill="#218559" stroke="#218559" points="459,-5194 459,-5208 510,-5208 510,-5194 459,-5194"/>
+<text text-anchor="start" x="470" y="-5198.67" font-family="Times Roman,serif" font-size="10.00">TTTCA</text>
+</g>
+<!-- 24,2->19,4 -->
+<g id="edge298" class="edge"><title>24,2->19,4</title>
+<path fill="none" stroke="#218559" d="M599.965,-5153.61C580.424,-5190.03 552,-5243 552,-5243 552,-5243 550,-5243 550,-5243 550,-5243 544,-5240.55 535.618,-5237.12"/>
+<polygon fill="#218559" stroke="#218559" points="536.753,-5233.8 526.172,-5233.25 534.103,-5240.28 536.753,-5233.8"/>
+</g>
+<!-- 24,3->24,2 -->
+<g id="edge306" class="edge"><title>24,3->24,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-5126.48C695.29,-5126.75 685.041,-5126.82 675.091,-5126.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-5123.18 664.867,-5126.47 674.792,-5130.18 674.937,-5123.18"/>
+</g>
+<!-- 24,4 -->
+<g id="node18" class="node"><title>24,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-5120" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-5131.17" font-family="Times Roman,serif" font-size="10.00">24,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-5112 861,-5126 912,-5126 912,-5112 861,-5112"/>
+<text text-anchor="start" x="870.5" y="-5116.67" font-family="Times Roman,serif" font-size="10.00">AACTA</text>
+<polygon fill="#218559" stroke="#218559" points="861,-5098 861,-5112 912,-5112 912,-5098 861,-5098"/>
+<text text-anchor="start" x="872" y="-5102.67" font-family="Times Roman,serif" font-size="10.00">TAGTT</text>
+</g>
+<!-- 24,3->24,4 -->
+<g id="edge302" class="edge"><title>24,3->24,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-5113.53C808.501,-5113.25 818.748,-5113.18 828.703,-5113.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-5116.82 838.933,-5113.52 829.007,-5109.82 828.864,-5116.82"/>
+</g>
+<!-- 23,1 -->
+<g id="node50" class="node"><title>23,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-4725" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-4736.17" font-family="Times Roman,serif" font-size="10.00">23,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-4717 325,-4731 376,-4731 376,-4717 325,-4717"/>
+<text text-anchor="start" x="336" y="-4721.67" font-family="Times Roman,serif" font-size="10.00">GTTTC</text>
+<polygon fill="#218559" stroke="#218559" points="325,-4703 325,-4717 376,-4717 376,-4703 325,-4703"/>
+<text text-anchor="start" x="333" y="-4707.67" font-family="Times Roman,serif" font-size="10.00">GAAAC</text>
+</g>
+<!-- 24,3->23,1 -->
+<g id="edge304" class="edge"><title>24,3->23,1</title>
+<path fill="none" stroke="#06a2cb" d="M720.4,-5092.91C707.697,-5082.03 696,-5072 696,-5072 696,-5072 674,-3900 674,-3900 674,-3900 562,-3900 562,-3900 562,-3900 540,-4026 540,-4026 540,-4026 428,-4066 428,-4066 428,-4066 418,-4240 418,-4240 418,-4240 398,-4511 398,-4511 398,-4511 374.399,-4616.22 360.296,-4679.1"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="356.808,-4678.66 358.034,-4689.18 363.638,-4680.19 356.808,-4678.66"/>
+</g>
+<!-- 24,4->24,3 -->
+<g id="edge310" class="edge"><title>24,4->24,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-5126.48C829.29,-5126.75 819.041,-5126.82 809.091,-5126.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-5123.18 798.867,-5126.47 808.792,-5130.18 808.937,-5123.18"/>
+</g>
+<!-- 25,3 -->
+<g id="node22" class="node"><title>25,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1170" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-1181.17" font-family="Times Roman,serif" font-size="10.00">25,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-1162 861,-1176 912,-1176 912,-1162 861,-1162"/>
+<text text-anchor="start" x="871.5" y="-1166.67" font-family="Times Roman,serif" font-size="10.00">ACTAT</text>
+<polygon fill="#218559" stroke="#218559" points="861,-1148 861,-1162 912,-1162 912,-1148 861,-1148"/>
+<text text-anchor="start" x="871.5" y="-1152.67" font-family="Times Roman,serif" font-size="10.00">ATAGT</text>
+</g>
+<!-- 24,4->25,3 -->
+<g id="edge308" class="edge"><title>24,4->25,3</title>
+<path fill="none" stroke="#dd1e2f" d="M880.687,-5084.15C864.84,-4977.25 819,-4668 819,-4668 819,-4668 819,-1405 819,-1405 819,-1405 853.684,-1283.35 873.142,-1215.1"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="876.521,-1216.01 875.897,-1205.44 869.789,-1214.09 876.521,-1216.01"/>
+</g>
+<!-- 25,1->24,2 -->
+<g id="edge716" class="edge"><title>25,1->24,2</title>
+<path fill="none" stroke="#218559" d="M610.766,-1205.74C593.441,-1291.33 551,-1501 551,-1501 551,-1501 551,-5063 551,-5063 551,-5063 563.917,-5073.99 578.409,-5086.32"/>
+<polygon fill="#218559" stroke="#218559" points="576.185,-5089.02 586.07,-5092.84 580.721,-5083.69 576.185,-5089.02"/>
+</g>
+<!-- 25,2 -->
+<g id="node21" class="node"><title>25,2</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-1170" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-1181.17" font-family="Times Roman,serif" font-size="10.00">25,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-1162 727,-1176 778,-1176 778,-1162 727,-1162"/>
+<text text-anchor="start" x="736.5" y="-1166.67" font-family="Times Roman,serif" font-size="10.00">AACTA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-1148 727,-1162 778,-1162 778,-1148 727,-1148"/>
+<text text-anchor="start" x="738" y="-1152.67" font-family="Times Roman,serif" font-size="10.00">TAGTT</text>
+</g>
+<!-- 25,1->25,2 -->
+<g id="edge714" class="edge"><title>25,1->25,2</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-1163.53C674.501,-1163.25 684.748,-1163.18 694.703,-1163.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-1166.82 704.933,-1163.52 695.007,-1159.82 694.864,-1166.82"/>
+</g>
+<!-- 25,2->25,1 -->
+<g id="edge722" class="edge"><title>25,2->25,1</title>
+<path fill="none" stroke="#218559" d="M704.933,-1176.48C695.29,-1176.75 685.041,-1176.82 675.091,-1176.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-1173.18 664.867,-1176.47 674.792,-1180.18 674.937,-1173.18"/>
+</g>
+<!-- 25,2->25,3 -->
+<g id="edge718" class="edge"><title>25,2->25,3</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-1163.53C808.501,-1163.25 818.748,-1163.18 828.703,-1163.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-1166.82 838.933,-1163.52 829.007,-1159.82 828.864,-1166.82"/>
+</g>
+<!-- 26,4 -->
+<g id="node28" class="node"><title>26,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-2402" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-2413.17" font-family="Times Roman,serif" font-size="10.00">26,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-2394 861,-2408 912,-2408 912,-2394 861,-2394"/>
+<text text-anchor="start" x="871.5" y="-2398.67" font-family="Times Roman,serif" font-size="10.00">ATAGT</text>
+<polygon fill="#218559" stroke="#218559" points="861,-2380 861,-2394 912,-2394 912,-2380 861,-2380"/>
+<text text-anchor="start" x="871.5" y="-2384.67" font-family="Times Roman,serif" font-size="10.00">ACTAT</text>
+</g>
+<!-- 25,2->26,4 -->
+<g id="edge720" class="edge"><title>25,2->26,4</title>
+<path fill="none" stroke="#ebb035" d="M758.261,-1206C772.874,-1290.02 808,-1492 808,-1492 808,-1492 820,-1894 820,-1894 820,-1894 830,-2354 830,-2354 830,-2354 837.283,-2360.24 846.596,-2368.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="844.529,-2371.06 854.4,-2374.91 849.085,-2365.75 844.529,-2371.06"/>
+</g>
+<!-- 25,3->24,4 -->
+<g id="edge726" class="edge"><title>25,3->24,4</title>
+<path fill="none" stroke="#218559" d="M875.897,-1205.44C857.18,-1271.09 819,-1405 819,-1405 819,-1405 819,-4668 819,-4668 819,-4668 862.193,-4959.39 879.219,-5074.25"/>
+<polygon fill="#218559" stroke="#218559" points="875.758,-5074.78 880.687,-5084.15 882.682,-5073.75 875.758,-5074.78"/>
+</g>
+<!-- 25,3->25,2 -->
+<g id="edge728" class="edge"><title>25,3->25,2</title>
+<path fill="none" stroke="#218559" d="M838.933,-1176.48C829.29,-1176.75 819.041,-1176.82 809.091,-1176.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-1173.18 798.867,-1176.47 808.792,-1180.18 808.937,-1173.18"/>
+</g>
+<!-- 25,4 -->
+<g id="node23" class="node"><title>25,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-1170" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-1181.17" font-family="Times Roman,serif" font-size="10.00">25,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-1162 995,-1176 1046,-1176 1046,-1162 995,-1162"/>
+<text text-anchor="start" x="1006.5" y="-1166.67" font-family="Times Roman,serif" font-size="10.00">CTATT</text>
+<polygon fill="#218559" stroke="#218559" points="995,-1148 995,-1162 1046,-1162 1046,-1148 995,-1148"/>
+<text text-anchor="start" x="1004.5" y="-1152.67" font-family="Times Roman,serif" font-size="10.00">AATAG</text>
+</g>
+<!-- 25,3->25,4 -->
+<g id="edge724" class="edge"><title>25,3->25,4</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-1163.53C942.501,-1163.25 952.748,-1163.18 962.703,-1163.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-1166.82 972.933,-1163.52 963.007,-1159.82 962.864,-1166.82"/>
+</g>
+<!-- 25,4->25,3 -->
+<g id="edge732" class="edge"><title>25,4->25,3</title>
+<path fill="none" stroke="#218559" d="M972.933,-1176.48C963.29,-1176.75 953.041,-1176.82 943.091,-1176.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-1173.18 932.867,-1176.47 942.792,-1180.18 942.937,-1173.18"/>
+</g>
+<!-- 26,2 -->
+<g id="node26" class="node"><title>26,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-2402" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-2413.17" font-family="Times Roman,serif" font-size="10.00">26,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-2394 593,-2408 644,-2408 644,-2394 593,-2394"/>
+<text text-anchor="start" x="604" y="-2398.67" font-family="Times Roman,serif" font-size="10.00">TAATA</text>
+<polygon fill="#218559" stroke="#218559" points="593,-2380 593,-2394 644,-2394 644,-2380 593,-2380"/>
+<text text-anchor="start" x="605" y="-2384.67" font-family="Times Roman,serif" font-size="10.00">TATTA</text>
+</g>
+<!-- 25,4->26,2 -->
+<g id="edge730" class="edge"><title>25,4->26,2</title>
+<path fill="none" stroke="#ebb035" d="M1007.69,-1205.13C988.09,-1261.04 952,-1364 952,-1364 952,-1364 942,-1814 942,-1814 942,-1814 830,-1848 830,-1848 830,-1848 818,-1892 818,-1892 818,-1892 808,-2054 808,-2054 808,-2054 696,-2092 696,-2092 696,-2092 651.064,-2270.59 629.406,-2356.67"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="625.994,-2355.89 626.948,-2366.44 632.782,-2357.6 625.994,-2355.89"/>
+</g>
+<!-- 26,1 -->
+<g id="node25" class="node"><title>26,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-2402" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-2413.17" font-family="Times Roman,serif" font-size="10.00">26,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-2394 459,-2408 510,-2408 510,-2394 459,-2394"/>
+<text text-anchor="start" x="469.5" y="-2398.67" font-family="Times Roman,serif" font-size="10.00">GTAAT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-2380 459,-2394 510,-2394 510,-2380 459,-2380"/>
+<text text-anchor="start" x="470" y="-2384.67" font-family="Times Roman,serif" font-size="10.00">ATTAC</text>
+</g>
+<!-- 26,1->26,2 -->
+<g id="edge312" class="edge"><title>26,1->26,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-2395.53C540.501,-2395.25 550.748,-2395.18 560.703,-2395.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-2398.82 570.933,-2395.52 561.007,-2391.82 560.864,-2398.82"/>
+</g>
+<!-- 27,3 -->
+<g id="node32" class="node"><title>27,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-2836" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-2847.17" font-family="Times Roman,serif" font-size="10.00">27,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-2828 593,-2842 644,-2842 644,-2828 593,-2828"/>
+<text text-anchor="start" x="603" y="-2832.67" font-family="Times Roman,serif" font-size="10.00">TTACG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-2814 593,-2828 644,-2828 644,-2814 593,-2814"/>
+<text text-anchor="start" x="602" y="-2818.67" font-family="Times Roman,serif" font-size="10.00">CGTAA</text>
+</g>
+<!-- 26,1->27,3 -->
+<g id="edge314" class="edge"><title>26,1->27,3</title>
+<path fill="none" stroke="#06a2cb" d="M491.198,-2437.62C510.508,-2533.18 562,-2788 562,-2788 562,-2788 569.283,-2794.24 578.596,-2802.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="576.529,-2805.06 586.4,-2808.91 581.085,-2799.75 576.529,-2805.06"/>
+</g>
+<!-- 26,2->25,4 -->
+<g id="edge318" class="edge"><title>26,2->25,4</title>
+<path fill="none" stroke="#ebb035" d="M624.938,-2366.26C641.884,-2278.96 684,-2062 684,-2062 684,-2062 808,-2022 808,-2022 808,-2022 818,-1826 818,-1826 818,-1826 830,-1820 830,-1820 830,-1820 942,-1780 942,-1780 942,-1780 952,-1268 952,-1268 952,-1268 973.274,-1237.34 992.023,-1210.32"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="994.909,-1212.3 997.734,-1202.09 989.158,-1208.31 994.909,-1212.3"/>
+</g>
+<!-- 26,2->26,1 -->
+<g id="edge320" class="edge"><title>26,2->26,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-2408.48C561.29,-2408.75 551.041,-2408.82 541.091,-2408.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-2405.18 530.867,-2408.47 540.792,-2412.18 540.937,-2405.18"/>
+</g>
+<!-- 26,3 -->
+<g id="node27" class="node"><title>26,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-2402" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-2413.17" font-family="Times Roman,serif" font-size="10.00">26,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-2394 727,-2408 778,-2408 778,-2394 727,-2394"/>
+<text text-anchor="start" x="736.5" y="-2398.67" font-family="Times Roman,serif" font-size="10.00">AATAG</text>
+<polygon fill="#218559" stroke="#218559" points="727,-2380 727,-2394 778,-2394 778,-2380 727,-2380"/>
+<text text-anchor="start" x="738.5" y="-2384.67" font-family="Times Roman,serif" font-size="10.00">CTATT</text>
+</g>
+<!-- 26,2->26,3 -->
+<g id="edge316" class="edge"><title>26,2->26,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-2395.53C674.501,-2395.25 684.748,-2395.18 694.703,-2395.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-2398.82 704.933,-2395.52 695.007,-2391.82 694.864,-2398.82"/>
+</g>
+<!-- 26,3->26,2 -->
+<g id="edge326" class="edge"><title>26,3->26,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-2408.48C695.29,-2408.75 685.041,-2408.82 675.091,-2408.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-2405.18 664.867,-2408.47 674.792,-2412.18 674.937,-2405.18"/>
+</g>
+<!-- 26,3->26,4 -->
+<g id="edge322" class="edge"><title>26,3->26,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-2395.53C808.501,-2395.25 818.748,-2395.18 828.703,-2395.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-2398.82 838.933,-2395.52 829.007,-2391.82 828.864,-2398.82"/>
+</g>
+<!-- 27,1 -->
+<g id="node30" class="node"><title>27,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-2836" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-2847.17" font-family="Times Roman,serif" font-size="10.00">27,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-2828 325,-2842 376,-2842 376,-2828 325,-2828"/>
+<text text-anchor="start" x="337" y="-2832.67" font-family="Times Roman,serif" font-size="10.00">TATTA</text>
+<polygon fill="#218559" stroke="#218559" points="325,-2814 325,-2828 376,-2828 376,-2814 325,-2814"/>
+<text text-anchor="start" x="336" y="-2818.67" font-family="Times Roman,serif" font-size="10.00">TAATA</text>
+</g>
+<!-- 26,3->27,1 -->
+<g id="edge324" class="edge"><title>26,3->27,1</title>
+<path fill="none" stroke="#06a2cb" d="M720.4,-2429.09C707.697,-2439.97 696,-2450 696,-2450 696,-2450 686,-2566 686,-2566 686,-2566 480.412,-2731.2 390.866,-2803.16"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="388.432,-2800.63 382.829,-2809.62 392.816,-2806.08 388.432,-2800.63"/>
+</g>
+<!-- 26,4->25,2 -->
+<g id="edge328" class="edge"><title>26,4->25,2</title>
+<path fill="none" stroke="#ebb035" d="M854.4,-2374.91C841.697,-2364.03 830,-2354 830,-2354 830,-2354 808,-1492 808,-1492 808,-1492 775.499,-1305.12 760.017,-1216.1"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="763.423,-1215.25 758.261,-1206 756.526,-1216.45 763.423,-1215.25"/>
+</g>
+<!-- 26,4->26,3 -->
+<g id="edge330" class="edge"><title>26,4->26,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-2408.48C829.29,-2408.75 819.041,-2408.82 809.091,-2408.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-2405.18 798.867,-2408.47 808.792,-2412.18 808.937,-2405.18"/>
+</g>
+<!-- 27,1->26,3 -->
+<g id="edge736" class="edge"><title>27,1->26,3</title>
+<path fill="none" stroke="#06a2cb" d="M366.925,-2802.15C389.865,-2756.27 428,-2680 428,-2680 428,-2680 696,-2450 696,-2450 696,-2450 703.283,-2443.76 712.596,-2435.78"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="715.085,-2438.25 720.4,-2429.09 710.529,-2432.94 715.085,-2438.25"/>
+</g>
+<!-- 27,2 -->
+<g id="node31" class="node"><title>27,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-2836" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-2847.17" font-family="Times Roman,serif" font-size="10.00">27,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-2828 459,-2842 510,-2842 510,-2828 459,-2828"/>
+<text text-anchor="start" x="470" y="-2832.67" font-family="Times Roman,serif" font-size="10.00">ATTAC</text>
+<polygon fill="#218559" stroke="#218559" points="459,-2814 459,-2828 510,-2828 510,-2814 459,-2814"/>
+<text text-anchor="start" x="469.5" y="-2818.67" font-family="Times Roman,serif" font-size="10.00">GTAAT</text>
+</g>
+<!-- 27,1->27,2 -->
+<g id="edge734" class="edge"><title>27,1->27,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-2829.53C406.501,-2829.25 416.748,-2829.18 426.703,-2829.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-2832.82 436.933,-2829.52 427.007,-2825.82 426.864,-2832.82"/>
+</g>
+<!-- 27,2->27,1 -->
+<g id="edge742" class="edge"><title>27,2->27,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-2842.48C427.29,-2842.75 417.041,-2842.82 407.091,-2842.68"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-2839.18 396.867,-2842.47 406.792,-2846.18 406.937,-2839.18"/>
+</g>
+<!-- 27,2->27,3 -->
+<g id="edge740" class="edge"><title>27,2->27,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-2829.53C540.501,-2829.25 550.748,-2829.18 560.703,-2829.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-2832.82 570.933,-2829.52 561.007,-2825.82 560.864,-2832.82"/>
+</g>
+<!-- 28,1 -->
+<g id="node55" class="node"><title>28,1</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-2740" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-2751.17" font-family="Times Roman,serif" font-size="10.00">28,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-2732 593,-2746 644,-2746 644,-2732 593,-2732"/>
+<text text-anchor="start" x="603" y="-2736.67" font-family="Times Roman,serif" font-size="10.00">TTACG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-2718 593,-2732 644,-2732 644,-2718 593,-2718"/>
+<text text-anchor="start" x="602" y="-2722.67" font-family="Times Roman,serif" font-size="10.00">CGTAA</text>
+</g>
+<!-- 27,2->28,1 -->
+<g id="edge738" class="edge"><title>27,2->28,1</title>
+<path fill="none" stroke="#dd1e2f" d="M523.949,-2816.03C538.751,-2808.62 552,-2802 552,-2802 552,-2802 562,-2788 562,-2788 562,-2788 569.283,-2781.76 578.596,-2773.78"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="581.085,-2776.25 586.4,-2767.09 576.529,-2770.94 581.085,-2776.25"/>
+</g>
+<!-- 27,3->26,1 -->
+<g id="edge748" class="edge"><title>27,3->26,1</title>
+<path fill="none" stroke="#06a2cb" d="M586.4,-2808.91C573.697,-2798.03 562,-2788 562,-2788 562,-2788 550,-2746 550,-2746 550,-2746 510.777,-2541.56 492.805,-2447.89"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="496.198,-2447 490.876,-2437.84 489.323,-2448.32 496.198,-2447"/>
+</g>
+<!-- 27,3->27,2 -->
+<g id="edge750" class="edge"><title>27,3->27,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-2842.48C561.29,-2842.75 551.041,-2842.82 541.091,-2842.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-2839.18 530.867,-2842.47 540.792,-2846.18 540.937,-2839.18"/>
+</g>
+<!-- 27,4 -->
+<g id="node33" class="node"><title>27,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-2836" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-2847.17" font-family="Times Roman,serif" font-size="10.00">27,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-2828 727,-2842 778,-2842 778,-2828 727,-2828"/>
+<text text-anchor="start" x="737" y="-2832.67" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-2814 727,-2828 778,-2828 778,-2814 727,-2814"/>
+<text text-anchor="start" x="736.5" y="-2818.67" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+</g>
+<!-- 27,3->27,4 -->
+<g id="edge746" class="edge"><title>27,3->27,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-2829.53C674.501,-2829.25 684.748,-2829.18 694.703,-2829.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-2832.82 704.933,-2829.52 695.007,-2825.82 694.864,-2832.82"/>
+</g>
+<!-- 22,1 -->
+<g id="node45" class="node"><title>22,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-3234" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-3245.17" font-family="Times Roman,serif" font-size="10.00">22,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-3226 459,-3240 510,-3240 510,-3226 459,-3226"/>
+<text text-anchor="start" x="469" y="-3230.67" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-3212 459,-3226 510,-3226 510,-3212 459,-3212"/>
+<text text-anchor="start" x="468.5" y="-3216.67" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+</g>
+<!-- 27,3->22,1 -->
+<g id="edge744" class="edge"><title>27,3->22,1</title>
+<path fill="none" stroke="#dd1e2f" d="M605.012,-2871.02C588.469,-2915.63 562,-2987 562,-2987 562,-2987 540,-3083 540,-3083 540,-3083 517.026,-3144.95 500.508,-3189.49"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="497.184,-3188.38 496.988,-3198.98 503.747,-3190.82 497.184,-3188.38"/>
+</g>
+<!-- 27,4->27,3 -->
+<g id="edge758" class="edge"><title>27,4->27,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-2842.48C695.29,-2842.75 685.041,-2842.82 675.091,-2842.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-2839.18 664.867,-2842.47 674.792,-2846.18 674.937,-2839.18"/>
+</g>
+<!-- 21,4 -->
+<g id="node43" class="node"><title>21,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-3649" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-3660.17" font-family="Times Roman,serif" font-size="10.00">21,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-3641 861,-3655 912,-3655 912,-3641 861,-3641"/>
+<text text-anchor="start" x="869.5" y="-3645.67" font-family="Times Roman,serif" font-size="10.00">ACGTG</text>
+<polygon fill="#218559" stroke="#218559" points="861,-3627 861,-3641 912,-3641 912,-3627 861,-3627"/>
+<text text-anchor="start" x="870" y="-3631.67" font-family="Times Roman,serif" font-size="10.00">CACGT</text>
+</g>
+<!-- 27,4->21,4 -->
+<g id="edge754" class="edge"><title>27,4->21,4</title>
+<path fill="none" stroke="#dd1e2f" d="M764.988,-2871.02C781.531,-2915.63 808,-2987 808,-2987 808,-2987 820,-3083 820,-3083 820,-3083 830,-3531 830,-3531 830,-3531 849.437,-3571.96 865.409,-3605.61"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="862.353,-3607.34 869.802,-3614.87 868.677,-3604.33 862.353,-3607.34"/>
+</g>
+<!-- 22,2 -->
+<g id="node46" class="node"><title>22,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-3234" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-3245.17" font-family="Times Roman,serif" font-size="10.00">22,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-3226 593,-3240 644,-3240 644,-3226 593,-3226"/>
+<text text-anchor="start" x="601.5" y="-3230.67" font-family="Times Roman,serif" font-size="10.00">ACGTG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-3212 593,-3226 644,-3226 644,-3212 593,-3212"/>
+<text text-anchor="start" x="602" y="-3216.67" font-family="Times Roman,serif" font-size="10.00">CACGT</text>
+</g>
+<!-- 27,4->22,2 -->
+<g id="edge756" class="edge"><title>27,4->22,2</title>
+<path fill="none" stroke="#dd1e2f" d="M739.012,-2871.02C722.469,-2915.63 696,-2987 696,-2987 696,-2987 674,-3179 674,-3179 674,-3179 665.159,-3187.68 654.502,-3198.15"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="651.994,-3195.71 647.312,-3205.21 656.899,-3200.7 651.994,-3195.71"/>
+</g>
+<!-- 28,3 -->
+<g id="node57" class="node"><title>28,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-2740" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-2751.17" font-family="Times Roman,serif" font-size="10.00">28,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-2732 861,-2746 912,-2746 912,-2732 861,-2732"/>
+<text text-anchor="start" x="870.5" y="-2736.67" font-family="Times Roman,serif" font-size="10.00">ACGTC</text>
+<polygon fill="#218559" stroke="#218559" points="861,-2718 861,-2732 912,-2732 912,-2718 861,-2718"/>
+<text text-anchor="start" x="869.5" y="-2722.67" font-family="Times Roman,serif" font-size="10.00">GACGT</text>
+</g>
+<!-- 27,4->28,3 -->
+<g id="edge752" class="edge"><title>27,4->28,3</title>
+<path fill="none" stroke="#dd1e2f" d="M790.767,-2814.91C806.038,-2806.6 820,-2799 820,-2799 820,-2799 832.918,-2787.45 847.324,-2774.57"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="849.81,-2777.05 854.933,-2767.77 845.145,-2771.83 849.81,-2777.05"/>
+</g>
+<!-- 20,1 -->
+<g id="node35" class="node"><title>20,1</title>
+<ellipse fill="none" stroke="black" cx="216" cy="-4114" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="193.5" y="-4125.17" font-family="Times Roman,serif" font-size="10.00">20,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="191,-4106 191,-4120 242,-4120 242,-4106 191,-4106"/>
+<text text-anchor="start" x="201.5" y="-4110.67" font-family="Times Roman,serif" font-size="10.00">TCAAT</text>
+<polygon fill="#218559" stroke="#218559" points="191,-4092 191,-4106 242,-4106 242,-4092 191,-4092"/>
+<text text-anchor="start" x="201" y="-4096.67" font-family="Times Roman,serif" font-size="10.00">ATTGA</text>
+</g>
+<!-- 20,2 -->
+<g id="node36" class="node"><title>20,2</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-4114" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-4125.17" font-family="Times Roman,serif" font-size="10.00">20,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-4106 325,-4120 376,-4120 376,-4106 325,-4106"/>
+<text text-anchor="start" x="335" y="-4110.67" font-family="Times Roman,serif" font-size="10.00">CAATA</text>
+<polygon fill="#218559" stroke="#218559" points="325,-4092 325,-4106 376,-4106 376,-4092 325,-4092"/>
+<text text-anchor="start" x="336" y="-4096.67" font-family="Times Roman,serif" font-size="10.00">TATTG</text>
+</g>
+<!-- 20,1->20,2 -->
+<g id="edge232" class="edge"><title>20,1->20,2</title>
+<path fill="none" stroke="#dd1e2f" d="M262.867,-4107.53C272.501,-4107.25 282.748,-4107.18 292.703,-4107.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="292.864,-4110.82 302.933,-4107.52 293.007,-4103.82 292.864,-4110.82"/>
+</g>
+<!-- 20,1->19,3 -->
+<g id="edge234" class="edge"><title>20,1->19,3</title>
+<path fill="none" stroke="#06a2cb" d="M237.703,-4146.23C257.603,-4175.79 284,-4215 284,-4215 284,-4215 294,-5072 294,-5072 294,-5072 316.267,-5129.26 332.719,-5171.56"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="329.557,-5173.09 336.444,-5181.14 336.081,-5170.55 329.557,-5173.09"/>
+</g>
+<!-- 20,2->20,1 -->
+<g id="edge240" class="edge"><title>20,2->20,1</title>
+<path fill="none" stroke="#218559" d="M302.933,-4120.48C293.29,-4120.75 283.041,-4120.82 273.091,-4120.68"/>
+<polygon fill="#218559" stroke="#218559" points="272.937,-4117.18 262.867,-4120.47 272.792,-4124.18 272.937,-4117.18"/>
+</g>
+<!-- 20,3 -->
+<g id="node37" class="node"><title>20,3</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-4114" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-4125.17" font-family="Times Roman,serif" font-size="10.00">20,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-4106 459,-4120 510,-4120 510,-4106 459,-4106"/>
+<text text-anchor="start" x="469" y="-4110.67" font-family="Times Roman,serif" font-size="10.00">AATAC</text>
+<polygon fill="#218559" stroke="#218559" points="459,-4092 459,-4106 510,-4106 510,-4092 459,-4092"/>
+<text text-anchor="start" x="470.5" y="-4096.67" font-family="Times Roman,serif" font-size="10.00">GTATT</text>
+</g>
+<!-- 20,2->20,3 -->
+<g id="edge238" class="edge"><title>20,2->20,3</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-4107.53C406.501,-4107.25 416.748,-4107.18 426.703,-4107.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-4110.82 436.933,-4107.52 427.007,-4103.82 426.864,-4110.82"/>
+</g>
+<!-- 21,1 -->
+<g id="node40" class="node"><title>21,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-3649" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-3660.17" font-family="Times Roman,serif" font-size="10.00">21,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-3641 459,-3655 510,-3655 510,-3641 459,-3641"/>
+<text text-anchor="start" x="469" y="-3645.67" font-family="Times Roman,serif" font-size="10.00">AATAC</text>
+<polygon fill="#218559" stroke="#218559" points="459,-3627 459,-3641 510,-3641 510,-3627 459,-3627"/>
+<text text-anchor="start" x="470.5" y="-3631.67" font-family="Times Roman,serif" font-size="10.00">GTATT</text>
+</g>
+<!-- 20,2->21,1 -->
+<g id="edge236" class="edge"><title>20,2->21,1</title>
+<path fill="none" stroke="#dd1e2f" d="M364.013,-4079.55C378.18,-4044.72 398,-3996 398,-3996 398,-3996 449.463,-3788.35 472.769,-3694.32"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="476.192,-3695.05 475.201,-3684.5 469.398,-3693.37 476.192,-3695.05"/>
+</g>
+<!-- 20,3->20,2 -->
+<g id="edge246" class="edge"><title>20,3->20,2</title>
+<path fill="none" stroke="#218559" d="M436.933,-4120.48C427.29,-4120.75 417.041,-4120.82 407.091,-4120.68"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-4117.18 396.867,-4120.47 406.792,-4124.18 406.937,-4117.18"/>
+</g>
+<!-- 20,4 -->
+<g id="node38" class="node"><title>20,4</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-4114" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-4125.17" font-family="Times Roman,serif" font-size="10.00">20,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-4106 593,-4120 644,-4120 644,-4106 593,-4106"/>
+<text text-anchor="start" x="602.5" y="-4110.67" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-4092 593,-4106 644,-4106 644,-4092 593,-4092"/>
+<text text-anchor="start" x="603.5" y="-4096.67" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+</g>
+<!-- 20,3->20,4 -->
+<g id="edge242" class="edge"><title>20,3->20,4</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-4107.53C540.501,-4107.25 550.748,-4107.18 560.703,-4107.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-4110.82 570.933,-4107.52 561.007,-4103.82 560.864,-4110.82"/>
+</g>
+<!-- 19,1 -->
+<g id="node149" class="node"><title>19,1</title>
+<ellipse fill="none" stroke="black" cx="64" cy="-5216" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="41.5" y="-5227.17" font-family="Times Roman,serif" font-size="10.00">19,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="39,-5208 39,-5222 90,-5222 90,-5208 39,-5208"/>
+<text text-anchor="start" x="50" y="-5212.67" font-family="Times Roman,serif" font-size="10.00">TATTG</text>
+<polygon fill="#218559" stroke="#218559" points="39,-5194 39,-5208 90,-5208 90,-5194 39,-5194"/>
+<text text-anchor="start" x="49" y="-5198.67" font-family="Times Roman,serif" font-size="10.00">CAATA</text>
+</g>
+<!-- 20,3->19,1 -->
+<g id="edge244" class="edge"><title>20,3->19,1</title>
+<path fill="none" stroke="#06a2cb" d="M476.816,-4149.67C456.146,-4252.3 398,-4541 398,-4541 398,-4541 294,-4677 294,-4677 294,-4677 137.748,-5043.17 82.8196,-5171.9"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="79.4431,-5170.89 78.7374,-5181.46 85.8814,-5173.64 79.4431,-5170.89"/>
+</g>
+<!-- 20,4->20,3 -->
+<g id="edge250" class="edge"><title>20,4->20,3</title>
+<path fill="none" stroke="#218559" d="M570.933,-4120.48C561.29,-4120.75 551.041,-4120.82 541.091,-4120.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-4117.18 530.867,-4120.47 540.792,-4124.18 540.937,-4117.18"/>
+</g>
+<!-- 21,3 -->
+<g id="node42" class="node"><title>21,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-3649" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-3660.17" font-family="Times Roman,serif" font-size="10.00">21,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-3641 727,-3655 778,-3655 778,-3641 727,-3641"/>
+<text text-anchor="start" x="737" y="-3645.67" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-3627 727,-3641 778,-3641 778,-3627 727,-3627"/>
+<text text-anchor="start" x="736.5" y="-3631.67" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+</g>
+<!-- 20,4->21,3 -->
+<g id="edge248" class="edge"><title>20,4->21,3</title>
+<path fill="none" stroke="#dd1e2f" d="M634.198,-4079.87C650.736,-4045.02 674,-3996 674,-3996 674,-3996 684,-3750 684,-3750 684,-3750 705.679,-3717.8 724.556,-3689.76"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727.616,-3691.48 730.297,-3681.23 721.809,-3687.58 727.616,-3691.48"/>
+</g>
+<!-- 21,1->20,2 -->
+<g id="edge664" class="edge"><title>21,1->20,2</title>
+<path fill="none" stroke="#218559" d="M475.04,-3684.44C456.575,-3757.49 416,-3918 416,-3918 416,-3918 398,-3996 398,-3996 398,-3996 381.582,-4036.36 367.95,-4069.87"/>
+<polygon fill="#218559" stroke="#218559" points="364.539,-4068.97 364.013,-4079.55 371.023,-4071.61 364.539,-4068.97"/>
+</g>
+<!-- 21,2 -->
+<g id="node41" class="node"><title>21,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-3649" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-3660.17" font-family="Times Roman,serif" font-size="10.00">21,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-3641 593,-3655 644,-3655 644,-3641 593,-3641"/>
+<text text-anchor="start" x="602.5" y="-3645.67" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-3627 593,-3641 644,-3641 644,-3627 593,-3627"/>
+<text text-anchor="start" x="603.5" y="-3631.67" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+</g>
+<!-- 21,1->21,2 -->
+<g id="edge662" class="edge"><title>21,1->21,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-3642.53C540.501,-3642.25 550.748,-3642.18 560.703,-3642.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-3645.82 570.933,-3642.52 561.007,-3638.82 560.864,-3645.82"/>
+</g>
+<!-- 21,2->21,1 -->
+<g id="edge670" class="edge"><title>21,2->21,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-3655.48C561.29,-3655.75 551.041,-3655.82 541.091,-3655.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-3652.18 530.867,-3655.47 540.792,-3659.18 540.937,-3652.18"/>
+</g>
+<!-- 21,2->21,3 -->
+<g id="edge668" class="edge"><title>21,2->21,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-3642.53C674.501,-3642.25 684.748,-3642.18 694.703,-3642.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-3645.82 704.933,-3642.52 695.007,-3638.82 694.864,-3645.82"/>
+</g>
+<!-- 21,2->22,1 -->
+<g id="edge666" class="edge"><title>21,2->22,1</title>
+<path fill="none" stroke="#dd1e2f" d="M601.802,-3614.87C585.264,-3580.02 562,-3531 562,-3531 562,-3531 552,-3362 552,-3362 552,-3362 526.516,-3314.03 506.749,-3276.82"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="509.649,-3274.82 501.866,-3267.63 503.467,-3278.1 509.649,-3274.82"/>
+</g>
+<!-- 21,3->20,4 -->
+<g id="edge674" class="edge"><title>21,3->20,4</title>
+<path fill="none" stroke="#218559" d="M733.956,-3682.43C713.81,-3719.76 684,-3775 684,-3775 684,-3775 674,-3996 674,-3996 674,-3996 654.563,-4036.96 638.591,-4070.61"/>
+<polygon fill="#218559" stroke="#218559" points="635.323,-4069.33 634.198,-4079.87 641.647,-4072.34 635.323,-4069.33"/>
+</g>
+<!-- 21,3->21,2 -->
+<g id="edge676" class="edge"><title>21,3->21,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-3655.48C695.29,-3655.75 685.041,-3655.82 675.091,-3655.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-3652.18 664.867,-3655.47 674.792,-3659.18 674.937,-3652.18"/>
+</g>
+<!-- 21,3->21,4 -->
+<g id="edge672" class="edge"><title>21,3->21,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-3642.53C808.501,-3642.25 818.748,-3642.18 828.703,-3642.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-3645.82 838.933,-3642.52 829.007,-3638.82 828.864,-3645.82"/>
+</g>
+<!-- 21,4->27,4 -->
+<g id="edge680" class="edge"><title>21,4->27,4</title>
+<path fill="none" stroke="#218559" d="M869.802,-3614.87C853.264,-3580.02 830,-3531 830,-3531 830,-3531 820,-2915 820,-2915 820,-2915 802.13,-2894.24 784.712,-2874"/>
+<polygon fill="#218559" stroke="#218559" points="787.272,-2871.61 778.096,-2866.32 781.967,-2876.18 787.272,-2871.61"/>
+</g>
+<!-- 21,4->21,3 -->
+<g id="edge682" class="edge"><title>21,4->21,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-3655.48C829.29,-3655.75 819.041,-3655.82 809.091,-3655.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-3652.18 798.867,-3655.47 808.792,-3659.18 808.937,-3652.18"/>
+</g>
+<!-- 22,3 -->
+<g id="node47" class="node"><title>22,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-3234" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-3245.17" font-family="Times Roman,serif" font-size="10.00">22,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-3226 727,-3240 778,-3240 778,-3226 727,-3226"/>
+<text text-anchor="start" x="735.5" y="-3230.67" font-family="Times Roman,serif" font-size="10.00">CGTGA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-3212 727,-3226 778,-3226 778,-3212 727,-3212"/>
+<text text-anchor="start" x="736" y="-3216.67" font-family="Times Roman,serif" font-size="10.00">TCACG</text>
+</g>
+<!-- 21,4->22,3 -->
+<g id="edge678" class="edge"><title>21,4->22,3</title>
+<path fill="none" stroke="#dd1e2f" d="M869.802,-3614.87C853.264,-3580.02 830,-3531 830,-3531 830,-3531 820,-3477 820,-3477 820,-3477 784.341,-3349.57 764.668,-3279.27"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="767.953,-3278.02 761.888,-3269.33 761.212,-3279.91 767.953,-3278.02"/>
+</g>
+<!-- 22,1->27,3 -->
+<g id="edge256" class="edge"><title>22,1->27,3</title>
+<path fill="none" stroke="#218559" d="M496.988,-3198.98C513.531,-3154.37 540,-3083 540,-3083 540,-3083 550,-2962 550,-2962 550,-2962 575.335,-2915.06 595.076,-2878.48"/>
+<polygon fill="#218559" stroke="#218559" points="598.286,-2879.9 599.956,-2869.43 592.126,-2876.57 598.286,-2879.9"/>
+</g>
+<!-- 22,1->21,2 -->
+<g id="edge258" class="edge"><title>22,1->21,2</title>
+<path fill="none" stroke="#218559" d="M500.984,-3267.72C521.125,-3307.7 552,-3369 552,-3369 552,-3369 562,-3531 562,-3531 562,-3531 581.437,-3571.96 597.409,-3605.61"/>
+<polygon fill="#218559" stroke="#218559" points="594.353,-3607.34 601.802,-3614.87 600.677,-3604.33 594.353,-3607.34"/>
+</g>
+<!-- 22,1->22,2 -->
+<g id="edge252" class="edge"><title>22,1->22,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-3227.53C540.501,-3227.25 550.748,-3227.18 560.703,-3227.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-3230.82 570.933,-3227.52 561.007,-3223.82 560.864,-3230.82"/>
+</g>
+<!-- 22,1->28,1 -->
+<g id="edge254" class="edge"><title>22,1->28,1</title>
+<path fill="none" stroke="#218559" d="M496.988,-3198.98C513.531,-3154.37 540,-3083 540,-3083 540,-3083 562,-2788 562,-2788 562,-2788 569.283,-2781.76 578.596,-2773.78"/>
+<polygon fill="#218559" stroke="#218559" points="581.085,-2776.25 586.4,-2767.09 576.529,-2770.94 581.085,-2776.25"/>
+</g>
+<!-- 22,2->27,4 -->
+<g id="edge264" class="edge"><title>22,2->27,4</title>
+<path fill="none" stroke="#218559" d="M647.312,-3205.21C660.861,-3191.9 674,-3179 674,-3179 674,-3179 684,-2962 684,-2962 684,-2962 709.335,-2915.06 729.076,-2878.48"/>
+<polygon fill="#218559" stroke="#218559" points="732.286,-2879.9 733.956,-2869.43 726.126,-2876.57 732.286,-2879.9"/>
+</g>
+<!-- 22,2->22,1 -->
+<g id="edge266" class="edge"><title>22,2->22,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-3240.48C561.29,-3240.75 551.041,-3240.82 541.091,-3240.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-3237.18 530.867,-3240.47 540.792,-3244.18 540.937,-3237.18"/>
+</g>
+<!-- 22,2->22,3 -->
+<g id="edge260" class="edge"><title>22,2->22,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-3227.53C674.501,-3227.25 684.748,-3227.18 694.703,-3227.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-3230.82 704.933,-3227.52 695.007,-3223.82 694.864,-3230.82"/>
+</g>
+<!-- 23,4 -->
+<g id="node53" class="node"><title>23,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-4725" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-4736.17" font-family="Times Roman,serif" font-size="10.00">23,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-4717 727,-4731 778,-4731 778,-4717 727,-4717"/>
+<text text-anchor="start" x="736" y="-4721.67" font-family="Times Roman,serif" font-size="10.00">TCACG</text>
+<polygon fill="#218559" stroke="#218559" points="727,-4703 727,-4717 778,-4717 778,-4703 727,-4703"/>
+<text text-anchor="start" x="735.5" y="-4707.67" font-family="Times Roman,serif" font-size="10.00">CGTGA</text>
+</g>
+<!-- 22,2->23,4 -->
+<g id="edge262" class="edge"><title>22,2->23,4</title>
+<path fill="none" stroke="#ebb035" d="M627.883,-3269.47C643.959,-3327.18 674,-3435 674,-3435 674,-3435 686,-3516 686,-3516 686,-3516 696,-4677 696,-4677 696,-4677 703.283,-4683.24 712.596,-4691.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="710.529,-4694.06 720.4,-4697.91 715.085,-4688.75 710.529,-4694.06"/>
+</g>
+<!-- 22,3->21,4 -->
+<g id="edge270" class="edge"><title>22,3->21,4</title>
+<path fill="none" stroke="#218559" d="M761.883,-3269.47C777.959,-3327.18 808,-3435 808,-3435 808,-3435 830,-3531 830,-3531 830,-3531 849.437,-3571.96 865.409,-3605.61"/>
+<polygon fill="#218559" stroke="#218559" points="862.353,-3607.34 869.802,-3614.87 868.677,-3604.33 862.353,-3607.34"/>
+</g>
+<!-- 22,3->22,2 -->
+<g id="edge272" class="edge"><title>22,3->22,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-3240.48C695.29,-3240.75 685.041,-3240.82 675.091,-3240.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-3237.18 664.867,-3240.47 674.792,-3244.18 674.937,-3237.18"/>
+</g>
+<!-- 22,4 -->
+<g id="node48" class="node"><title>22,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-3234" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-3245.17" font-family="Times Roman,serif" font-size="10.00">22,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-3226 861,-3240 912,-3240 912,-3226 861,-3226"/>
+<text text-anchor="start" x="869.5" y="-3230.67" font-family="Times Roman,serif" font-size="10.00">GTGAA</text>
+<polygon fill="#218559" stroke="#218559" points="861,-3212 861,-3226 912,-3226 912,-3212 861,-3212"/>
+<text text-anchor="start" x="871.5" y="-3216.67" font-family="Times Roman,serif" font-size="10.00">TTCAC</text>
+</g>
+<!-- 22,3->22,4 -->
+<g id="edge268" class="edge"><title>22,3->22,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-3227.53C808.501,-3227.25 818.748,-3227.18 828.703,-3227.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-3230.82 838.933,-3227.52 829.007,-3223.82 828.864,-3230.82"/>
+</g>
+<!-- 22,4->22,3 -->
+<g id="edge276" class="edge"><title>22,4->22,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-3240.48C829.29,-3240.75 819.041,-3240.82 809.091,-3240.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-3237.18 798.867,-3240.47 808.792,-3244.18 808.937,-3237.18"/>
+</g>
+<!-- 23,2 -->
+<g id="node51" class="node"><title>23,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-4725" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-4736.17" font-family="Times Roman,serif" font-size="10.00">23,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-4717 459,-4731 510,-4731 510,-4717 459,-4717"/>
+<text text-anchor="start" x="470" y="-4721.67" font-family="Times Roman,serif" font-size="10.00">TTTCA</text>
+<polygon fill="#218559" stroke="#218559" points="459,-4703 459,-4717 510,-4717 510,-4703 459,-4703"/>
+<text text-anchor="start" x="467.5" y="-4707.67" font-family="Times Roman,serif" font-size="10.00">TGAAA</text>
+</g>
+<!-- 22,4->23,2 -->
+<g id="edge274" class="edge"><title>22,4->23,2</title>
+<path fill="none" stroke="#ebb035" d="M876.117,-3269.47C860.041,-3327.18 830,-3435 830,-3435 830,-3435 808,-3561 808,-3561 808,-3561 696,-3601 696,-3601 696,-3601 674,-4192 674,-4192 674,-4192 562,-4415 562,-4415 562,-4415 550,-4546 550,-4546 550,-4546 540,-4677 540,-4677 540,-4677 532.717,-4683.24 523.404,-4691.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="520.915,-4688.75 515.6,-4697.91 525.471,-4694.06 520.915,-4688.75"/>
+</g>
+<!-- 23,1->24,3 -->
+<g id="edge690" class="edge"><title>23,1->24,3</title>
+<path fill="none" stroke="#06a2cb" d="M358.034,-4689.18C371.699,-4628.26 398,-4511 398,-4511 398,-4511 416,-4240 416,-4240 416,-4240 418,-4212 418,-4212 418,-4212 428,-4026 428,-4026 428,-4026 540,-3996 540,-3996 540,-3996 562,-3896 562,-3896 562,-3896 674,-3896 674,-3896 674,-3896 686,-4442 686,-4442 686,-4442 696,-5072 696,-5072 696,-5072 703.283,-5078.24 712.596,-5086.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="710.529,-5089.06 720.4,-5092.91 715.085,-5083.75 710.529,-5089.06"/>
+</g>
+<!-- 23,1->23,2 -->
+<g id="edge686" class="edge"><title>23,1->23,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-4718.53C406.501,-4718.25 416.748,-4718.18 426.703,-4718.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-4721.82 436.933,-4718.52 427.007,-4714.82 426.864,-4721.82"/>
+</g>
+<!-- 23,1->7,1 -->
+<g id="edge684" class="edge"><title>23,1->7,1</title>
+<path fill="none" stroke="#dd1e2f" d="M386.389,-4701.45C402.556,-4690.99 418,-4681 418,-4681 418,-4681 430.492,-4671.16 444.607,-4660.04"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="446.857,-4662.72 452.546,-4653.78 442.525,-4657.22 446.857,-4662.72"/>
+</g>
+<!-- 17,2 -->
+<g id="node140" class="node"><title>17,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-3948" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-3959.17" font-family="Times Roman,serif" font-size="10.00">17,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-3940 459,-3954 510,-3954 510,-3940 459,-3940"/>
+<text text-anchor="start" x="467" y="-3944.67" font-family="Times Roman,serif" font-size="10.00">AAACG</text>
+<polygon fill="#218559" stroke="#218559" points="459,-3926 459,-3940 510,-3940 510,-3926 459,-3926"/>
+<text text-anchor="start" x="469.5" y="-3930.67" font-family="Times Roman,serif" font-size="10.00">CGTTT</text>
+</g>
+<!-- 23,1->17,2 -->
+<g id="edge692" class="edge"><title>23,1->17,2</title>
+<path fill="none" stroke="#06a2cb" d="M358.034,-4689.18C371.699,-4628.26 398,-4511 398,-4511 398,-4511 416,-4215 416,-4215 416,-4215 418,-4187 418,-4187 418,-4187 428,-3998 428,-3998 428,-3998 435.684,-3991.14 445.359,-3982.5"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="447.923,-3984.9 453.052,-3975.63 443.261,-3979.68 447.923,-3984.9"/>
+</g>
+<!-- 23,1->19,4 -->
+<g id="edge688" class="edge"><title>23,1->19,4</title>
+<path fill="none" stroke="#ebb035" d="M356.826,-4760.7C370.078,-4829.99 398,-4976 398,-4976 398,-4976 418,-5091 418,-5091 418,-5091 428,-5168 428,-5168 428,-5168 435.283,-5174.24 444.596,-5182.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="442.529,-5185.06 452.4,-5188.91 447.085,-5179.75 442.529,-5185.06"/>
+</g>
+<!-- 18,1 -->
+<g id="node154" class="node"><title>18,1</title>
+<ellipse fill="none" stroke="black" cx="216" cy="-4463" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="193.5" y="-4474.17" font-family="Times Roman,serif" font-size="10.00">18,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="191,-4455 191,-4469 242,-4469 242,-4455 191,-4455"/>
+<text text-anchor="start" x="201.5" y="-4459.67" font-family="Times Roman,serif" font-size="10.00">CGTTT</text>
+<polygon fill="#218559" stroke="#218559" points="191,-4441 191,-4455 242,-4455 242,-4441 191,-4441"/>
+<text text-anchor="start" x="199" y="-4445.67" font-family="Times Roman,serif" font-size="10.00">AAACG</text>
+</g>
+<!-- 23,1->18,1 -->
+<g id="edge694" class="edge"><title>23,1->18,1</title>
+<path fill="none" stroke="#218559" d="M333.345,-4690.93C313.793,-4650.94 284,-4590 284,-4590 284,-4590 258.516,-4542.4 238.749,-4505.49"/>
+<polygon fill="#218559" stroke="#218559" points="241.672,-4503.53 233.866,-4496.37 235.501,-4506.84 241.672,-4503.53"/>
+</g>
+<!-- 23,2->22,4 -->
+<g id="edge698" class="edge"><title>23,2->22,4</title>
+<path fill="none" stroke="#ebb035" d="M515.6,-4697.91C528.303,-4687.03 540,-4677 540,-4677 540,-4677 562,-4192 562,-4192 562,-4192 674,-4162 674,-4162 674,-4162 684,-3796 684,-3796 684,-3796 696,-3561 696,-3561 696,-3561 808,-3531 808,-3531 808,-3531 830,-3435 830,-3435 830,-3435 856.845,-3338.65 873.362,-3279.36"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="876.805,-3280.04 876.117,-3269.47 870.062,-3278.17 876.805,-3280.04"/>
+</g>
+<!-- 23,2->23,1 -->
+<g id="edge702" class="edge"><title>23,2->23,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-4731.48C427.29,-4731.75 417.041,-4731.82 407.091,-4731.68"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-4728.18 396.867,-4731.47 406.792,-4735.18 406.937,-4728.18"/>
+</g>
+<!-- 23,2->23,3 -->
+<g id="edge696" class="edge"><title>23,2->23,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-4718.53C540.501,-4718.25 550.748,-4718.18 560.703,-4718.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-4721.82 570.933,-4718.52 561.007,-4714.82 560.864,-4721.82"/>
+</g>
+<!-- 23,2->17,1 -->
+<g id="edge700" class="edge"><title>23,2->17,1</title>
+<path fill="none" stroke="#06a2cb" d="M452.4,-4697.91C439.697,-4687.03 428,-4677 428,-4677 428,-4677 418,-3825 418,-3825 418,-3825 416,-3825 416,-3825 416,-3825 392.048,-3869.64 373.003,-3905.13"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="369.679,-3903.92 368.035,-3914.39 375.847,-3907.23 369.679,-3903.92"/>
+</g>
+<!-- 23,3->24,1 -->
+<g id="edge706" class="edge"><title>23,3->24,1</title>
+<path fill="none" stroke="#06a2cb" d="M609.73,-4760.63C591.608,-4838.72 550,-5018 550,-5018 550,-5018 540,-5072 540,-5072 540,-5072 532.717,-5078.24 523.404,-5086.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="520.915,-5083.75 515.6,-5092.91 525.471,-5089.06 520.915,-5083.75"/>
+</g>
+<!-- 23,3->23,2 -->
+<g id="edge708" class="edge"><title>23,3->23,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-4731.48C561.29,-4731.75 551.041,-4731.82 541.091,-4731.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-4728.18 530.867,-4731.47 540.792,-4735.18 540.937,-4728.18"/>
+</g>
+<!-- 23,3->23,4 -->
+<g id="edge704" class="edge"><title>23,3->23,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-4718.53C674.501,-4718.25 684.748,-4718.18 694.703,-4718.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-4721.82 704.933,-4718.52 695.007,-4714.82 694.864,-4721.82"/>
+</g>
+<!-- 23,4->22,2 -->
+<g id="edge710" class="edge"><title>23,4->22,2</title>
+<path fill="none" stroke="#ebb035" d="M720.4,-4697.91C707.697,-4687.03 696,-4677 696,-4677 696,-4677 686,-3501 686,-3501 686,-3501 674,-3435 674,-3435 674,-3435 647.155,-3338.65 630.638,-3279.36"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="633.938,-3278.17 627.883,-3269.47 627.195,-3280.04 633.938,-3278.17"/>
+</g>
+<!-- 23,4->23,3 -->
+<g id="edge712" class="edge"><title>23,4->23,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-4731.48C695.29,-4731.75 685.041,-4731.82 675.091,-4731.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-4728.18 664.867,-4731.47 674.792,-4735.18 674.937,-4728.18"/>
+</g>
+<!-- 28,1->27,2 -->
+<g id="edge336" class="edge"><title>28,1->27,2</title>
+<path fill="none" stroke="#218559" d="M583.317,-2764.85C566.127,-2777.16 545.205,-2792.15 527.005,-2805.19"/>
+<polygon fill="#218559" stroke="#218559" points="524.948,-2802.36 518.857,-2811.03 529.025,-2808.05 524.948,-2802.36"/>
+</g>
+<!-- 28,1->22,1 -->
+<g id="edge332" class="edge"><title>28,1->22,1</title>
+<path fill="none" stroke="#dd1e2f" d="M586.4,-2767.09C573.697,-2777.97 562,-2788 562,-2788 562,-2788 550,-2923 550,-2923 550,-2923 540,-3083 540,-3083 540,-3083 517.026,-3144.95 500.508,-3189.49"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="497.184,-3188.38 496.988,-3198.98 503.747,-3190.82 497.184,-3188.38"/>
+</g>
+<!-- 28,2 -->
+<g id="node56" class="node"><title>28,2</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-2740" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-2751.17" font-family="Times Roman,serif" font-size="10.00">28,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-2732 727,-2746 778,-2746 778,-2732 727,-2732"/>
+<text text-anchor="start" x="737" y="-2736.67" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-2718 727,-2732 778,-2732 778,-2718 727,-2718"/>
+<text text-anchor="start" x="736.5" y="-2722.67" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+</g>
+<!-- 28,1->28,2 -->
+<g id="edge334" class="edge"><title>28,1->28,2</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-2733.53C674.501,-2733.25 684.748,-2733.18 694.703,-2733.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-2736.82 704.933,-2733.52 695.007,-2729.82 694.864,-2736.82"/>
+</g>
+<!-- 28,2->28,1 -->
+<g id="edge342" class="edge"><title>28,2->28,1</title>
+<path fill="none" stroke="#218559" d="M704.933,-2746.48C695.29,-2746.75 685.041,-2746.82 675.091,-2746.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-2743.18 664.867,-2746.47 674.792,-2750.18 674.937,-2743.18"/>
+</g>
+<!-- 28,2->28,3 -->
+<g id="edge338" class="edge"><title>28,2->28,3</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-2733.53C808.501,-2733.25 818.748,-2733.18 828.703,-2733.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-2736.82 838.933,-2733.52 829.007,-2729.82 828.864,-2736.82"/>
+</g>
+<!-- 29,4 -->
+<g id="node63" class="node"><title>29,4</title>
+<ellipse fill="none" stroke="black" cx="1154" cy="-2306" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1131.5" y="-2317.17" font-family="Times Roman,serif" font-size="10.00">29,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1129,-2298 1129,-2312 1180,-2312 1180,-2298 1129,-2298"/>
+<text text-anchor="start" x="1137.5" y="-2302.67" font-family="Times Roman,serif" font-size="10.00">GACGT</text>
+<polygon fill="#218559" stroke="#218559" points="1129,-2284 1129,-2298 1180,-2298 1180,-2284 1129,-2284"/>
+<text text-anchor="start" x="1138.5" y="-2288.67" font-family="Times Roman,serif" font-size="10.00">ACGTC</text>
+</g>
+<!-- 28,2->29,4 -->
+<g id="edge340" class="edge"><title>28,2->29,4</title>
+<path fill="none" stroke="#ebb035" d="M790.086,-2718.03C809.886,-2706.6 830,-2695 830,-2695 830,-2695 1076,-2610 1076,-2610 1076,-2610 1106,-2450 1106,-2450 1106,-2450 1124.98,-2393.07 1139.07,-2350.8"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1142.42,-2351.82 1142.26,-2341.23 1135.78,-2349.61 1142.42,-2351.82"/>
+</g>
+<!-- 28,3->27,4 -->
+<g id="edge346" class="edge"><title>28,3->27,4</title>
+<path fill="none" stroke="#218559" d="M851.317,-2764.85C834.127,-2777.16 813.205,-2792.15 795.005,-2805.19"/>
+<polygon fill="#218559" stroke="#218559" points="792.948,-2802.36 786.857,-2811.03 797.025,-2808.05 792.948,-2802.36"/>
+</g>
+<!-- 28,3->28,2 -->
+<g id="edge348" class="edge"><title>28,3->28,2</title>
+<path fill="none" stroke="#218559" d="M838.933,-2746.48C829.29,-2746.75 819.041,-2746.82 809.091,-2746.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-2743.18 798.867,-2746.47 808.792,-2750.18 808.937,-2743.18"/>
+</g>
+<!-- 28,4 -->
+<g id="node58" class="node"><title>28,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-2740" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-2751.17" font-family="Times Roman,serif" font-size="10.00">28,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-2732 995,-2746 1046,-2746 1046,-2732 995,-2732"/>
+<text text-anchor="start" x="1004" y="-2736.67" font-family="Times Roman,serif" font-size="10.00">CGTCA</text>
+<polygon fill="#218559" stroke="#218559" points="995,-2718 995,-2732 1046,-2732 1046,-2718 995,-2718"/>
+<text text-anchor="start" x="1003.5" y="-2722.67" font-family="Times Roman,serif" font-size="10.00">TGACG</text>
+</g>
+<!-- 28,3->28,4 -->
+<g id="edge344" class="edge"><title>28,3->28,4</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-2733.53C942.501,-2733.25 952.748,-2733.18 962.703,-2733.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-2736.82 972.933,-2733.52 963.007,-2729.82 962.864,-2736.82"/>
+</g>
+<!-- 28,4->28,3 -->
+<g id="edge354" class="edge"><title>28,4->28,3</title>
+<path fill="none" stroke="#218559" d="M972.933,-2746.48C963.29,-2746.75 953.041,-2746.82 943.091,-2746.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-2743.18 932.867,-2746.47 942.792,-2750.18 942.937,-2743.18"/>
+</g>
+<!-- 29,2 -->
+<g id="node61" class="node"><title>29,2</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-2306" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-2317.17" font-family="Times Roman,serif" font-size="10.00">29,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-2298 861,-2312 912,-2312 912,-2298 861,-2298"/>
+<text text-anchor="start" x="870" y="-2302.67" font-family="Times Roman,serif" font-size="10.00">ATGAC</text>
+<polygon fill="#218559" stroke="#218559" points="861,-2284 861,-2298 912,-2298 912,-2284 861,-2284"/>
+<text text-anchor="start" x="871.5" y="-2288.67" font-family="Times Roman,serif" font-size="10.00">GTCAT</text>
+</g>
+<!-- 28,4->29,2 -->
+<g id="edge352" class="edge"><title>28,4->29,2</title>
+<path fill="none" stroke="#ebb035" d="M1009.73,-2704.42C993.562,-2648.41 964,-2546 964,-2546 964,-2546 954,-2407 954,-2407 954,-2407 942,-2354 942,-2354 942,-2354 934.717,-2347.76 925.404,-2339.78"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="927.471,-2336.94 917.6,-2333.09 922.915,-2342.25 927.471,-2336.94"/>
+</g>
+<!-- 31,3 -->
+<g id="node161" class="node"><title>31,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1732" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-1743.17" font-family="Times Roman,serif" font-size="10.00">31,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-1724 861,-1738 912,-1738 912,-1724 861,-1724"/>
+<text text-anchor="start" x="871.5" y="-1728.67" font-family="Times Roman,serif" font-size="10.00">GTCAT</text>
+<polygon fill="#218559" stroke="#218559" points="861,-1710 861,-1724 912,-1724 912,-1710 861,-1710"/>
+<text text-anchor="start" x="870" y="-1714.67" font-family="Times Roman,serif" font-size="10.00">ATGAC</text>
+</g>
+<!-- 28,4->31,3 -->
+<g id="edge350" class="edge"><title>28,4->31,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1009.73,-2704.42C993.562,-2648.41 964,-2546 964,-2546 964,-2546 954,-1346 954,-1346 954,-1346 952,-1346 952,-1346 952,-1346 942,-1684 942,-1684 942,-1684 934.717,-1690.24 925.404,-1698.22"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="922.915,-1695.75 917.6,-1704.91 927.471,-1701.06 922.915,-1695.75"/>
+</g>
+<!-- 29,1 -->
+<g id="node60" class="node"><title>29,1</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-2306" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-2317.17" font-family="Times Roman,serif" font-size="10.00">29,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-2298 727,-2312 778,-2312 778,-2298 727,-2298"/>
+<text text-anchor="start" x="736" y="-2302.67" font-family="Times Roman,serif" font-size="10.00">CATGA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-2284 727,-2298 778,-2298 778,-2284 727,-2284"/>
+<text text-anchor="start" x="737" y="-2288.67" font-family="Times Roman,serif" font-size="10.00">TCATG</text>
+</g>
+<!-- 29,1->29,2 -->
+<g id="edge760" class="edge"><title>29,1->29,2</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-2299.53C808.501,-2299.25 818.748,-2299.18 828.703,-2299.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-2302.82 838.933,-2299.52 829.007,-2295.82 828.864,-2302.82"/>
+</g>
+<!-- 30,2 -->
+<g id="node165" class="node"><title>30,2</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-2140" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-2151.17" font-family="Times Roman,serif" font-size="10.00">30,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-2132 861,-2146 912,-2146 912,-2132 861,-2132"/>
+<text text-anchor="start" x="871" y="-2136.67" font-family="Times Roman,serif" font-size="10.00">TCATG</text>
+<polygon fill="#218559" stroke="#218559" points="861,-2118 861,-2132 912,-2132 912,-2118 861,-2118"/>
+<text text-anchor="start" x="870" y="-2122.67" font-family="Times Roman,serif" font-size="10.00">CATGA</text>
+</g>
+<!-- 29,1->30,2 -->
+<g id="edge764" class="edge"><title>29,1->30,2</title>
+<path fill="none" stroke="#218559" d="M780.444,-2277.14C799.078,-2258.23 820,-2237 820,-2237 820,-2237 840.341,-2207.11 858.438,-2180.51"/>
+<polygon fill="#218559" stroke="#218559" points="861.474,-2182.27 864.206,-2172.03 855.687,-2178.33 861.474,-2182.27"/>
+</g>
+<!-- 30,3 -->
+<g id="node166" class="node"><title>30,3</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-2140" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-2151.17" font-family="Times Roman,serif" font-size="10.00">30,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-2132 995,-2146 1046,-2146 1046,-2132 995,-2132"/>
+<text text-anchor="start" x="1004" y="-2136.67" font-family="Times Roman,serif" font-size="10.00">CATGA</text>
+<polygon fill="#218559" stroke="#218559" points="995,-2118 995,-2132 1046,-2132 1046,-2118 995,-2118"/>
+<text text-anchor="start" x="1005" y="-2122.67" font-family="Times Roman,serif" font-size="10.00">TCATG</text>
+</g>
+<!-- 29,1->30,3 -->
+<g id="edge762" class="edge"><title>29,1->30,3</title>
+<path fill="none" stroke="#06a2cb" d="M768.198,-2271.87C784.736,-2237.02 808,-2188 808,-2188 808,-2188 820,-2139 820,-2139 820,-2139 830,-2095 830,-2095 830,-2095 942,-2095 942,-2095 942,-2095 956.586,-2103.41 973.143,-2112.97"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="971.503,-2116.06 981.914,-2118.03 975.001,-2110 971.503,-2116.06"/>
+</g>
+<!-- 29,2->28,4 -->
+<g id="edge770" class="edge"><title>29,2->28,4</title>
+<path fill="none" stroke="#ebb035" d="M917.6,-2333.09C930.303,-2343.97 942,-2354 942,-2354 942,-2354 964,-2546 964,-2546 964,-2546 990.308,-2637.14 1006.86,-2694.48"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1003.59,-2695.78 1009.73,-2704.42 1010.32,-2693.84 1003.59,-2695.78"/>
+</g>
+<!-- 29,2->29,1 -->
+<g id="edge772" class="edge"><title>29,2->29,1</title>
+<path fill="none" stroke="#218559" d="M838.933,-2312.48C829.29,-2312.75 819.041,-2312.82 809.091,-2312.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-2309.18 798.867,-2312.47 808.792,-2316.18 808.937,-2309.18"/>
+</g>
+<!-- 29,3 -->
+<g id="node62" class="node"><title>29,3</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-2306" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-2317.17" font-family="Times Roman,serif" font-size="10.00">29,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-2298 995,-2312 1046,-2312 1046,-2298 995,-2298"/>
+<text text-anchor="start" x="1003.5" y="-2302.67" font-family="Times Roman,serif" font-size="10.00">TGACG</text>
+<polygon fill="#218559" stroke="#218559" points="995,-2284 995,-2298 1046,-2298 1046,-2284 995,-2284"/>
+<text text-anchor="start" x="1004" y="-2288.67" font-family="Times Roman,serif" font-size="10.00">CGTCA</text>
+</g>
+<!-- 29,2->29,3 -->
+<g id="edge766" class="edge"><title>29,2->29,3</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-2299.53C942.501,-2299.25 952.748,-2299.18 962.703,-2299.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-2302.82 972.933,-2299.52 963.007,-2295.82 962.864,-2302.82"/>
+</g>
+<!-- 32,4 -->
+<g id="node197" class="node"><title>32,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-1974" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-1985.17" font-family="Times Roman,serif" font-size="10.00">32,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-1966 995,-1980 1046,-1980 1046,-1966 995,-1966"/>
+<text text-anchor="start" x="1004" y="-1970.67" font-family="Times Roman,serif" font-size="10.00">CGTCA</text>
+<polygon fill="#218559" stroke="#218559" points="995,-1952 995,-1966 1046,-1966 1046,-1952 995,-1952"/>
+<text text-anchor="start" x="1003.5" y="-1956.67" font-family="Times Roman,serif" font-size="10.00">TGACG</text>
+</g>
+<!-- 29,2->32,4 -->
+<g id="edge768" class="edge"><title>29,2->32,4</title>
+<path fill="none" stroke="#ebb035" d="M902.198,-2271.87C918.736,-2237.02 942,-2188 942,-2188 942,-2188 954,-2154 954,-2154 954,-2154 964,-2092 964,-2092 964,-2092 983.437,-2051.04 999.409,-2017.39"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1002.68,-2018.67 1003.8,-2008.13 996.353,-2015.66 1002.68,-2018.67"/>
+</g>
+<!-- 29,3->29,2 -->
+<g id="edge780" class="edge"><title>29,3->29,2</title>
+<path fill="none" stroke="#218559" d="M972.933,-2312.48C963.29,-2312.75 953.041,-2312.82 943.091,-2312.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-2309.18 932.867,-2312.47 942.792,-2316.18 942.937,-2309.18"/>
+</g>
+<!-- 29,3->29,4 -->
+<g id="edge774" class="edge"><title>29,3->29,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1066.87,-2299.53C1076.5,-2299.25 1086.75,-2299.18 1096.7,-2299.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1096.86,-2302.82 1106.93,-2299.52 1097.01,-2295.82 1096.86,-2302.82"/>
+</g>
+<!-- 30,1 -->
+<g id="node164" class="node"><title>30,1</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-2140" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-2151.17" font-family="Times Roman,serif" font-size="10.00">30,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-2132 727,-2146 778,-2146 778,-2132 727,-2132"/>
+<text text-anchor="start" x="737.5" y="-2136.67" font-family="Times Roman,serif" font-size="10.00">GTCAT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-2118 727,-2132 778,-2132 778,-2118 727,-2118"/>
+<text text-anchor="start" x="736" y="-2122.67" font-family="Times Roman,serif" font-size="10.00">ATGAC</text>
+</g>
+<!-- 29,3->30,1 -->
+<g id="edge776" class="edge"><title>29,3->30,1</title>
+<path fill="none" stroke="#06a2cb" d="M1003.8,-2271.87C987.264,-2237.02 964,-2188 964,-2188 964,-2188 942,-1926 942,-1926 942,-1926 830,-1926 830,-1926 830,-1926 808,-2090 808,-2090 808,-2090 800.316,-2096.86 790.641,-2105.5"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="788.077,-2103.1 782.948,-2112.37 792.739,-2108.32 788.077,-2103.1"/>
+</g>
+<!-- 30,4 -->
+<g id="node167" class="node"><title>30,4</title>
+<ellipse fill="none" stroke="black" cx="1154" cy="-2140" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1131.5" y="-2151.17" font-family="Times Roman,serif" font-size="10.00">30,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1129,-2132 1129,-2146 1180,-2146 1180,-2132 1129,-2132"/>
+<text text-anchor="start" x="1138" y="-2136.67" font-family="Times Roman,serif" font-size="10.00">ATGAC</text>
+<polygon fill="#218559" stroke="#218559" points="1129,-2118 1129,-2132 1180,-2132 1180,-2118 1129,-2118"/>
+<text text-anchor="start" x="1139.5" y="-2122.67" font-family="Times Roman,serif" font-size="10.00">GTCAT</text>
+</g>
+<!-- 29,3->30,4 -->
+<g id="edge778" class="edge"><title>29,3->30,4</title>
+<path fill="none" stroke="#218559" d="M1048.44,-2277.14C1067.08,-2258.23 1088,-2237 1088,-2237 1088,-2237 1108.34,-2207.11 1126.44,-2180.51"/>
+<polygon fill="#218559" stroke="#218559" points="1129.47,-2182.27 1132.21,-2172.03 1123.69,-2178.33 1129.47,-2182.27"/>
+</g>
+<!-- 29,4->28,2 -->
+<g id="edge782" class="edge"><title>29,4->28,2</title>
+<path fill="none" stroke="#ebb035" d="M1142.26,-2341.23C1128.05,-2383.86 1106,-2450 1106,-2450 1106,-2450 1076,-2546 1076,-2546 1076,-2546 954,-2614 954,-2614 954,-2614 830,-2653 830,-2653 830,-2653 808,-2678 808,-2678 808,-2678 797.753,-2689.34 786.026,-2702.33"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="783.121,-2700.32 779.016,-2710.09 788.316,-2705.01 783.121,-2700.32"/>
+</g>
+<!-- 29,4->29,3 -->
+<g id="edge784" class="edge"><title>29,4->29,3</title>
+<path fill="none" stroke="#218559" d="M1106.93,-2312.48C1097.29,-2312.75 1087.04,-2312.82 1077.09,-2312.68"/>
+<polygon fill="#218559" stroke="#218559" points="1076.94,-2309.18 1066.87,-2312.47 1076.79,-2316.18 1076.94,-2309.18"/>
+</g>
+<!-- 1,1 -->
+<g id="node65" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="64" cy="-5415" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="44.5" y="-5426.17" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="42,-5407 42,-5421 87,-5421 87,-5407 42,-5407"/>
+<text text-anchor="start" x="49" y="-5411.67" font-family="Times Roman,serif" font-size="10.00">TAGTG</text>
+<polygon fill="#218559" stroke="#218559" points="42,-5393 42,-5407 87,-5407 87,-5393 42,-5393"/>
+<text text-anchor="start" x="48.5" y="-5397.67" font-family="Times Roman,serif" font-size="10.00">CACTA</text>
+</g>
+<!-- 1,2 -->
+<g id="node66" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="216" cy="-5415" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="196.5" y="-5426.17" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="194,-5407 194,-5421 239,-5421 239,-5407 194,-5407"/>
+<text text-anchor="start" x="199.5" y="-5411.67" font-family="Times Roman,serif" font-size="10.00">AGTGC</text>
+<polygon fill="#218559" stroke="#218559" points="194,-5393 194,-5407 239,-5407 239,-5393 194,-5393"/>
+<text text-anchor="start" x="200" y="-5397.67" font-family="Times Roman,serif" font-size="10.00">GCACT</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge454" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M106.745,-5408.86C124.378,-5408.17 144.986,-5408.07 163.523,-5408.55"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="163.467,-5412.05 173.576,-5408.87 163.695,-5405.05 163.467,-5412.05"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge460" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M173.576,-5421.13C155.979,-5421.82 135.377,-5421.94 116.815,-5421.46"/>
+<polygon fill="#218559" stroke="#218559" points="116.852,-5417.96 106.745,-5421.14 116.627,-5424.96 116.852,-5417.96"/>
+</g>
+<!-- 1,3 -->
+<g id="node67" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-5415" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="330.5" y="-5426.17" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="328,-5407 328,-5421 373,-5421 373,-5407 328,-5407"/>
+<text text-anchor="start" x="333" y="-5411.67" font-family="Times Roman,serif" font-size="10.00">GTGCG</text>
+<polygon fill="#218559" stroke="#218559" points="328,-5393 328,-5407 373,-5407 373,-5393 328,-5393"/>
+<text text-anchor="start" x="333" y="-5397.67" font-family="Times Roman,serif" font-size="10.00">CGCAC</text>
+</g>
+<!-- 1,2->1,3 -->
+<g id="edge456" class="edge"><title>1,2->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M258.398,-5408.67C270.741,-5408.23 284.377,-5408.14 297.273,-5408.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="297.326,-5411.89 307.417,-5408.67 297.517,-5404.89 297.326,-5411.89"/>
+</g>
+<!-- 2,4 -->
+<g id="node77" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-5319" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="598.5" y="-5330.17" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="596,-5311 596,-5325 641,-5325 641,-5311 596,-5311"/>
+<text text-anchor="start" x="601" y="-5315.67" font-family="Times Roman,serif" font-size="10.00">CGCAC</text>
+<polygon fill="#218559" stroke="#218559" points="596,-5297 596,-5311 641,-5311 641,-5297 596,-5297"/>
+<text text-anchor="start" x="601" y="-5301.67" font-family="Times Roman,serif" font-size="10.00">GTGCG</text>
+</g>
+<!-- 1,2->2,4 -->
+<g id="edge458" class="edge"><title>1,2->2,4</title>
+<path fill="none" stroke="#ebb035" d="M251.804,-5435.66C272.309,-5447.49 294,-5460 294,-5460 294,-5460 540,-5460 540,-5460 540,-5460 571.999,-5402.16 595.006,-5360.57"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="598.131,-5362.15 599.909,-5351.7 592.006,-5358.76 598.131,-5362.15"/>
+</g>
+<!-- 1,3->1,2 -->
+<g id="edge464" class="edge"><title>1,3->1,2</title>
+<path fill="none" stroke="#218559" d="M307.417,-5421.33C295.062,-5421.77 281.424,-5421.86 268.535,-5421.61"/>
+<polygon fill="#218559" stroke="#218559" points="268.491,-5418.11 258.398,-5421.33 268.298,-5425.1 268.491,-5418.11"/>
+</g>
+<!-- 1,4 -->
+<g id="node68" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-5415" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="464.5" y="-5426.17" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="462,-5407 462,-5421 507,-5421 507,-5407 462,-5407"/>
+<text text-anchor="start" x="467.5" y="-5411.67" font-family="Times Roman,serif" font-size="10.00">TGCGA</text>
+<polygon fill="#218559" stroke="#218559" points="462,-5393 462,-5407 507,-5407 507,-5393 462,-5393"/>
+<text text-anchor="start" x="468" y="-5397.67" font-family="Times Roman,serif" font-size="10.00">TCGCA</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge462" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M392.398,-5408.67C404.741,-5408.23 418.377,-5408.14 431.273,-5408.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="431.326,-5411.89 441.417,-5408.67 431.517,-5404.89 431.326,-5411.89"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge468" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M441.417,-5421.33C429.062,-5421.77 415.424,-5421.86 402.535,-5421.61"/>
+<polygon fill="#218559" stroke="#218559" points="402.491,-5418.11 392.398,-5421.33 402.298,-5425.1 402.491,-5418.11"/>
+</g>
+<!-- 2,2 -->
+<g id="node75" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-5319" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="330.5" y="-5330.17" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="328,-5311 328,-5325 373,-5325 373,-5311 328,-5311"/>
+<text text-anchor="start" x="334" y="-5315.67" font-family="Times Roman,serif" font-size="10.00">CTCGC</text>
+<polygon fill="#218559" stroke="#218559" points="328,-5297 328,-5311 373,-5311 373,-5297 328,-5297"/>
+<text text-anchor="start" x="332.5" y="-5301.67" font-family="Times Roman,serif" font-size="10.00">GCGAG</text>
+</g>
+<!-- 1,4->2,2 -->
+<g id="edge466" class="edge"><title>1,4->2,2</title>
+<path fill="none" stroke="#ebb035" d="M448.805,-5394.3C432.212,-5384.54 416,-5375 416,-5375 416,-5375 402.69,-5363.71 388.025,-5351.26"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="390.185,-5348.51 380.296,-5344.71 385.656,-5353.84 390.185,-5348.51"/>
+</g>
+<!-- 3,1 -->
+<g id="node70" class="node"><title>3,1</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-52" rx="43.8406" ry="36.0624"/>
+<text text-anchor="start" x="732" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">3,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="729,-44 729,-58 775,-58 775,-44 729,-44"/>
+<text text-anchor="start" x="731.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">GCTAGG</text>
+<polygon fill="#218559" stroke="#218559" points="729,-30 729,-44 775,-44 775,-30 729,-30"/>
+<text text-anchor="start" x="732.5" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">CCTAGC</text>
+</g>
+<!-- 3,3 -->
+<g id="node71" class="node"><title>3,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-52" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="866.5" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">3,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="864,-44 864,-58 909,-58 909,-44 864,-44"/>
+<text text-anchor="start" x="869.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">TAGGG</text>
+<polygon fill="#218559" stroke="#218559" points="864,-30 864,-44 909,-44 909,-30 864,-30"/>
+<text text-anchor="start" x="870.5" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">CCCTA</text>
+</g>
+<!-- 3,1->3,3 -->
+<g id="edge470" class="edge"><title>3,1->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M795.506,-45.6333C807.566,-45.224 820.789,-45.1425 833.312,-45.3888"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="833.533,-48.8961 843.626,-45.6728 833.726,-41.8988 833.533,-48.8961"/>
+</g>
+<!-- 3,3->3,1 -->
+<g id="edge474" class="edge"><title>3,3->3,1</title>
+<path fill="none" stroke="#218559" d="M843.626,-58.3272C831.641,-58.7571 818.434,-58.8591 805.869,-58.6334"/>
+<polygon fill="#218559" stroke="#218559" points="805.593,-55.1253 795.506,-58.3667 805.413,-62.1229 805.593,-55.1253"/>
+</g>
+<!-- 3,4 -->
+<g id="node72" class="node"><title>3,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-52" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1000.5" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">3,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="998,-44 998,-58 1043,-58 1043,-44 998,-44"/>
+<text text-anchor="start" x="1003.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">AGGGT</text>
+<polygon fill="#218559" stroke="#218559" points="998,-30 998,-44 1043,-44 1043,-30 998,-30"/>
+<text text-anchor="start" x="1004" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">ACCCT</text>
+</g>
+<!-- 3,3->3,4 -->
+<g id="edge472" class="edge"><title>3,3->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M928.398,-45.6719C940.741,-45.2298 954.377,-45.1351 967.273,-45.388"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="967.326,-48.8907 977.417,-45.6653 967.517,-41.8933 967.326,-48.8907"/>
+</g>
+<!-- 3,4->3,3 -->
+<g id="edge478" class="edge"><title>3,4->3,3</title>
+<path fill="none" stroke="#218559" d="M977.417,-58.3347C965.062,-58.7734 951.424,-58.8645 938.535,-58.6082"/>
+<polygon fill="#218559" stroke="#218559" points="938.491,-55.1057 928.398,-58.3281 938.298,-62.1031 938.491,-55.1057"/>
+</g>
+<!-- 4,3 -->
+<g id="node86" class="node"><title>4,3</title>
+<ellipse fill="none" stroke="black" cx="1154" cy="-1444" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1134.5" y="-1455.17" font-family="Times Roman,serif" font-size="10.00">4,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1132,-1436 1132,-1450 1177,-1450 1177,-1436 1132,-1436"/>
+<text text-anchor="start" x="1138" y="-1440.67" font-family="Times Roman,serif" font-size="10.00">GGGTT</text>
+<polygon fill="#218559" stroke="#218559" points="1132,-1422 1132,-1436 1177,-1436 1177,-1422 1132,-1422"/>
+<text text-anchor="start" x="1137.5" y="-1426.67" font-family="Times Roman,serif" font-size="10.00">AACCC</text>
+</g>
+<!-- 3,4->4,3 -->
+<g id="edge476" class="edge"><title>3,4->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1023.59,-87.8824C1035.95,-211.48 1076,-612 1076,-612 1076,-612 1106,-1122 1106,-1122 1106,-1122 1133.86,-1308.88 1147.13,-1397.9"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1143.7,-1398.63 1148.63,-1408 1150.62,-1397.59 1143.7,-1398.63"/>
+</g>
+<!-- 2,1 -->
+<g id="node74" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="216" cy="-5319" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="196.5" y="-5330.17" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="194,-5311 194,-5325 239,-5325 239,-5311 194,-5311"/>
+<text text-anchor="start" x="200" y="-5315.67" font-family="Times Roman,serif" font-size="10.00">CCTCG</text>
+<polygon fill="#218559" stroke="#218559" points="194,-5297 194,-5311 239,-5311 239,-5297 194,-5297"/>
+<text text-anchor="start" x="198.5" y="-5301.67" font-family="Times Roman,serif" font-size="10.00">CGAGG</text>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge48" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M258.398,-5312.67C270.741,-5312.23 284.377,-5312.14 297.273,-5312.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="297.326,-5315.89 307.417,-5312.67 297.517,-5308.89 297.326,-5315.89"/>
+</g>
+<!-- 2,2->1,4 -->
+<g id="edge52" class="edge"><title>2,2->1,4</title>
+<path fill="none" stroke="#ebb035" d="M382.78,-5342.48C400.868,-5355.44 423.505,-5371.66 442.782,-5385.47"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="440.926,-5388.45 451.094,-5391.43 445.003,-5382.76 440.926,-5388.45"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge54" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M307.417,-5325.33C295.062,-5325.77 281.424,-5325.86 268.535,-5325.61"/>
+<polygon fill="#218559" stroke="#218559" points="268.491,-5322.11 258.398,-5325.33 268.298,-5329.1 268.491,-5322.11"/>
+</g>
+<!-- 2,3 -->
+<g id="node76" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-5319" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="464.5" y="-5330.17" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="462,-5311 462,-5325 507,-5325 507,-5311 462,-5311"/>
+<text text-anchor="start" x="468" y="-5315.67" font-family="Times Roman,serif" font-size="10.00">TCGCA</text>
+<polygon fill="#218559" stroke="#218559" points="462,-5297 462,-5311 507,-5311 507,-5297 462,-5297"/>
+<text text-anchor="start" x="467.5" y="-5301.67" font-family="Times Roman,serif" font-size="10.00">TGCGA</text>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge50" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M392.398,-5312.67C404.741,-5312.23 418.377,-5312.14 431.273,-5312.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="431.326,-5315.89 441.417,-5312.67 431.517,-5308.89 431.326,-5315.89"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge58" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M441.417,-5325.33C429.062,-5325.77 415.424,-5325.86 402.535,-5325.61"/>
+<polygon fill="#218559" stroke="#218559" points="402.491,-5322.11 392.398,-5325.33 402.298,-5329.1 402.491,-5322.11"/>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge56" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M526.398,-5312.67C538.741,-5312.23 552.377,-5312.14 565.273,-5312.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="565.326,-5315.89 575.417,-5312.67 565.517,-5308.89 565.326,-5315.89"/>
+</g>
+<!-- 2,4->1,2 -->
+<g id="edge60" class="edge"><title>2,4->1,2</title>
+<path fill="none" stroke="#ebb035" d="M604.961,-5353.38C585.726,-5404.09 552,-5493 552,-5493 552,-5493 282,-5493 282,-5493 282,-5493 264.575,-5472.41 247.632,-5452.38"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="250.083,-5449.86 240.952,-5444.49 244.739,-5454.38 250.083,-5449.86"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge62" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M575.417,-5325.33C563.062,-5325.77 549.424,-5325.86 536.535,-5325.61"/>
+<polygon fill="#218559" stroke="#218559" points="536.491,-5322.11 526.398,-5325.33 536.298,-5329.1 536.491,-5322.11"/>
+</g>
+<!-- 5,1 -->
+<g id="node79" class="node"><title>5,1</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-2498" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="732.5" y="-2509.17" font-family="Times Roman,serif" font-size="10.00">5,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="730,-2490 730,-2504 775,-2504 775,-2490 730,-2490"/>
+<text text-anchor="start" x="735" y="-2494.67" font-family="Times Roman,serif" font-size="10.00">AGCAA</text>
+<polygon fill="#218559" stroke="#218559" points="730,-2476 730,-2490 775,-2490 775,-2476 730,-2476"/>
+<text text-anchor="start" x="737.5" y="-2480.67" font-family="Times Roman,serif" font-size="10.00">TTGCT</text>
+</g>
+<!-- 5,2 -->
+<g id="node80" class="node"><title>5,2</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-2498" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="866.5" y="-2509.17" font-family="Times Roman,serif" font-size="10.00">5,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="864,-2490 864,-2504 909,-2504 909,-2490 864,-2490"/>
+<text text-anchor="start" x="869" y="-2494.67" font-family="Times Roman,serif" font-size="10.00">GCAAC</text>
+<polygon fill="#218559" stroke="#218559" points="864,-2476 864,-2490 909,-2490 909,-2476 864,-2476"/>
+<text text-anchor="start" x="870.5" y="-2480.67" font-family="Times Roman,serif" font-size="10.00">GTTGC</text>
+</g>
+<!-- 5,1->5,2 -->
+<g id="edge480" class="edge"><title>5,1->5,2</title>
+<path fill="none" stroke="#dd1e2f" d="M794.398,-2491.67C806.741,-2491.23 820.377,-2491.14 833.273,-2491.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="833.326,-2494.89 843.417,-2491.67 833.517,-2487.89 833.326,-2494.89"/>
+</g>
+<!-- 6,3 -->
+<g id="node96" class="node"><title>6,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-3131" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="866.5" y="-3142.17" font-family="Times Roman,serif" font-size="10.00">6,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="864,-3123 864,-3137 909,-3137 909,-3123 864,-3123"/>
+<text text-anchor="start" x="870" y="-3127.67" font-family="Times Roman,serif" font-size="10.00">TGCTG</text>
+<polygon fill="#218559" stroke="#218559" points="864,-3109 864,-3123 909,-3123 909,-3109 864,-3109"/>
+<text text-anchor="start" x="869" y="-3113.67" font-family="Times Roman,serif" font-size="10.00">CAGCA</text>
+</g>
+<!-- 5,1->6,3 -->
+<g id="edge482" class="edge"><title>5,1->6,3</title>
+<path fill="none" stroke="#06a2cb" d="M762.141,-2533.13C778.279,-2589.04 808,-2692 808,-2692 808,-2692 820,-2810 820,-2810 820,-2810 830,-3083 830,-3083 830,-3083 837.944,-3089.81 847.849,-3098.3"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="845.835,-3101.18 855.705,-3105.03 850.391,-3095.87 845.835,-3101.18"/>
+</g>
+<!-- 5,2->5,1 -->
+<g id="edge488" class="edge"><title>5,2->5,1</title>
+<path fill="none" stroke="#218559" d="M843.417,-2504.33C831.062,-2504.77 817.424,-2504.86 804.535,-2504.61"/>
+<polygon fill="#218559" stroke="#218559" points="804.491,-2501.11 794.398,-2504.33 804.298,-2508.1 804.491,-2501.11"/>
+</g>
+<!-- 5,3 -->
+<g id="node81" class="node"><title>5,3</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-2498" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1000.5" y="-2509.17" font-family="Times Roman,serif" font-size="10.00">5,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="998,-2490 998,-2504 1043,-2504 1043,-2490 998,-2490"/>
+<text text-anchor="start" x="1003.5" y="-2494.67" font-family="Times Roman,serif" font-size="10.00">CAACC</text>
+<polygon fill="#218559" stroke="#218559" points="998,-2476 998,-2490 1043,-2490 1043,-2476 998,-2476"/>
+<text text-anchor="start" x="1004" y="-2480.67" font-family="Times Roman,serif" font-size="10.00">GGTTG</text>
+</g>
+<!-- 5,2->5,3 -->
+<g id="edge484" class="edge"><title>5,2->5,3</title>
+<path fill="none" stroke="#dd1e2f" d="M928.398,-2491.67C940.741,-2491.23 954.377,-2491.14 967.273,-2491.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="967.326,-2494.89 977.417,-2491.67 967.517,-2487.89 967.326,-2494.89"/>
+</g>
+<!-- 4,4 -->
+<g id="node87" class="node"><title>4,4</title>
+<ellipse fill="none" stroke="black" cx="1288" cy="-1444" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1268.5" y="-1455.17" font-family="Times Roman,serif" font-size="10.00">4,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1266,-1436 1266,-1450 1311,-1450 1311,-1436 1266,-1436"/>
+<text text-anchor="start" x="1272" y="-1440.67" font-family="Times Roman,serif" font-size="10.00">GGTTG</text>
+<polygon fill="#218559" stroke="#218559" points="1266,-1422 1266,-1436 1311,-1436 1311,-1422 1266,-1422"/>
+<text text-anchor="start" x="1271.5" y="-1426.67" font-family="Times Roman,serif" font-size="10.00">CAACC</text>
+</g>
+<!-- 5,2->4,4 -->
+<g id="edge486" class="edge"><title>5,2->4,4</title>
+<path fill="none" stroke="#ebb035" d="M916.295,-2472.03C929.489,-2460.72 942,-2450 942,-2450 942,-2450 954,-2353 954,-2353 954,-2353 964,-2258 964,-2258 964,-2258 1210,-2211 1210,-2211 1210,-2211 1222,-2096 1222,-2096 1222,-2096 1268.27,-1638.87 1283.33,-1490.15"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1286.83,-1490.28 1284.36,-1479.98 1279.87,-1489.58 1286.83,-1490.28"/>
+</g>
+<!-- 5,3->5,2 -->
+<g id="edge494" class="edge"><title>5,3->5,2</title>
+<path fill="none" stroke="#218559" d="M977.417,-2504.33C965.062,-2504.77 951.424,-2504.86 938.535,-2504.61"/>
+<polygon fill="#218559" stroke="#218559" points="938.491,-2501.11 928.398,-2504.33 938.298,-2508.1 938.491,-2501.11"/>
+</g>
+<!-- 5,4 -->
+<g id="node82" class="node"><title>5,4</title>
+<ellipse fill="none" stroke="black" cx="1154" cy="-2498" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1134.5" y="-2509.17" font-family="Times Roman,serif" font-size="10.00">5,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1132,-2490 1132,-2504 1177,-2504 1177,-2490 1132,-2490"/>
+<text text-anchor="start" x="1137.5" y="-2494.67" font-family="Times Roman,serif" font-size="10.00">AACCC</text>
+<polygon fill="#218559" stroke="#218559" points="1132,-2476 1132,-2490 1177,-2490 1177,-2476 1132,-2476"/>
+<text text-anchor="start" x="1138" y="-2480.67" font-family="Times Roman,serif" font-size="10.00">GGGTT</text>
+</g>
+<!-- 5,3->5,4 -->
+<g id="edge490" class="edge"><title>5,3->5,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1062.4,-2491.67C1074.74,-2491.23 1088.38,-2491.14 1101.27,-2491.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1101.33,-2494.89 1111.42,-2491.67 1101.52,-2487.89 1101.33,-2494.89"/>
+</g>
+<!-- 6,1 -->
+<g id="node94" class="node"><title>6,1</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-3131" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="598.5" y="-3142.17" font-family="Times Roman,serif" font-size="10.00">6,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="596,-3123 596,-3137 641,-3137 641,-3123 596,-3123"/>
+<text text-anchor="start" x="602.5" y="-3127.67" font-family="Times Roman,serif" font-size="10.00">GTTGC</text>
+<polygon fill="#218559" stroke="#218559" points="596,-3109 596,-3123 641,-3123 641,-3109 596,-3109"/>
+<text text-anchor="start" x="601" y="-3113.67" font-family="Times Roman,serif" font-size="10.00">GCAAC</text>
+</g>
+<!-- 5,3->6,1 -->
+<g id="edge492" class="edge"><title>5,3->6,1</title>
+<path fill="none" stroke="#06a2cb" d="M988.4,-2522.83C975.697,-2532.81 964,-2542 964,-2542 964,-2542 942,-2577 942,-2577 942,-2577 830,-2615 830,-2615 830,-2615 808,-2636 808,-2636 808,-2636 696,-2692 696,-2692 696,-2692 686,-2881 686,-2881 686,-2881 674,-3083 674,-3083 674,-3083 666.056,-3089.81 656.151,-3098.3"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="653.609,-3095.87 648.295,-3105.03 658.165,-3101.18 653.609,-3095.87"/>
+</g>
+<!-- 5,4->5,3 -->
+<g id="edge498" class="edge"><title>5,4->5,3</title>
+<path fill="none" stroke="#218559" d="M1111.42,-2504.33C1099.06,-2504.77 1085.42,-2504.86 1072.54,-2504.61"/>
+<polygon fill="#218559" stroke="#218559" points="1072.49,-2501.11 1062.4,-2504.33 1072.3,-2508.1 1072.49,-2501.11"/>
+</g>
+<!-- 4,2 -->
+<g id="node85" class="node"><title>4,2</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-1444" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1000.5" y="-1455.17" font-family="Times Roman,serif" font-size="10.00">4,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="998,-1436 998,-1450 1043,-1450 1043,-1436 998,-1436"/>
+<text text-anchor="start" x="1003.5" y="-1440.67" font-family="Times Roman,serif" font-size="10.00">AGGGT</text>
+<polygon fill="#218559" stroke="#218559" points="998,-1422 998,-1436 1043,-1436 1043,-1422 998,-1422"/>
+<text text-anchor="start" x="1004" y="-1426.67" font-family="Times Roman,serif" font-size="10.00">ACCCT</text>
+</g>
+<!-- 5,4->4,2 -->
+<g id="edge496" class="edge"><title>5,4->4,2</title>
+<path fill="none" stroke="#ebb035" d="M1142.38,-2463.14C1128.17,-2420.52 1106,-2354 1106,-2354 1106,-2354 1086,-1913 1086,-1913 1086,-1913 1076,-1684 1076,-1684 1076,-1684 1046.78,-1558.79 1030.56,-1489.25"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1033.94,-1488.36 1028.26,-1479.41 1027.13,-1489.95 1033.94,-1488.36"/>
+</g>
+<!-- 4,1 -->
+<g id="node84" class="node"><title>4,1</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1444" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="866.5" y="-1455.17" font-family="Times Roman,serif" font-size="10.00">4,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="864,-1436 864,-1450 909,-1450 909,-1436 864,-1436"/>
+<text text-anchor="start" x="868" y="-1440.67" font-family="Times Roman,serif" font-size="10.00">GAGGG</text>
+<polygon fill="#218559" stroke="#218559" points="864,-1422 864,-1436 909,-1436 909,-1422 864,-1422"/>
+<text text-anchor="start" x="870.5" y="-1426.67" font-family="Times Roman,serif" font-size="10.00">CCCTC</text>
+</g>
+<!-- 4,1->4,2 -->
+<g id="edge64" class="edge"><title>4,1->4,2</title>
+<path fill="none" stroke="#dd1e2f" d="M928.398,-1437.67C940.741,-1437.23 954.377,-1437.14 967.273,-1437.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="967.326,-1440.89 977.417,-1437.67 967.517,-1433.89 967.326,-1440.89"/>
+</g>
+<!-- 4,2->5,4 -->
+<g id="edge68" class="edge"><title>4,2->5,4</title>
+<path fill="none" stroke="#ebb035" d="M1028.26,-1479.41C1043.84,-1546.17 1076,-1684 1076,-1684 1076,-1684 1088,-1915 1088,-1915 1088,-1915 1106,-2354 1106,-2354 1106,-2354 1125.09,-2411.26 1139.19,-2453.56"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1135.9,-2454.76 1142.38,-2463.14 1142.54,-2452.55 1135.9,-2454.76"/>
+</g>
+<!-- 4,2->4,1 -->
+<g id="edge70" class="edge"><title>4,2->4,1</title>
+<path fill="none" stroke="#218559" d="M977.417,-1450.33C965.062,-1450.77 951.424,-1450.86 938.535,-1450.61"/>
+<polygon fill="#218559" stroke="#218559" points="938.491,-1447.11 928.398,-1450.33 938.298,-1454.1 938.491,-1447.11"/>
+</g>
+<!-- 4,2->4,3 -->
+<g id="edge66" class="edge"><title>4,2->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1062.4,-1437.67C1074.74,-1437.23 1088.38,-1437.14 1101.27,-1437.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1101.33,-1440.89 1111.42,-1437.67 1101.52,-1433.89 1101.33,-1440.89"/>
+</g>
+<!-- 4,3->3,4 -->
+<g id="edge74" class="edge"><title>4,3->3,4</title>
+<path fill="none" stroke="#218559" d="M1148.63,-1408C1136.11,-1323.98 1106,-1122 1106,-1122 1106,-1122 1088,-453 1088,-453 1088,-453 1045.65,-203.273 1027.79,-97.9643"/>
+<polygon fill="#218559" stroke="#218559" points="1031.22,-97.213 1026.09,-87.939 1024.32,-98.3834 1031.22,-97.213"/>
+</g>
+<!-- 4,3->4,2 -->
+<g id="edge76" class="edge"><title>4,3->4,2</title>
+<path fill="none" stroke="#218559" d="M1111.42,-1450.33C1099.06,-1450.77 1085.42,-1450.86 1072.54,-1450.61"/>
+<polygon fill="#218559" stroke="#218559" points="1072.49,-1447.11 1062.4,-1450.33 1072.3,-1454.1 1072.49,-1447.11"/>
+</g>
+<!-- 4,3->4,4 -->
+<g id="edge72" class="edge"><title>4,3->4,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1196.4,-1437.67C1208.74,-1437.23 1222.38,-1437.14 1235.27,-1437.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1235.33,-1440.89 1245.42,-1437.67 1235.52,-1433.89 1235.33,-1440.89"/>
+</g>
+<!-- 4,4->5,2 -->
+<g id="edge78" class="edge"><title>4,4->5,2</title>
+<path fill="none" stroke="#ebb035" d="M1284.24,-1479.88C1268.66,-1628.5 1210,-2188 1210,-2188 1210,-2188 964,-2228 964,-2228 964,-2228 942,-2450 942,-2450 942,-2450 934.056,-2456.81 924.151,-2465.3"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="921.609,-2462.87 916.295,-2472.03 926.165,-2468.18 921.609,-2462.87"/>
+</g>
+<!-- 4,4->4,3 -->
+<g id="edge80" class="edge"><title>4,4->4,3</title>
+<path fill="none" stroke="#218559" d="M1245.42,-1450.33C1233.06,-1450.77 1219.42,-1450.86 1206.54,-1450.61"/>
+<polygon fill="#218559" stroke="#218559" points="1206.49,-1447.11 1196.4,-1450.33 1206.3,-1454.1 1206.49,-1447.11"/>
+</g>
+<!-- 7,1->24,2 -->
+<g id="edge504" class="edge"><title>7,1->24,2</title>
+<path fill="none" stroke="#06a2cb" d="M514.295,-4654.97C527.489,-4666.28 540,-4677 540,-4677 540,-4677 562,-5072 562,-5072 562,-5072 569.283,-5078.24 578.596,-5086.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="576.529,-5089.06 586.4,-5092.91 581.085,-5083.75 576.529,-5089.06"/>
+</g>
+<!-- 7,1->23,1 -->
+<g id="edge508" class="edge"><title>7,1->23,1</title>
+<path fill="none" stroke="#218559" d="M451.161,-4652.53C433.641,-4665.08 411.867,-4680.68 393.012,-4694.19"/>
+<polygon fill="#218559" stroke="#218559" points="390.948,-4691.36 384.857,-4700.03 395.025,-4697.05 390.948,-4691.36"/>
+</g>
+<!-- 7,1->7,2 -->
+<g id="edge500" class="edge"><title>7,1->7,2</title>
+<path fill="none" stroke="#dd1e2f" d="M526.398,-4622.67C538.741,-4622.23 552.377,-4622.14 565.273,-4622.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="565.326,-4625.89 575.417,-4622.67 565.517,-4618.89 565.326,-4625.89"/>
+</g>
+<!-- 8,3 -->
+<g id="node106" class="node"><title>8,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-5024" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="598.5" y="-5035.17" font-family="Times Roman,serif" font-size="10.00">8,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="596,-5016 596,-5030 641,-5030 641,-5016 596,-5016"/>
+<text text-anchor="start" x="602" y="-5020.67" font-family="Times Roman,serif" font-size="10.00">GAAAT</text>
+<polygon fill="#218559" stroke="#218559" points="596,-5002 596,-5016 641,-5016 641,-5002 596,-5002"/>
+<text text-anchor="start" x="604.5" y="-5006.67" font-family="Times Roman,serif" font-size="10.00">ATTTC</text>
+</g>
+<!-- 7,1->8,3 -->
+<g id="edge502" class="edge"><title>7,1->8,3</title>
+<path fill="none" stroke="#06a2cb" d="M513.64,-4602.54C527.072,-4590.54 540,-4579 540,-4579 540,-4579 550,-4340 550,-4340 550,-4340 552,-4340 552,-4340 552,-4340 562,-4773 562,-4773 562,-4773 591.791,-4906.53 607.915,-4978.8"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="604.502,-4979.57 610.095,-4988.57 611.334,-4978.05 604.502,-4979.57"/>
+</g>
+<!-- 18,2 -->
+<g id="node155" class="node"><title>18,2</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-4463" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-4474.17" font-family="Times Roman,serif" font-size="10.00">18,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-4455 325,-4469 376,-4469 376,-4455 325,-4455"/>
+<text text-anchor="start" x="336" y="-4459.67" font-family="Times Roman,serif" font-size="10.00">GTTTC</text>
+<polygon fill="#218559" stroke="#218559" points="325,-4441 325,-4455 376,-4455 376,-4441 325,-4441"/>
+<text text-anchor="start" x="333" y="-4445.67" font-family="Times Roman,serif" font-size="10.00">GAAAC</text>
+</g>
+<!-- 7,1->18,2 -->
+<g id="edge506" class="edge"><title>7,1->18,2</title>
+<path fill="none" stroke="#218559" d="M468.102,-4595.5C451.546,-4560.62 428,-4511 428,-4511 428,-4511 418,-4336 418,-4336 418,-4336 416,-4336 416,-4336 416,-4336 391.411,-4383.32 372.25,-4420.19"/>
+<polygon fill="#218559" stroke="#218559" points="369.019,-4418.81 367.514,-4429.3 375.231,-4422.04 369.019,-4418.81"/>
+</g>
+<!-- 7,2->24,1 -->
+<g id="edge514" class="edge"><title>7,2->24,1</title>
+<path fill="none" stroke="#06a2cb" d="M587.705,-4654.97C574.511,-4666.28 562,-4677 562,-4677 562,-4677 552,-4873 552,-4873 552,-4873 550,-4901 550,-4901 550,-4901 540,-5072 540,-5072 540,-5072 532.717,-5078.24 523.404,-5086.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="520.915,-5083.75 515.6,-5092.91 525.471,-5089.06 520.915,-5083.75"/>
+</g>
+<!-- 7,2->7,1 -->
+<g id="edge516" class="edge"><title>7,2->7,1</title>
+<path fill="none" stroke="#218559" d="M575.417,-4635.33C563.062,-4635.77 549.424,-4635.86 536.535,-4635.61"/>
+<polygon fill="#218559" stroke="#218559" points="536.491,-4632.11 526.398,-4635.33 536.298,-4639.1 536.491,-4632.11"/>
+</g>
+<!-- 7,3 -->
+<g id="node91" class="node"><title>7,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-4629" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="732.5" y="-4640.17" font-family="Times Roman,serif" font-size="10.00">7,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="730,-4621 730,-4635 775,-4635 775,-4621 730,-4621"/>
+<text text-anchor="start" x="736" y="-4625.67" font-family="Times Roman,serif" font-size="10.00">TCAGC</text>
+<polygon fill="#218559" stroke="#218559" points="730,-4607 730,-4621 775,-4621 775,-4607 730,-4607"/>
+<text text-anchor="start" x="735.5" y="-4611.67" font-family="Times Roman,serif" font-size="10.00">GCTGA</text>
+</g>
+<!-- 7,2->7,3 -->
+<g id="edge510" class="edge"><title>7,2->7,3</title>
+<path fill="none" stroke="#dd1e2f" d="M660.398,-4622.67C672.741,-4622.23 686.377,-4622.14 699.273,-4622.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="699.326,-4625.89 709.417,-4622.67 699.517,-4618.89 699.326,-4625.89"/>
+</g>
+<!-- 6,4 -->
+<g id="node97" class="node"><title>6,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-3131" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1000.5" y="-3142.17" font-family="Times Roman,serif" font-size="10.00">6,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="998,-3123 998,-3137 1043,-3137 1043,-3123 998,-3123"/>
+<text text-anchor="start" x="1003.5" y="-3127.67" font-family="Times Roman,serif" font-size="10.00">GCTGA</text>
+<polygon fill="#218559" stroke="#218559" points="998,-3109 998,-3123 1043,-3123 1043,-3109 998,-3109"/>
+<text text-anchor="start" x="1004" y="-3113.67" font-family="Times Roman,serif" font-size="10.00">TCAGC</text>
+</g>
+<!-- 7,2->6,4 -->
+<g id="edge512" class="edge"><title>7,2->6,4</title>
+<path fill="none" stroke="#ebb035" d="M653.804,-4608.34C674.309,-4596.51 696,-4584 696,-4584 696,-4584 808,-4541 808,-4541 808,-4541 820,-4518 820,-4518 820,-4518 942,-4081 942,-4081 942,-4081 1000.55,-3367.83 1016.2,-3177.32"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1019.71,-3177.33 1017.04,-3167.08 1012.73,-3176.76 1019.71,-3177.33"/>
+</g>
+<!-- 7,3->7,2 -->
+<g id="edge522" class="edge"><title>7,3->7,2</title>
+<path fill="none" stroke="#218559" d="M709.417,-4635.33C697.062,-4635.77 683.424,-4635.86 670.535,-4635.61"/>
+<polygon fill="#218559" stroke="#218559" points="670.491,-4632.11 660.398,-4635.33 670.298,-4639.1 670.491,-4632.11"/>
+</g>
+<!-- 7,4 -->
+<g id="node92" class="node"><title>7,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-4629" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="866.5" y="-4640.17" font-family="Times Roman,serif" font-size="10.00">7,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="864,-4621 864,-4635 909,-4635 909,-4621 864,-4621"/>
+<text text-anchor="start" x="869" y="-4625.67" font-family="Times Roman,serif" font-size="10.00">CAGCA</text>
+<polygon fill="#218559" stroke="#218559" points="864,-4607 864,-4621 909,-4621 909,-4607 864,-4607"/>
+<text text-anchor="start" x="870" y="-4611.67" font-family="Times Roman,serif" font-size="10.00">TGCTG</text>
+</g>
+<!-- 7,3->7,4 -->
+<g id="edge518" class="edge"><title>7,3->7,4</title>
+<path fill="none" stroke="#dd1e2f" d="M794.398,-4622.67C806.741,-4622.23 820.377,-4622.14 833.273,-4622.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="833.326,-4625.89 843.417,-4622.67 833.517,-4618.89 833.326,-4625.89"/>
+</g>
+<!-- 7,3->8,1 -->
+<g id="edge520" class="edge"><title>7,3->8,1</title>
+<path fill="none" stroke="#06a2cb" d="M739.294,-4594.14C717.192,-4533.5 674,-4415 674,-4415 674,-4415 552,-4311 552,-4311 552,-4311 550,-4311 550,-4311 550,-4311 540,-4545 540,-4545 540,-4545 428,-4584 428,-4584 428,-4584 398,-4773 398,-4773 398,-4773 372.635,-4905.64 358.782,-4978.08"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="355.267,-4977.82 356.826,-4988.3 362.142,-4979.14 355.267,-4977.82"/>
+</g>
+<!-- 7,4->7,3 -->
+<g id="edge526" class="edge"><title>7,4->7,3</title>
+<path fill="none" stroke="#218559" d="M843.417,-4635.33C831.062,-4635.77 817.424,-4635.86 804.535,-4635.61"/>
+<polygon fill="#218559" stroke="#218559" points="804.491,-4632.11 794.398,-4635.33 804.298,-4639.1 804.491,-4632.11"/>
+</g>
+<!-- 6,2 -->
+<g id="node95" class="node"><title>6,2</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-3131" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="732.5" y="-3142.17" font-family="Times Roman,serif" font-size="10.00">6,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="730,-3123 730,-3137 775,-3137 775,-3123 730,-3123"/>
+<text text-anchor="start" x="737.5" y="-3127.67" font-family="Times Roman,serif" font-size="10.00">TTGCT</text>
+<polygon fill="#218559" stroke="#218559" points="730,-3109 730,-3123 775,-3123 775,-3109 730,-3109"/>
+<text text-anchor="start" x="735" y="-3113.67" font-family="Times Roman,serif" font-size="10.00">AGCAA</text>
+</g>
+<!-- 7,4->6,2 -->
+<g id="edge524" class="edge"><title>7,4->6,2</title>
+<path fill="none" stroke="#ebb035" d="M883.839,-4593.04C873.548,-4421.76 830,-3697 830,-3697 830,-3697 808,-3186 808,-3186 808,-3186 798.399,-3176.57 787.148,-3165.52"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="789.587,-3163.01 780,-3158.5 784.682,-3168 789.587,-3163.01"/>
+</g>
+<!-- 6,1->5,3 -->
+<g id="edge84" class="edge"><title>6,1->5,3</title>
+<path fill="none" stroke="#06a2cb" d="M648.295,-3105.03C661.489,-3093.72 674,-3083 674,-3083 674,-3083 696,-2650 696,-2650 696,-2650 818,-2587 818,-2587 818,-2587 942,-2543 942,-2543 942,-2543 957.73,-2533.93 975.061,-2523.93"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="977.283,-2526.69 984.196,-2518.66 973.785,-2520.62 977.283,-2526.69"/>
+</g>
+<!-- 6,1->6,2 -->
+<g id="edge82" class="edge"><title>6,1->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M660.398,-3124.67C672.741,-3124.23 686.377,-3124.14 699.273,-3124.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="699.326,-3127.89 709.417,-3124.67 699.517,-3120.89 699.326,-3127.89"/>
+</g>
+<!-- 6,2->7,4 -->
+<g id="edge88" class="edge"><title>6,2->7,4</title>
+<path fill="none" stroke="#ebb035" d="M790.372,-3147.93C805.796,-3154.73 820,-3161 820,-3161 820,-3161 830,-3697 830,-3697 830,-3697 871.947,-4395.11 883.232,-4582.94"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="879.745,-4583.26 883.839,-4593.04 886.733,-4582.84 879.745,-4583.26"/>
+</g>
+<!-- 6,2->6,1 -->
+<g id="edge90" class="edge"><title>6,2->6,1</title>
+<path fill="none" stroke="#218559" d="M709.417,-3137.33C697.062,-3137.77 683.424,-3137.86 670.535,-3137.61"/>
+<polygon fill="#218559" stroke="#218559" points="670.491,-3134.11 660.398,-3137.33 670.298,-3141.1 670.491,-3134.11"/>
+</g>
+<!-- 6,2->6,3 -->
+<g id="edge86" class="edge"><title>6,2->6,3</title>
+<path fill="none" stroke="#dd1e2f" d="M794.398,-3124.67C806.741,-3124.23 820.377,-3124.14 833.273,-3124.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="833.326,-3127.89 843.417,-3124.67 833.517,-3120.89 833.326,-3127.89"/>
+</g>
+<!-- 6,3->5,1 -->
+<g id="edge94" class="edge"><title>6,3->5,1</title>
+<path fill="none" stroke="#06a2cb" d="M855.705,-3105.03C842.511,-3093.72 830,-3083 830,-3083 830,-3083 820,-2834 820,-2834 820,-2834 808,-2692 808,-2692 808,-2692 781.55,-2600.37 765.006,-2543.06"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="768.277,-2541.77 762.141,-2533.13 761.552,-2543.71 768.277,-2541.77"/>
+</g>
+<!-- 6,3->6,2 -->
+<g id="edge96" class="edge"><title>6,3->6,2</title>
+<path fill="none" stroke="#218559" d="M843.417,-3137.33C831.062,-3137.77 817.424,-3137.86 804.535,-3137.61"/>
+<polygon fill="#218559" stroke="#218559" points="804.491,-3134.11 794.398,-3137.33 804.298,-3141.1 804.491,-3134.11"/>
+</g>
+<!-- 6,3->6,4 -->
+<g id="edge92" class="edge"><title>6,3->6,4</title>
+<path fill="none" stroke="#dd1e2f" d="M928.398,-3124.67C940.741,-3124.23 954.377,-3124.14 967.273,-3124.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="967.326,-3127.89 977.417,-3124.67 967.517,-3120.89 967.326,-3127.89"/>
+</g>
+<!-- 6,4->7,2 -->
+<g id="edge98" class="edge"><title>6,4->7,2</title>
+<path fill="none" stroke="#ebb035" d="M1015.06,-3166.84C997.916,-3291.25 942,-3697 942,-3697 942,-3697 820,-4503 820,-4503 820,-4503 684,-4551 684,-4551 684,-4551 674,-4579 674,-4579 674,-4579 665.629,-4586.47 655.36,-4595.64"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="652.768,-4593.26 647.64,-4602.54 657.43,-4598.49 652.768,-4593.26"/>
+</g>
+<!-- 6,4->6,3 -->
+<g id="edge100" class="edge"><title>6,4->6,3</title>
+<path fill="none" stroke="#218559" d="M977.417,-3137.33C965.062,-3137.77 951.424,-3137.86 938.535,-3137.61"/>
+<polygon fill="#218559" stroke="#218559" points="938.491,-3134.11 928.398,-3137.33 938.298,-3141.1 938.491,-3134.11"/>
+</g>
+<!-- 9,1 -->
+<g id="node99" class="node"><title>9,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-3483" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="464.5" y="-3494.17" font-family="Times Roman,serif" font-size="10.00">9,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="462,-3475 462,-3489 507,-3489 507,-3475 462,-3475"/>
+<text text-anchor="start" x="468" y="-3479.67" font-family="Times Roman,serif" font-size="10.00">CAGAT</text>
+<polygon fill="#218559" stroke="#218559" points="462,-3461 462,-3475 507,-3475 507,-3461 462,-3461"/>
+<text text-anchor="start" x="469" y="-3465.67" font-family="Times Roman,serif" font-size="10.00">ATCTG</text>
+</g>
+<!-- 9,2 -->
+<g id="node100" class="node"><title>9,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-3483" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="598.5" y="-3494.17" font-family="Times Roman,serif" font-size="10.00">9,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="596,-3475 596,-3489 641,-3489 641,-3475 596,-3475"/>
+<text text-anchor="start" x="603" y="-3479.67" font-family="Times Roman,serif" font-size="10.00">AGATT</text>
+<polygon fill="#218559" stroke="#218559" points="596,-3461 596,-3475 641,-3475 641,-3461 596,-3461"/>
+<text text-anchor="start" x="603.5" y="-3465.67" font-family="Times Roman,serif" font-size="10.00">AATCT</text>
+</g>
+<!-- 9,1->9,2 -->
+<g id="edge528" class="edge"><title>9,1->9,2</title>
+<path fill="none" stroke="#dd1e2f" d="M526.398,-3476.67C538.741,-3476.23 552.377,-3476.14 565.273,-3476.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="565.326,-3479.89 575.417,-3476.67 565.517,-3472.89 565.326,-3479.89"/>
+</g>
+<!-- 10,2 -->
+<g id="node115" class="node"><title>10,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-3035" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-3046.17" font-family="Times Roman,serif" font-size="10.00">10,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-3027 593,-3041 644,-3041 644,-3027 593,-3027"/>
+<text text-anchor="start" x="600.5" y="-3031.67" font-family="Times Roman,serif" font-size="10.00">GCAGA</text>
+<polygon fill="#218559" stroke="#218559" points="593,-3013 593,-3027 644,-3027 644,-3013 593,-3013"/>
+<text text-anchor="start" x="603" y="-3017.67" font-family="Times Roman,serif" font-size="10.00">TCTGC</text>
+</g>
+<!-- 9,1->10,2 -->
+<g id="edge530" class="edge"><title>9,1->10,2</title>
+<path fill="none" stroke="#218559" d="M493.883,-3447.53C509.959,-3389.82 540,-3282 540,-3282 540,-3282 552,-3197 552,-3197 552,-3197 562,-3083 562,-3083 562,-3083 569.283,-3076.76 578.596,-3068.78"/>
+<polygon fill="#218559" stroke="#218559" points="581.085,-3071.25 586.4,-3062.09 576.529,-3065.94 581.085,-3071.25"/>
+</g>
+<!-- 9,2->9,1 -->
+<g id="edge536" class="edge"><title>9,2->9,1</title>
+<path fill="none" stroke="#218559" d="M575.417,-3489.33C563.062,-3489.77 549.424,-3489.86 536.535,-3489.61"/>
+<polygon fill="#218559" stroke="#218559" points="536.491,-3486.11 526.398,-3489.33 536.298,-3493.1 536.491,-3486.11"/>
+</g>
+<!-- 9,3 -->
+<g id="node101" class="node"><title>9,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-3483" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="732.5" y="-3494.17" font-family="Times Roman,serif" font-size="10.00">9,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="730,-3475 730,-3489 775,-3489 775,-3475 730,-3475"/>
+<text text-anchor="start" x="738" y="-3479.67" font-family="Times Roman,serif" font-size="10.00">GATTT</text>
+<polygon fill="#218559" stroke="#218559" points="730,-3461 730,-3475 775,-3475 775,-3461 730,-3461"/>
+<text text-anchor="start" x="736.5" y="-3465.67" font-family="Times Roman,serif" font-size="10.00">AAATC</text>
+</g>
+<!-- 9,2->9,3 -->
+<g id="edge532" class="edge"><title>9,2->9,3</title>
+<path fill="none" stroke="#dd1e2f" d="M660.398,-3476.67C672.741,-3476.23 686.377,-3476.14 699.273,-3476.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="699.326,-3479.89 709.417,-3476.67 699.517,-3472.89 699.326,-3479.89"/>
+</g>
+<!-- 8,4 -->
+<g id="node107" class="node"><title>8,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-5024" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="732.5" y="-5035.17" font-family="Times Roman,serif" font-size="10.00">8,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="730,-5016 730,-5030 775,-5030 775,-5016 730,-5016"/>
+<text text-anchor="start" x="736.5" y="-5020.67" font-family="Times Roman,serif" font-size="10.00">AAATC</text>
+<polygon fill="#218559" stroke="#218559" points="730,-5002 730,-5016 775,-5016 775,-5002 730,-5002"/>
+<text text-anchor="start" x="738" y="-5006.67" font-family="Times Roman,serif" font-size="10.00">GATTT</text>
+</g>
+<!-- 9,2->8,4 -->
+<g id="edge534" class="edge"><title>9,2->8,4</title>
+<path fill="none" stroke="#ebb035" d="M633.898,-3516.5C650.454,-3551.38 674,-3601 674,-3601 674,-3601 686,-3822 686,-3822 686,-3822 696,-4773 696,-4773 696,-4773 725.791,-4906.53 741.915,-4978.8"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="738.502,-4979.57 744.095,-4988.57 745.334,-4978.05 738.502,-4979.57"/>
+</g>
+<!-- 9,3->9,2 -->
+<g id="edge542" class="edge"><title>9,3->9,2</title>
+<path fill="none" stroke="#218559" d="M709.417,-3489.33C697.062,-3489.77 683.424,-3489.86 670.535,-3489.61"/>
+<polygon fill="#218559" stroke="#218559" points="670.491,-3486.11 660.398,-3489.33 670.298,-3493.1 670.491,-3486.11"/>
+</g>
+<!-- 9,4 -->
+<g id="node102" class="node"><title>9,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-3483" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="866.5" y="-3494.17" font-family="Times Roman,serif" font-size="10.00">9,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="864,-3475 864,-3489 909,-3489 909,-3475 864,-3475"/>
+<text text-anchor="start" x="872.5" y="-3479.67" font-family="Times Roman,serif" font-size="10.00">ATTTC</text>
+<polygon fill="#218559" stroke="#218559" points="864,-3461 864,-3475 909,-3475 909,-3461 864,-3461"/>
+<text text-anchor="start" x="870" y="-3465.67" font-family="Times Roman,serif" font-size="10.00">GAAAT</text>
+</g>
+<!-- 9,3->9,4 -->
+<g id="edge538" class="edge"><title>9,3->9,4</title>
+<path fill="none" stroke="#dd1e2f" d="M794.398,-3476.67C806.741,-3476.23 820.377,-3476.14 833.273,-3476.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="833.326,-3479.89 843.417,-3476.67 833.517,-3472.89 833.326,-3479.89"/>
+</g>
+<!-- 10,4 -->
+<g id="node117" class="node"><title>10,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-3035" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-3046.17" font-family="Times Roman,serif" font-size="10.00">10,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-3027 861,-3041 912,-3041 912,-3027 861,-3027"/>
+<text text-anchor="start" x="871" y="-3031.67" font-family="Times Roman,serif" font-size="10.00">AGATT</text>
+<polygon fill="#218559" stroke="#218559" points="861,-3013 861,-3027 912,-3027 912,-3013 861,-3013"/>
+<text text-anchor="start" x="871.5" y="-3017.67" font-family="Times Roman,serif" font-size="10.00">AATCT</text>
+</g>
+<!-- 9,3->10,4 -->
+<g id="edge540" class="edge"><title>9,3->10,4</title>
+<path fill="none" stroke="#218559" d="M761.883,-3447.53C777.959,-3389.82 808,-3282 808,-3282 808,-3282 820,-3197 820,-3197 820,-3197 830,-3083 830,-3083 830,-3083 837.283,-3076.76 846.596,-3068.78"/>
+<polygon fill="#218559" stroke="#218559" points="849.085,-3071.25 854.4,-3062.09 844.529,-3065.94 849.085,-3071.25"/>
+</g>
+<!-- 9,4->9,3 -->
+<g id="edge546" class="edge"><title>9,4->9,3</title>
+<path fill="none" stroke="#218559" d="M843.417,-3489.33C831.062,-3489.77 817.424,-3489.86 804.535,-3489.61"/>
+<polygon fill="#218559" stroke="#218559" points="804.491,-3486.11 794.398,-3489.33 804.298,-3493.1 804.491,-3486.11"/>
+</g>
+<!-- 8,2 -->
+<g id="node105" class="node"><title>8,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-5024" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="464.5" y="-5035.17" font-family="Times Roman,serif" font-size="10.00">8,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="462,-5016 462,-5030 507,-5030 507,-5016 462,-5016"/>
+<text text-anchor="start" x="467.5" y="-5020.67" font-family="Times Roman,serif" font-size="10.00">TGAAA</text>
+<polygon fill="#218559" stroke="#218559" points="462,-5002 462,-5016 507,-5016 507,-5002 462,-5002"/>
+<text text-anchor="start" x="470" y="-5006.67" font-family="Times Roman,serif" font-size="10.00">TTTCA</text>
+</g>
+<!-- 9,4->8,2 -->
+<g id="edge544" class="edge"><title>9,4->8,2</title>
+<path fill="none" stroke="#ebb035" d="M870.102,-3516.5C853.546,-3551.38 830,-3601 830,-3601 830,-3601 808,-3996 808,-3996 808,-3996 674,-4511 674,-4511 674,-4511 562,-4541 562,-4541 562,-4541 540,-4855 540,-4855 540,-4855 515.396,-4929.25 498.805,-4979.32"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="495.426,-4978.39 495.603,-4988.98 502.071,-4980.59 495.426,-4978.39"/>
+</g>
+<!-- 8,1->24,1 -->
+<g id="edge102" class="edge"><title>8,1->24,1</title>
+<path fill="none" stroke="#dd1e2f" d="M382.78,-5047.48C400.328,-5060.06 422.158,-5075.7 441.047,-5089.23"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="439.049,-5092.1 449.217,-5095.08 443.126,-5086.41 439.049,-5092.1"/>
+</g>
+<!-- 8,1->7,3 -->
+<g id="edge106" class="edge"><title>8,1->7,3</title>
+<path fill="none" stroke="#06a2cb" d="M356.826,-4988.3C370.078,-4919.01 398,-4773 398,-4773 398,-4773 428,-4551 428,-4551 428,-4551 540,-4511 540,-4511 540,-4511 550,-4259 550,-4259 550,-4259 562,-4248 562,-4248 562,-4248 674,-4162 674,-4162 674,-4162 684,-3800 684,-3800 684,-3800 686,-3800 686,-3800 686,-3800 696,-3996 696,-3996 696,-3996 734.995,-4436.78 747.905,-4582.71"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="744.42,-4583.04 748.788,-4592.69 751.393,-4582.42 744.42,-4583.04"/>
+</g>
+<!-- 8,1->8,2 -->
+<g id="edge104" class="edge"><title>8,1->8,2</title>
+<path fill="none" stroke="#dd1e2f" d="M392.398,-5017.67C404.741,-5017.23 418.377,-5017.14 431.273,-5017.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="431.326,-5020.89 441.417,-5017.67 431.517,-5013.89 431.326,-5020.89"/>
+</g>
+<!-- 8,2->9,4 -->
+<g id="edge110" class="edge"><title>8,2->9,4</title>
+<path fill="none" stroke="#ebb035" d="M495.845,-4988.99C515.327,-4931.4 552,-4823 552,-4823 552,-4823 562,-4581 562,-4581 562,-4581 674,-4541 674,-4541 674,-4541 696,-4504 696,-4504 696,-4504 820,-4465 820,-4465 820,-4465 830,-3601 830,-3601 830,-3601 849.673,-3559.55 865.699,-3525.78"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="868.977,-3527.03 870.102,-3516.5 862.653,-3524.03 868.977,-3527.03"/>
+</g>
+<!-- 8,2->8,1 -->
+<g id="edge112" class="edge"><title>8,2->8,1</title>
+<path fill="none" stroke="#218559" d="M441.417,-5030.33C429.062,-5030.77 415.424,-5030.86 402.535,-5030.61"/>
+<polygon fill="#218559" stroke="#218559" points="402.491,-5027.11 392.398,-5030.33 402.298,-5034.1 402.491,-5027.11"/>
+</g>
+<!-- 8,2->8,3 -->
+<g id="edge108" class="edge"><title>8,2->8,3</title>
+<path fill="none" stroke="#dd1e2f" d="M526.398,-5017.67C538.741,-5017.23 552.377,-5017.14 565.273,-5017.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="565.326,-5020.89 575.417,-5017.67 565.517,-5013.89 565.326,-5020.89"/>
+</g>
+<!-- 8,3->7,1 -->
+<g id="edge116" class="edge"><title>8,3->7,1</title>
+<path fill="none" stroke="#06a2cb" d="M610.095,-4988.57C594.659,-4919.38 562,-4773 562,-4773 562,-4773 552,-4315 552,-4315 552,-4315 550,-4315 550,-4315 550,-4315 540,-4579 540,-4579 540,-4579 531.629,-4586.47 521.36,-4595.64"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="518.768,-4593.26 513.64,-4602.54 523.43,-4598.49 518.768,-4593.26"/>
+</g>
+<!-- 8,3->8,2 -->
+<g id="edge120" class="edge"><title>8,3->8,2</title>
+<path fill="none" stroke="#218559" d="M575.417,-5030.33C563.062,-5030.77 549.424,-5030.86 536.535,-5030.61"/>
+<polygon fill="#218559" stroke="#218559" points="536.491,-5027.11 526.398,-5030.33 536.298,-5034.1 536.491,-5027.11"/>
+</g>
+<!-- 8,3->8,4 -->
+<g id="edge114" class="edge"><title>8,3->8,4</title>
+<path fill="none" stroke="#dd1e2f" d="M660.398,-5017.67C672.741,-5017.23 686.377,-5017.14 699.273,-5017.39"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="699.326,-5020.89 709.417,-5017.67 699.517,-5013.89 699.326,-5020.89"/>
+</g>
+<!-- 8,3->19,4 -->
+<g id="edge118" class="edge"><title>8,3->19,4</title>
+<path fill="none" stroke="#218559" d="M587.705,-5049.97C574.511,-5061.28 562,-5072 562,-5072 562,-5072 552,-5193 552,-5193 552,-5193 545.818,-5195.09 537.182,-5198.01"/>
+<polygon fill="#218559" stroke="#218559" points="535.802,-5194.78 527.45,-5201.3 538.045,-5201.41 535.802,-5194.78"/>
+</g>
+<!-- 8,4->9,2 -->
+<g id="edge122" class="edge"><title>8,4->9,2</title>
+<path fill="none" stroke="#ebb035" d="M744.095,-4988.57C728.659,-4919.38 696,-4773 696,-4773 696,-4773 686,-3557 686,-3557 686,-3557 668.526,-3537.98 651.293,-3519.23"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="653.829,-3516.82 644.485,-3511.82 648.674,-3521.55 653.829,-3516.82"/>
+</g>
+<!-- 8,4->8,3 -->
+<g id="edge124" class="edge"><title>8,4->8,3</title>
+<path fill="none" stroke="#218559" d="M709.417,-5030.33C697.062,-5030.77 683.424,-5030.86 670.535,-5030.61"/>
+<polygon fill="#218559" stroke="#218559" points="670.491,-5027.11 660.398,-5030.33 670.298,-5034.1 670.491,-5027.11"/>
+</g>
+<!-- 11,1 -->
+<g id="node109" class="node"><title>11,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">11,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-652 325,-666 376,-666 376,-652 325,-652"/>
+<text text-anchor="start" x="333.5" y="-656.667" font-family="Times Roman,serif" font-size="10.00">CTGGC</text>
+<polygon fill="#218559" stroke="#218559" points="325,-638 325,-652 376,-652 376,-638 325,-638"/>
+<text text-anchor="start" x="332.5" y="-642.667" font-family="Times Roman,serif" font-size="10.00">GCCAG</text>
+</g>
+<!-- 11,2 -->
+<g id="node110" class="node"><title>11,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">11,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-652 459,-666 510,-666 510,-652 459,-652"/>
+<text text-anchor="start" x="467.5" y="-656.667" font-family="Times Roman,serif" font-size="10.00">TGGCA</text>
+<polygon fill="#218559" stroke="#218559" points="459,-638 459,-652 510,-652 510,-638 459,-638"/>
+<text text-anchor="start" x="467.5" y="-642.667" font-family="Times Roman,serif" font-size="10.00">TGCCA</text>
+</g>
+<!-- 11,1->11,2 -->
+<g id="edge548" class="edge"><title>11,1->11,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-653.529C406.501,-653.254 416.748,-653.183 426.703,-653.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-656.817 436.933,-653.523 427.007,-649.819 426.864,-656.817"/>
+</g>
+<!-- 12,2 -->
+<g id="node125" class="node"><title>12,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-564" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-575.167" font-family="Times Roman,serif" font-size="10.00">12,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-556 459,-570 510,-570 510,-556 459,-556"/>
+<text text-anchor="start" x="468.5" y="-560.667" font-family="Times Roman,serif" font-size="10.00">TCTGG</text>
+<polygon fill="#218559" stroke="#218559" points="459,-542 459,-556 510,-556 510,-542 459,-542"/>
+<text text-anchor="start" x="467" y="-546.667" font-family="Times Roman,serif" font-size="10.00">CCAGA</text>
+</g>
+<!-- 11,1->12,2 -->
+<g id="edge550" class="edge"><title>11,1->12,2</title>
+<path fill="none" stroke="#218559" d="M387.58,-637.894C403.307,-628.643 418,-620 418,-620 418,-620 430.304,-609.561 444.286,-597.696"/>
+<polygon fill="#218559" stroke="#218559" points="446.799,-600.154 452.16,-591.016 442.27,-594.817 446.799,-600.154"/>
+</g>
+<!-- 11,2->11,1 -->
+<g id="edge556" class="edge"><title>11,2->11,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-666.477C427.29,-666.749 417.041,-666.817 407.091,-666.684"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-663.18 396.867,-666.471 406.792,-670.179 406.937,-663.18"/>
+</g>
+<!-- 11,3 -->
+<g id="node111" class="node"><title>11,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">11,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-652 593,-666 644,-666 644,-652 593,-652"/>
+<text text-anchor="start" x="600.5" y="-656.667" font-family="Times Roman,serif" font-size="10.00">GGCAG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-638 593,-652 644,-652 644,-638 593,-638"/>
+<text text-anchor="start" x="601.5" y="-642.667" font-family="Times Roman,serif" font-size="10.00">CTGCC</text>
+</g>
+<!-- 11,2->11,3 -->
+<g id="edge554" class="edge"><title>11,2->11,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-653.529C540.501,-653.254 550.748,-653.183 560.703,-653.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-656.817 570.933,-653.523 561.007,-649.819 560.864,-656.817"/>
+</g>
+<!-- 10,1 -->
+<g id="node114" class="node"><title>10,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-3035" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-3046.17" font-family="Times Roman,serif" font-size="10.00">10,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-3027 459,-3041 510,-3041 510,-3027 459,-3027"/>
+<text text-anchor="start" x="466.5" y="-3031.67" font-family="Times Roman,serif" font-size="10.00">GGCAG</text>
+<polygon fill="#218559" stroke="#218559" points="459,-3013 459,-3027 510,-3027 510,-3013 459,-3013"/>
+<text text-anchor="start" x="467.5" y="-3017.67" font-family="Times Roman,serif" font-size="10.00">CTGCC</text>
+</g>
+<!-- 11,2->10,1 -->
+<g id="edge552" class="edge"><title>11,2->10,1</title>
+<path fill="none" stroke="#dd1e2f" d="M480.949,-696.137C467.359,-857.109 413,-1501 413,-1501 413,-1501 413,-2875 413,-2875 413,-2875 443.68,-2944.14 464.704,-2991.52"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="461.513,-2992.96 468.769,-3000.68 467.912,-2990.12 461.513,-2992.96"/>
+</g>
+<!-- 11,3->11,2 -->
+<g id="edge562" class="edge"><title>11,3->11,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-666.477C561.29,-666.749 551.041,-666.817 541.091,-666.684"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-663.18 530.867,-666.471 540.792,-670.179 540.937,-663.18"/>
+</g>
+<!-- 11,4 -->
+<g id="node112" class="node"><title>11,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">11,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-652 727,-666 778,-666 778,-652 727,-652"/>
+<text text-anchor="start" x="734.5" y="-656.667" font-family="Times Roman,serif" font-size="10.00">GCAGA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-638 727,-652 778,-652 778,-638 727,-638"/>
+<text text-anchor="start" x="737" y="-642.667" font-family="Times Roman,serif" font-size="10.00">TCTGC</text>
+</g>
+<!-- 11,3->11,4 -->
+<g id="edge558" class="edge"><title>11,3->11,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-653.529C674.501,-653.254 684.748,-653.183 694.703,-653.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-656.817 704.933,-653.523 695.007,-649.819 694.864,-656.817"/>
+</g>
+<!-- 12,4 -->
+<g id="node127" class="node"><title>12,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-564" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-575.167" font-family="Times Roman,serif" font-size="10.00">12,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-556 727,-570 778,-570 778,-556 727,-556"/>
+<text text-anchor="start" x="735.5" y="-560.667" font-family="Times Roman,serif" font-size="10.00">TGGCA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-542 727,-556 778,-556 778,-542 727,-542"/>
+<text text-anchor="start" x="735.5" y="-546.667" font-family="Times Roman,serif" font-size="10.00">TGCCA</text>
+</g>
+<!-- 11,3->12,4 -->
+<g id="edge560" class="edge"><title>11,3->12,4</title>
+<path fill="none" stroke="#218559" d="M655.58,-637.894C671.307,-628.643 686,-620 686,-620 686,-620 698.304,-609.561 712.286,-597.696"/>
+<polygon fill="#218559" stroke="#218559" points="714.799,-600.154 720.16,-591.016 710.27,-594.817 714.799,-600.154"/>
+</g>
+<!-- 11,4->11,3 -->
+<g id="edge566" class="edge"><title>11,4->11,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-666.477C695.29,-666.749 685.041,-666.817 675.091,-666.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-663.18 664.867,-666.471 674.792,-670.179 674.937,-663.18"/>
+</g>
+<!-- 10,3 -->
+<g id="node116" class="node"><title>10,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-3035" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-3046.17" font-family="Times Roman,serif" font-size="10.00">10,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-3027 727,-3041 778,-3041 778,-3027 727,-3027"/>
+<text text-anchor="start" x="736" y="-3031.67" font-family="Times Roman,serif" font-size="10.00">CAGAT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-3013 727,-3027 778,-3027 778,-3013 727,-3013"/>
+<text text-anchor="start" x="737" y="-3017.67" font-family="Times Roman,serif" font-size="10.00">ATCTG</text>
+</g>
+<!-- 11,4->10,3 -->
+<g id="edge564" class="edge"><title>11,4->10,3</title>
+<path fill="none" stroke="#dd1e2f" d="M720.07,-687.164C702.813,-701.845 685,-717 685,-717 685,-717 685,-2875 685,-2875 685,-2875 713.831,-2943.85 733.667,-2991.22"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="730.536,-2992.8 737.627,-3000.68 736.993,-2990.1 730.536,-2992.8"/>
+</g>
+<!-- 10,1->11,2 -->
+<g id="edge128" class="edge"><title>10,1->11,2</title>
+<path fill="none" stroke="#218559" d="M468.769,-3000.68C447.907,-2953.66 413,-2875 413,-2875 413,-2875 413,-1501 413,-1501 413,-1501 465.257,-882.016 480.107,-706.113"/>
+<polygon fill="#218559" stroke="#218559" points="483.595,-706.396 480.949,-696.137 476.62,-705.807 483.595,-706.396"/>
+</g>
+<!-- 10,1->10,2 -->
+<g id="edge126" class="edge"><title>10,1->10,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-3028.53C540.501,-3028.25 550.748,-3028.18 560.703,-3028.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-3031.82 570.933,-3028.52 561.007,-3024.82 560.864,-3031.82"/>
+</g>
+<!-- 10,2->9,1 -->
+<g id="edge130" class="edge"><title>10,2->9,1</title>
+<path fill="none" stroke="#dd1e2f" d="M586.4,-3062.09C573.697,-3072.97 562,-3083 562,-3083 562,-3083 540,-3282 540,-3282 540,-3282 513.155,-3378.35 496.638,-3437.64"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="493.195,-3436.96 493.883,-3447.53 499.938,-3438.83 493.195,-3436.96"/>
+</g>
+<!-- 10,2->10,1 -->
+<g id="edge134" class="edge"><title>10,2->10,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-3041.48C561.29,-3041.75 551.041,-3041.82 541.091,-3041.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-3038.18 530.867,-3041.47 540.792,-3045.18 540.937,-3038.18"/>
+</g>
+<!-- 10,2->10,3 -->
+<g id="edge132" class="edge"><title>10,2->10,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-3028.53C674.501,-3028.25 684.748,-3028.18 694.703,-3028.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-3031.82 704.933,-3028.52 695.007,-3024.82 694.864,-3031.82"/>
+</g>
+<!-- 10,3->11,4 -->
+<g id="edge138" class="edge"><title>10,3->11,4</title>
+<path fill="none" stroke="#218559" d="M737.627,-3000.68C717.94,-2953.66 685,-2875 685,-2875 685,-2875 685,-717 685,-717 685,-717 697.917,-706.011 712.409,-693.682"/>
+<polygon fill="#218559" stroke="#218559" points="714.721,-696.31 720.07,-687.164 710.185,-690.979 714.721,-696.31"/>
+</g>
+<!-- 10,3->10,2 -->
+<g id="edge140" class="edge"><title>10,3->10,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-3041.48C695.29,-3041.75 685.041,-3041.82 675.091,-3041.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-3038.18 664.867,-3041.47 674.792,-3045.18 674.937,-3038.18"/>
+</g>
+<!-- 10,3->10,4 -->
+<g id="edge136" class="edge"><title>10,3->10,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-3028.53C808.501,-3028.25 818.748,-3028.18 828.703,-3028.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-3031.82 838.933,-3028.52 829.007,-3024.82 828.864,-3031.82"/>
+</g>
+<!-- 10,4->9,3 -->
+<g id="edge142" class="edge"><title>10,4->9,3</title>
+<path fill="none" stroke="#dd1e2f" d="M854.4,-3062.09C841.697,-3072.97 830,-3083 830,-3083 830,-3083 808,-3282 808,-3282 808,-3282 781.155,-3378.35 764.638,-3437.64"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="761.195,-3436.96 761.883,-3447.53 767.938,-3438.83 761.195,-3436.96"/>
+</g>
+<!-- 10,4->10,3 -->
+<g id="edge144" class="edge"><title>10,4->10,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-3041.48C829.29,-3041.75 819.041,-3041.82 809.091,-3041.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-3038.18 798.867,-3041.47 808.792,-3045.18 808.937,-3038.18"/>
+</g>
+<!-- 13,1 -->
+<g id="node119" class="node"><title>13,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-468" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-479.167" font-family="Times Roman,serif" font-size="10.00">13,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-460 325,-474 376,-474 376,-460 325,-460"/>
+<text text-anchor="start" x="336" y="-464.667" font-family="Times Roman,serif" font-size="10.00">ATCTC</text>
+<polygon fill="#218559" stroke="#218559" points="325,-446 325,-460 376,-460 376,-446 325,-446"/>
+<text text-anchor="start" x="334" y="-450.667" font-family="Times Roman,serif" font-size="10.00">GAGAT</text>
+</g>
+<!-- 13,2 -->
+<g id="node120" class="node"><title>13,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-468" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-479.167" font-family="Times Roman,serif" font-size="10.00">13,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-460 459,-474 510,-474 510,-460 459,-460"/>
+<text text-anchor="start" x="470" y="-464.667" font-family="Times Roman,serif" font-size="10.00">TCTCT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-446 459,-460 510,-460 510,-446 459,-446"/>
+<text text-anchor="start" x="467" y="-450.667" font-family="Times Roman,serif" font-size="10.00">AGAGA</text>
+</g>
+<!-- 13,1->13,2 -->
+<g id="edge568" class="edge"><title>13,1->13,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-461.529C406.501,-461.254 416.748,-461.183 426.703,-461.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-464.817 436.933,-461.523 427.007,-457.819 426.864,-464.817"/>
+</g>
+<!-- 14,2 -->
+<g id="node135" class="node"><title>14,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-372" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-383.167" font-family="Times Roman,serif" font-size="10.00">14,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-364 459,-378 510,-378 510,-364 459,-364"/>
+<text text-anchor="start" x="469.5" y="-368.667" font-family="Times Roman,serif" font-size="10.00">CATCT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-350 459,-364 510,-364 510,-350 459,-350"/>
+<text text-anchor="start" x="468" y="-354.667" font-family="Times Roman,serif" font-size="10.00">AGATG</text>
+</g>
+<!-- 13,1->14,2 -->
+<g id="edge570" class="edge"><title>13,1->14,2</title>
+<path fill="none" stroke="#218559" d="M387.58,-445.894C403.307,-436.643 418,-428 418,-428 418,-428 430.304,-417.561 444.286,-405.696"/>
+<polygon fill="#218559" stroke="#218559" points="446.799,-408.154 452.16,-399.016 442.27,-402.817 446.799,-408.154"/>
+</g>
+<!-- 13,2->13,1 -->
+<g id="edge576" class="edge"><title>13,2->13,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-474.477C427.29,-474.749 417.041,-474.817 407.091,-474.684"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-471.18 396.867,-474.471 406.792,-478.179 406.937,-471.18"/>
+</g>
+<!-- 13,3 -->
+<g id="node121" class="node"><title>13,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-468" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-479.167" font-family="Times Roman,serif" font-size="10.00">13,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-460 593,-474 644,-474 644,-460 593,-460"/>
+<text text-anchor="start" x="603" y="-464.667" font-family="Times Roman,serif" font-size="10.00">CTCTG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-446 593,-460 644,-460 644,-446 593,-446"/>
+<text text-anchor="start" x="601" y="-450.667" font-family="Times Roman,serif" font-size="10.00">CAGAG</text>
+</g>
+<!-- 13,2->13,3 -->
+<g id="edge574" class="edge"><title>13,2->13,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-461.529C540.501,-461.254 550.748,-461.183 560.703,-461.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-464.817 570.933,-461.523 561.007,-457.819 560.864,-464.817"/>
+</g>
+<!-- 12,1 -->
+<g id="node124" class="node"><title>12,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-564" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-575.167" font-family="Times Roman,serif" font-size="10.00">12,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-556 325,-570 376,-570 376,-556 325,-556"/>
+<text text-anchor="start" x="335" y="-560.667" font-family="Times Roman,serif" font-size="10.00">CTCTG</text>
+<polygon fill="#218559" stroke="#218559" points="325,-542 325,-556 376,-556 376,-542 325,-542"/>
+<text text-anchor="start" x="333" y="-546.667" font-family="Times Roman,serif" font-size="10.00">CAGAG</text>
+</g>
+<!-- 13,2->12,1 -->
+<g id="edge572" class="edge"><title>13,2->12,1</title>
+<path fill="none" stroke="#dd1e2f" d="M452.16,-495.016C435.283,-509.336 418,-524 418,-524 418,-524 408.3,-529.706 396.227,-536.808"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="394.425,-533.807 387.58,-541.894 397.974,-539.841 394.425,-533.807"/>
+</g>
+<!-- 13,3->13,2 -->
+<g id="edge582" class="edge"><title>13,3->13,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-474.477C561.29,-474.749 551.041,-474.817 541.091,-474.684"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-471.18 530.867,-474.471 540.792,-478.179 540.937,-471.18"/>
+</g>
+<!-- 13,4 -->
+<g id="node122" class="node"><title>13,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-468" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-479.167" font-family="Times Roman,serif" font-size="10.00">13,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-460 727,-474 778,-474 778,-460 727,-460"/>
+<text text-anchor="start" x="736.5" y="-464.667" font-family="Times Roman,serif" font-size="10.00">TCTGG</text>
+<polygon fill="#218559" stroke="#218559" points="727,-446 727,-460 778,-460 778,-446 727,-446"/>
+<text text-anchor="start" x="735" y="-450.667" font-family="Times Roman,serif" font-size="10.00">CCAGA</text>
+</g>
+<!-- 13,3->13,4 -->
+<g id="edge578" class="edge"><title>13,3->13,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-461.529C674.501,-461.254 684.748,-461.183 694.703,-461.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-464.817 704.933,-461.523 695.007,-457.819 694.864,-464.817"/>
+</g>
+<!-- 14,4 -->
+<g id="node137" class="node"><title>14,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-372" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-383.167" font-family="Times Roman,serif" font-size="10.00">14,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-364 727,-378 778,-378 778,-364 727,-364"/>
+<text text-anchor="start" x="738" y="-368.667" font-family="Times Roman,serif" font-size="10.00">TCTCT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-350 727,-364 778,-364 778,-350 727,-350"/>
+<text text-anchor="start" x="735" y="-354.667" font-family="Times Roman,serif" font-size="10.00">AGAGA</text>
+</g>
+<!-- 13,3->14,4 -->
+<g id="edge580" class="edge"><title>13,3->14,4</title>
+<path fill="none" stroke="#218559" d="M655.58,-445.894C671.307,-436.643 686,-428 686,-428 686,-428 698.304,-417.561 712.286,-405.696"/>
+<polygon fill="#218559" stroke="#218559" points="714.799,-408.154 720.16,-399.016 710.27,-402.817 714.799,-408.154"/>
+</g>
+<!-- 13,4->13,3 -->
+<g id="edge586" class="edge"><title>13,4->13,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-474.477C695.29,-474.749 685.041,-474.817 675.091,-474.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-471.18 664.867,-474.471 674.792,-478.179 674.937,-471.18"/>
+</g>
+<!-- 12,3 -->
+<g id="node126" class="node"><title>12,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-564" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-575.167" font-family="Times Roman,serif" font-size="10.00">12,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-556 593,-570 644,-570 644,-556 593,-556"/>
+<text text-anchor="start" x="601.5" y="-560.667" font-family="Times Roman,serif" font-size="10.00">CTGGC</text>
+<polygon fill="#218559" stroke="#218559" points="593,-542 593,-556 644,-556 644,-542 593,-542"/>
+<text text-anchor="start" x="600.5" y="-546.667" font-family="Times Roman,serif" font-size="10.00">GCCAG</text>
+</g>
+<!-- 13,4->12,3 -->
+<g id="edge584" class="edge"><title>13,4->12,3</title>
+<path fill="none" stroke="#dd1e2f" d="M720.16,-495.016C703.283,-509.336 686,-524 686,-524 686,-524 676.3,-529.706 664.227,-536.808"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="662.425,-533.807 655.58,-541.894 665.974,-539.841 662.425,-533.807"/>
+</g>
+<!-- 12,1->13,2 -->
+<g id="edge148" class="edge"><title>12,1->13,2</title>
+<path fill="none" stroke="#218559" d="M384.857,-539.028C402.072,-526.694 422.998,-511.703 441.18,-498.677"/>
+<polygon fill="#218559" stroke="#218559" points="443.227,-501.516 449.317,-492.847 439.15,-495.826 443.227,-501.516"/>
+</g>
+<!-- 12,1->12,2 -->
+<g id="edge146" class="edge"><title>12,1->12,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-557.529C406.501,-557.254 416.748,-557.183 426.703,-557.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-560.817 436.933,-557.523 427.007,-553.819 426.864,-560.817"/>
+</g>
+<!-- 12,2->11,1 -->
+<g id="edge150" class="edge"><title>12,2->11,1</title>
+<path fill="none" stroke="#dd1e2f" d="M449.317,-588.847C432.127,-601.162 411.205,-616.152 393.005,-629.191"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="390.948,-626.359 384.857,-635.028 395.025,-632.049 390.948,-626.359"/>
+</g>
+<!-- 12,2->12,1 -->
+<g id="edge154" class="edge"><title>12,2->12,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-570.477C427.29,-570.749 417.041,-570.817 407.091,-570.684"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-567.18 396.867,-570.471 406.792,-574.179 406.937,-567.18"/>
+</g>
+<!-- 12,2->12,3 -->
+<g id="edge152" class="edge"><title>12,2->12,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-557.529C540.501,-557.254 550.748,-557.183 560.703,-557.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-560.817 570.933,-557.523 561.007,-553.819 560.864,-560.817"/>
+</g>
+<!-- 12,3->13,4 -->
+<g id="edge158" class="edge"><title>12,3->13,4</title>
+<path fill="none" stroke="#218559" d="M652.857,-539.028C670.072,-526.694 690.998,-511.703 709.18,-498.677"/>
+<polygon fill="#218559" stroke="#218559" points="711.227,-501.516 717.317,-492.847 707.15,-495.826 711.227,-501.516"/>
+</g>
+<!-- 12,3->12,2 -->
+<g id="edge160" class="edge"><title>12,3->12,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-570.477C561.29,-570.749 551.041,-570.817 541.091,-570.684"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-567.18 530.867,-570.471 540.792,-574.179 540.937,-567.18"/>
+</g>
+<!-- 12,3->12,4 -->
+<g id="edge156" class="edge"><title>12,3->12,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-557.529C674.501,-557.254 684.748,-557.183 694.703,-557.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-560.817 704.933,-557.523 695.007,-553.819 694.864,-560.817"/>
+</g>
+<!-- 12,4->11,3 -->
+<g id="edge162" class="edge"><title>12,4->11,3</title>
+<path fill="none" stroke="#dd1e2f" d="M717.317,-588.847C700.127,-601.162 679.205,-616.152 661.005,-629.191"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="658.948,-626.359 652.857,-635.028 663.025,-632.049 658.948,-626.359"/>
+</g>
+<!-- 12,4->12,3 -->
+<g id="edge164" class="edge"><title>12,4->12,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-570.477C695.29,-570.749 685.041,-570.817 675.091,-570.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-567.18 664.867,-570.471 674.792,-574.179 674.937,-567.18"/>
+</g>
+<!-- 15,1 -->
+<g id="node129" class="node"><title>15,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-276" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-287.167" font-family="Times Roman,serif" font-size="10.00">15,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-268 325,-282 376,-282 376,-268 325,-268"/>
+<text text-anchor="start" x="332.5" y="-272.667" font-family="Times Roman,serif" font-size="10.00">CGGCA</text>
+<polygon fill="#218559" stroke="#218559" points="325,-254 325,-268 376,-268 376,-254 325,-254"/>
+<text text-anchor="start" x="333.5" y="-258.667" font-family="Times Roman,serif" font-size="10.00">TGCCG</text>
+</g>
+<!-- 15,2 -->
+<g id="node130" class="node"><title>15,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-276" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-287.167" font-family="Times Roman,serif" font-size="10.00">15,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-268 459,-282 510,-282 510,-268 459,-268"/>
+<text text-anchor="start" x="468" y="-272.667" font-family="Times Roman,serif" font-size="10.00">GGCAT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-254 459,-268 510,-268 510,-254 459,-254"/>
+<text text-anchor="start" x="468" y="-258.667" font-family="Times Roman,serif" font-size="10.00">ATGCC</text>
+</g>
+<!-- 15,1->15,2 -->
+<g id="edge588" class="edge"><title>15,1->15,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-269.529C406.501,-269.254 416.748,-269.183 426.703,-269.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-272.817 436.933,-269.523 427.007,-265.819 426.864,-272.817"/>
+</g>
+<!-- 16,2 -->
+<g id="node145" class="node"><title>16,2</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-148" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">16,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-140 459,-154 510,-154 510,-140 459,-140"/>
+<text text-anchor="start" x="466.5" y="-144.667" font-family="Times Roman,serif" font-size="10.00">ACGGC</text>
+<polygon fill="#218559" stroke="#218559" points="459,-126 459,-140 510,-140 510,-126 459,-126"/>
+<text text-anchor="start" x="467.5" y="-130.667" font-family="Times Roman,serif" font-size="10.00">GCCGT</text>
+</g>
+<!-- 15,1->16,2 -->
+<g id="edge590" class="edge"><title>15,1->16,2</title>
+<path fill="none" stroke="#218559" d="M384.398,-250.707C401.265,-238.305 418,-226 418,-226 418,-226 434.96,-205.956 451.686,-186.19"/>
+<polygon fill="#218559" stroke="#218559" points="454.506,-188.275 458.294,-178.38 449.162,-183.753 454.506,-188.275"/>
+</g>
+<!-- 15,2->15,1 -->
+<g id="edge596" class="edge"><title>15,2->15,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-282.477C427.29,-282.749 417.041,-282.817 407.091,-282.684"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-279.18 396.867,-282.471 406.792,-286.179 406.937,-279.18"/>
+</g>
+<!-- 15,3 -->
+<g id="node131" class="node"><title>15,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-276" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-287.167" font-family="Times Roman,serif" font-size="10.00">15,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-268 593,-282 644,-282 644,-268 593,-268"/>
+<text text-anchor="start" x="602.5" y="-272.667" font-family="Times Roman,serif" font-size="10.00">GCATC</text>
+<polygon fill="#218559" stroke="#218559" points="593,-254 593,-268 644,-268 644,-254 593,-254"/>
+<text text-anchor="start" x="602" y="-258.667" font-family="Times Roman,serif" font-size="10.00">GATGC</text>
+</g>
+<!-- 15,2->15,3 -->
+<g id="edge594" class="edge"><title>15,2->15,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-269.529C540.501,-269.254 550.748,-269.183 560.703,-269.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-272.817 570.933,-269.523 561.007,-265.819 560.864,-272.817"/>
+</g>
+<!-- 14,1 -->
+<g id="node134" class="node"><title>14,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-372" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-383.167" font-family="Times Roman,serif" font-size="10.00">14,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-364 325,-378 376,-378 376,-364 325,-364"/>
+<text text-anchor="start" x="334.5" y="-368.667" font-family="Times Roman,serif" font-size="10.00">GCATC</text>
+<polygon fill="#218559" stroke="#218559" points="325,-350 325,-364 376,-364 376,-350 325,-350"/>
+<text text-anchor="start" x="334" y="-354.667" font-family="Times Roman,serif" font-size="10.00">GATGC</text>
+</g>
+<!-- 15,2->14,1 -->
+<g id="edge592" class="edge"><title>15,2->14,1</title>
+<path fill="none" stroke="#dd1e2f" d="M452.16,-303.016C435.283,-317.336 418,-332 418,-332 418,-332 408.3,-337.706 396.227,-344.808"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="394.425,-341.807 387.58,-349.894 397.974,-347.841 394.425,-341.807"/>
+</g>
+<!-- 15,3->15,2 -->
+<g id="edge602" class="edge"><title>15,3->15,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-282.477C561.29,-282.749 551.041,-282.817 541.091,-282.684"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-279.18 530.867,-282.471 540.792,-286.179 540.937,-279.18"/>
+</g>
+<!-- 15,4 -->
+<g id="node132" class="node"><title>15,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-276" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-287.167" font-family="Times Roman,serif" font-size="10.00">15,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-268 727,-282 778,-282 778,-268 727,-268"/>
+<text text-anchor="start" x="737.5" y="-272.667" font-family="Times Roman,serif" font-size="10.00">CATCT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-254 727,-268 778,-268 778,-254 727,-254"/>
+<text text-anchor="start" x="736" y="-258.667" font-family="Times Roman,serif" font-size="10.00">AGATG</text>
+</g>
+<!-- 15,3->15,4 -->
+<g id="edge598" class="edge"><title>15,3->15,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-269.529C674.501,-269.254 684.748,-269.183 694.703,-269.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-272.817 704.933,-269.523 695.007,-265.819 694.864,-272.817"/>
+</g>
+<!-- 16,4 -->
+<g id="node147" class="node"><title>16,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-148" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">16,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-140 727,-154 778,-154 778,-140 727,-140"/>
+<text text-anchor="start" x="736" y="-144.667" font-family="Times Roman,serif" font-size="10.00">GGCAT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-126 727,-140 778,-140 778,-126 727,-126"/>
+<text text-anchor="start" x="736" y="-130.667" font-family="Times Roman,serif" font-size="10.00">ATGCC</text>
+</g>
+<!-- 15,3->16,4 -->
+<g id="edge600" class="edge"><title>15,3->16,4</title>
+<path fill="none" stroke="#218559" d="M652.398,-250.707C669.265,-238.305 686,-226 686,-226 686,-226 702.96,-205.956 719.686,-186.19"/>
+<polygon fill="#218559" stroke="#218559" points="722.506,-188.275 726.294,-178.38 717.162,-183.753 722.506,-188.275"/>
+</g>
+<!-- 15,4->15,3 -->
+<g id="edge606" class="edge"><title>15,4->15,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-282.477C695.29,-282.749 685.041,-282.817 675.091,-282.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-279.18 664.867,-282.471 674.792,-286.179 674.937,-279.18"/>
+</g>
+<!-- 14,3 -->
+<g id="node136" class="node"><title>14,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-372" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-383.167" font-family="Times Roman,serif" font-size="10.00">14,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-364 593,-378 644,-378 644,-364 593,-364"/>
+<text text-anchor="start" x="604" y="-368.667" font-family="Times Roman,serif" font-size="10.00">ATCTC</text>
+<polygon fill="#218559" stroke="#218559" points="593,-350 593,-364 644,-364 644,-350 593,-350"/>
+<text text-anchor="start" x="602" y="-354.667" font-family="Times Roman,serif" font-size="10.00">GAGAT</text>
+</g>
+<!-- 15,4->14,3 -->
+<g id="edge604" class="edge"><title>15,4->14,3</title>
+<path fill="none" stroke="#dd1e2f" d="M720.16,-303.016C703.283,-317.336 686,-332 686,-332 686,-332 676.3,-337.706 664.227,-344.808"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="662.425,-341.807 655.58,-349.894 665.974,-347.841 662.425,-341.807"/>
+</g>
+<!-- 14,1->15,2 -->
+<g id="edge168" class="edge"><title>14,1->15,2</title>
+<path fill="none" stroke="#218559" d="M384.857,-347.028C402.072,-334.694 422.998,-319.703 441.18,-306.677"/>
+<polygon fill="#218559" stroke="#218559" points="443.227,-309.516 449.317,-300.847 439.15,-303.826 443.227,-309.516"/>
+</g>
+<!-- 14,1->14,2 -->
+<g id="edge166" class="edge"><title>14,1->14,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-365.529C406.501,-365.254 416.748,-365.183 426.703,-365.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-368.817 436.933,-365.523 427.007,-361.819 426.864,-368.817"/>
+</g>
+<!-- 14,2->13,1 -->
+<g id="edge170" class="edge"><title>14,2->13,1</title>
+<path fill="none" stroke="#dd1e2f" d="M449.317,-396.847C432.127,-409.162 411.205,-424.152 393.005,-437.191"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="390.948,-434.359 384.857,-443.028 395.025,-440.049 390.948,-434.359"/>
+</g>
+<!-- 14,2->14,1 -->
+<g id="edge174" class="edge"><title>14,2->14,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-378.477C427.29,-378.749 417.041,-378.817 407.091,-378.684"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-375.18 396.867,-378.471 406.792,-382.179 406.937,-375.18"/>
+</g>
+<!-- 14,2->14,3 -->
+<g id="edge172" class="edge"><title>14,2->14,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-365.529C540.501,-365.254 550.748,-365.183 560.703,-365.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-368.817 570.933,-365.523 561.007,-361.819 560.864,-368.817"/>
+</g>
+<!-- 14,3->15,4 -->
+<g id="edge178" class="edge"><title>14,3->15,4</title>
+<path fill="none" stroke="#218559" d="M652.857,-347.028C670.072,-334.694 690.998,-319.703 709.18,-306.677"/>
+<polygon fill="#218559" stroke="#218559" points="711.227,-309.516 717.317,-300.847 707.15,-303.826 711.227,-309.516"/>
+</g>
+<!-- 14,3->14,2 -->
+<g id="edge180" class="edge"><title>14,3->14,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-378.477C561.29,-378.749 551.041,-378.817 541.091,-378.684"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-375.18 530.867,-378.471 540.792,-382.179 540.937,-375.18"/>
+</g>
+<!-- 14,3->14,4 -->
+<g id="edge176" class="edge"><title>14,3->14,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-365.529C674.501,-365.254 684.748,-365.183 694.703,-365.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-368.817 704.933,-365.523 695.007,-361.819 694.864,-368.817"/>
+</g>
+<!-- 14,4->13,3 -->
+<g id="edge182" class="edge"><title>14,4->13,3</title>
+<path fill="none" stroke="#dd1e2f" d="M717.317,-396.847C700.127,-409.162 679.205,-424.152 661.005,-437.191"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="658.948,-434.359 652.857,-443.028 663.025,-440.049 658.948,-434.359"/>
+</g>
+<!-- 14,4->14,3 -->
+<g id="edge184" class="edge"><title>14,4->14,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-378.477C695.29,-378.749 685.041,-378.817 675.091,-378.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-375.18 664.867,-378.471 674.792,-382.179 674.937,-375.18"/>
+</g>
+<!-- 17,1->24,1 -->
+<g id="edge614" class="edge"><title>17,1->24,1</title>
+<path fill="none" stroke="#218559" d="M364.013,-3982.45C378.18,-4017.28 398,-4066 398,-4066 398,-4066 418,-4361 418,-4361 418,-4361 428,-5072 428,-5072 428,-5072 435.283,-5078.24 444.596,-5086.22"/>
+<polygon fill="#218559" stroke="#218559" points="442.529,-5089.06 452.4,-5092.91 447.085,-5083.75 442.529,-5089.06"/>
+</g>
+<!-- 17,1->23,2 -->
+<g id="edge610" class="edge"><title>17,1->23,2</title>
+<path fill="none" stroke="#06a2cb" d="M365.308,-3913.67C384.805,-3869.95 416,-3800 416,-3800 416,-3800 418,-3800 418,-3800 418,-3800 428,-4677 428,-4677 428,-4677 435.283,-4683.24 444.596,-4691.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="442.529,-4694.06 452.4,-4697.91 447.085,-4688.75 442.529,-4694.06"/>
+</g>
+<!-- 17,1->17,2 -->
+<g id="edge608" class="edge"><title>17,1->17,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-3941.53C406.501,-3941.25 416.748,-3941.18 426.703,-3941.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-3944.82 436.933,-3941.52 427.007,-3937.82 426.864,-3944.82"/>
+</g>
+<!-- 18,3 -->
+<g id="node156" class="node"><title>18,3</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-4463" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-4474.17" font-family="Times Roman,serif" font-size="10.00">18,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-4455 459,-4469 510,-4469 510,-4455 459,-4455"/>
+<text text-anchor="start" x="470" y="-4459.67" font-family="Times Roman,serif" font-size="10.00">TTTCA</text>
+<polygon fill="#218559" stroke="#218559" points="459,-4441 459,-4455 510,-4455 510,-4441 459,-4441"/>
+<text text-anchor="start" x="467.5" y="-4445.67" font-family="Times Roman,serif" font-size="10.00">TGAAA</text>
+</g>
+<!-- 17,1->18,3 -->
+<g id="edge612" class="edge"><title>17,1->18,3</title>
+<path fill="none" stroke="#06a2cb" d="M365.188,-3913.83C379.25,-3882.19 398,-3840 398,-3840 398,-3840 416,-3775 416,-3775 416,-3775 418,-3775 418,-3775 418,-3775 428,-4162 428,-4162 428,-4162 459.813,-4333 475.483,-4417.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="472.086,-4418.1 477.356,-4427.29 478.968,-4416.82 472.086,-4418.1"/>
+</g>
+<!-- 17,2->23,1 -->
+<g id="edge620" class="edge"><title>17,2->23,1</title>
+<path fill="none" stroke="#06a2cb" d="M453.052,-3975.63C440.1,-3987.2 428,-3998 428,-3998 428,-3998 416,-4190 416,-4190 416,-4190 398,-4511 398,-4511 398,-4511 374.399,-4616.22 360.296,-4679.1"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="356.808,-4678.66 358.034,-4689.18 363.638,-4680.19 356.808,-4678.66"/>
+</g>
+<!-- 17,2->17,1 -->
+<g id="edge622" class="edge"><title>17,2->17,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-3954.48C427.29,-3954.75 417.041,-3954.82 407.091,-3954.68"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-3951.18 396.867,-3954.47 406.792,-3958.18 406.937,-3951.18"/>
+</g>
+<!-- 17,3 -->
+<g id="node141" class="node"><title>17,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-3948" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-3959.17" font-family="Times Roman,serif" font-size="10.00">17,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-3940 593,-3954 644,-3954 644,-3940 593,-3940"/>
+<text text-anchor="start" x="600.5" y="-3944.67" font-family="Times Roman,serif" font-size="10.00">AACGG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-3926 593,-3940 644,-3940 644,-3926 593,-3926"/>
+<text text-anchor="start" x="603" y="-3930.67" font-family="Times Roman,serif" font-size="10.00">CCGTT</text>
+</g>
+<!-- 17,2->17,3 -->
+<g id="edge618" class="edge"><title>17,2->17,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-3941.53C540.501,-3941.25 550.748,-3941.18 560.703,-3941.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-3944.82 570.933,-3941.52 561.007,-3937.82 560.864,-3944.82"/>
+</g>
+<!-- 16,1 -->
+<g id="node144" class="node"><title>16,1</title>
+<ellipse fill="none" stroke="black" cx="350" cy="-148" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="327.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">16,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325,-140 325,-154 376,-154 376,-140 325,-140"/>
+<text text-anchor="start" x="332.5" y="-144.667" font-family="Times Roman,serif" font-size="10.00">AACGG</text>
+<polygon fill="#218559" stroke="#218559" points="325,-126 325,-140 376,-140 376,-126 325,-126"/>
+<text text-anchor="start" x="335" y="-130.667" font-family="Times Roman,serif" font-size="10.00">CCGTT</text>
+</g>
+<!-- 17,2->16,1 -->
+<g id="edge616" class="edge"><title>17,2->16,1</title>
+<path fill="none" stroke="#dd1e2f" d="M476.036,-3912.3C460.575,-3843.01 428,-3697 428,-3697 428,-3697 418,-2372 418,-2372 418,-2372 398,-228 398,-228 398,-228 386.635,-209.059 374.965,-189.609"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="377.947,-187.775 369.801,-181.001 371.944,-191.377 377.947,-187.775"/>
+</g>
+<!-- 17,3->17,2 -->
+<g id="edge628" class="edge"><title>17,3->17,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-3954.48C561.29,-3954.75 551.041,-3954.82 541.091,-3954.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-3951.18 530.867,-3954.47 540.792,-3958.18 540.937,-3951.18"/>
+</g>
+<!-- 17,4 -->
+<g id="node142" class="node"><title>17,4</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-3948" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-3959.17" font-family="Times Roman,serif" font-size="10.00">17,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-3940 727,-3954 778,-3954 778,-3940 727,-3940"/>
+<text text-anchor="start" x="734.5" y="-3944.67" font-family="Times Roman,serif" font-size="10.00">ACGGC</text>
+<polygon fill="#218559" stroke="#218559" points="727,-3926 727,-3940 778,-3940 778,-3926 727,-3926"/>
+<text text-anchor="start" x="735.5" y="-3930.67" font-family="Times Roman,serif" font-size="10.00">GCCGT</text>
+</g>
+<!-- 17,3->17,4 -->
+<g id="edge624" class="edge"><title>17,3->17,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-3941.53C674.501,-3941.25 684.748,-3941.18 694.703,-3941.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-3944.82 704.933,-3941.52 695.007,-3937.82 694.864,-3944.82"/>
+</g>
+<!-- 17,3->18,1 -->
+<g id="edge626" class="edge"><title>17,3->18,1</title>
+<path fill="none" stroke="#06a2cb" d="M610.036,-3912.3C594.575,-3843.01 562,-3697 562,-3697 562,-3697 540,-3435 540,-3435 540,-3435 428,-3435 428,-3435 428,-3435 398,-3669 398,-3669 398,-3669 294,-3903 294,-3903 294,-3903 264,-4162 264,-4162 264,-4162 236.799,-4332.57 223.35,-4416.91"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="219.861,-4416.56 221.742,-4426.99 226.773,-4417.67 219.861,-4416.56"/>
+</g>
+<!-- 17,4->17,3 -->
+<g id="edge632" class="edge"><title>17,4->17,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-3954.48C695.29,-3954.75 685.041,-3954.82 675.091,-3954.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-3951.18 664.867,-3954.47 674.792,-3958.18 674.937,-3951.18"/>
+</g>
+<!-- 16,3 -->
+<g id="node146" class="node"><title>16,3</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-148" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">16,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-140 593,-154 644,-154 644,-140 593,-140"/>
+<text text-anchor="start" x="600.5" y="-144.667" font-family="Times Roman,serif" font-size="10.00">CGGCA</text>
+<polygon fill="#218559" stroke="#218559" points="593,-126 593,-140 644,-140 644,-126 593,-126"/>
+<text text-anchor="start" x="601.5" y="-130.667" font-family="Times Roman,serif" font-size="10.00">TGCCG</text>
+</g>
+<!-- 17,4->16,3 -->
+<g id="edge630" class="edge"><title>17,4->16,3</title>
+<path fill="none" stroke="#dd1e2f" d="M744.036,-3912.3C728.575,-3843.01 696,-3697 696,-3697 696,-3697 686,-1855 686,-1855 686,-1855 674,-228 674,-228 674,-228 660.245,-208.35 646.369,-188.527"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="649.057,-186.263 640.455,-180.078 643.322,-190.278 649.057,-186.263"/>
+</g>
+<!-- 16,1->17,2 -->
+<g id="edge188" class="edge"><title>16,1->17,2</title>
+<path fill="none" stroke="#218559" d="M369.801,-181.001C383.023,-203.038 398,-228 398,-228 398,-228 428,-3697 428,-3697 428,-3697 457.714,-3830.18 473.853,-3902.52"/>
+<polygon fill="#218559" stroke="#218559" points="470.442,-3903.31 476.036,-3912.3 477.274,-3901.78 470.442,-3903.31"/>
+</g>
+<!-- 16,1->16,2 -->
+<g id="edge186" class="edge"><title>16,1->16,2</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-141.529C406.501,-141.254 416.748,-141.183 426.703,-141.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-144.817 436.933,-141.523 427.007,-137.819 426.864,-144.817"/>
+</g>
+<!-- 16,2->15,1 -->
+<g id="edge190" class="edge"><title>16,2->15,1</title>
+<path fill="none" stroke="#dd1e2f" d="M454.344,-176.328C434.498,-195.285 408.229,-220.378 387.013,-240.644"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="384.553,-238.154 379.739,-247.593 389.388,-243.216 384.553,-238.154"/>
+</g>
+<!-- 16,2->16,1 -->
+<g id="edge194" class="edge"><title>16,2->16,1</title>
+<path fill="none" stroke="#218559" d="M436.933,-154.477C427.29,-154.749 417.041,-154.817 407.091,-154.684"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-151.18 396.867,-154.471 406.792,-158.179 406.937,-151.18"/>
+</g>
+<!-- 16,2->16,3 -->
+<g id="edge192" class="edge"><title>16,2->16,3</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-141.529C540.501,-141.254 550.748,-141.183 560.703,-141.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-144.817 570.933,-141.523 561.007,-137.819 560.864,-144.817"/>
+</g>
+<!-- 16,3->17,4 -->
+<g id="edge198" class="edge"><title>16,3->17,4</title>
+<path fill="none" stroke="#218559" d="M640.455,-180.078C656.034,-202.335 674,-228 674,-228 674,-228 696,-3697 696,-3697 696,-3697 725.714,-3830.18 741.853,-3902.52"/>
+<polygon fill="#218559" stroke="#218559" points="738.442,-3903.31 744.036,-3912.3 745.274,-3901.78 738.442,-3903.31"/>
+</g>
+<!-- 16,3->16,2 -->
+<g id="edge200" class="edge"><title>16,3->16,2</title>
+<path fill="none" stroke="#218559" d="M570.933,-154.477C561.29,-154.749 551.041,-154.817 541.091,-154.684"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-151.18 530.867,-154.471 540.792,-158.179 540.937,-151.18"/>
+</g>
+<!-- 16,3->16,4 -->
+<g id="edge196" class="edge"><title>16,3->16,4</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-141.529C674.501,-141.254 684.748,-141.183 694.703,-141.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-144.817 704.933,-141.523 695.007,-137.819 694.864,-144.817"/>
+</g>
+<!-- 16,4->15,3 -->
+<g id="edge202" class="edge"><title>16,4->15,3</title>
+<path fill="none" stroke="#dd1e2f" d="M722.344,-176.328C702.498,-195.285 676.229,-220.378 655.013,-240.644"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="652.553,-238.154 647.739,-247.593 657.388,-243.216 652.553,-238.154"/>
+</g>
+<!-- 16,4->16,3 -->
+<g id="edge204" class="edge"><title>16,4->16,3</title>
+<path fill="none" stroke="#218559" d="M704.933,-154.477C695.29,-154.749 685.041,-154.817 675.091,-154.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-151.18 664.867,-154.471 674.792,-158.179 674.937,-151.18"/>
+</g>
+<!-- 19,1->20,3 -->
+<g id="edge636" class="edge"><title>19,1->20,3</title>
+<path fill="none" stroke="#06a2cb" d="M71.2944,-5180.29C93.6974,-5070.61 160,-4746 160,-4746 160,-4746 294,-4541 294,-4541 294,-4541 398,-4511 398,-4511 398,-4511 451.557,-4263.76 474.142,-4159.51"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="477.596,-4160.09 476.292,-4149.58 470.755,-4158.61 477.596,-4160.09"/>
+</g>
+<!-- 19,2 -->
+<g id="node150" class="node"><title>19,2</title>
+<ellipse fill="none" stroke="black" cx="216" cy="-5216" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="193.5" y="-5227.17" font-family="Times Roman,serif" font-size="10.00">19,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="191,-5208 191,-5222 242,-5222 242,-5208 191,-5208"/>
+<text text-anchor="start" x="201" y="-5212.67" font-family="Times Roman,serif" font-size="10.00">ATTGA</text>
+<polygon fill="#218559" stroke="#218559" points="191,-5194 191,-5208 242,-5208 242,-5194 191,-5194"/>
+<text text-anchor="start" x="201.5" y="-5198.67" font-family="Times Roman,serif" font-size="10.00">TCAAT</text>
+</g>
+<!-- 19,1->19,2 -->
+<g id="edge634" class="edge"><title>19,1->19,2</title>
+<path fill="none" stroke="#dd1e2f" d="M110.845,-5209.71C126.015,-5209.21 143.011,-5209.12 158.843,-5209.44"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="158.958,-5212.94 169.048,-5209.71 159.144,-5205.94 158.958,-5212.94"/>
+</g>
+<!-- 19,2->19,1 -->
+<g id="edge642" class="edge"><title>19,2->19,1</title>
+<path fill="none" stroke="#218559" d="M169.048,-5222.29C153.87,-5222.79 136.872,-5222.88 121.045,-5222.56"/>
+<polygon fill="#218559" stroke="#218559" points="120.935,-5219.06 110.845,-5222.29 120.748,-5226.05 120.935,-5219.06"/>
+</g>
+<!-- 19,2->19,3 -->
+<g id="edge638" class="edge"><title>19,2->19,3</title>
+<path fill="none" stroke="#dd1e2f" d="M262.867,-5209.53C272.501,-5209.25 282.748,-5209.18 292.703,-5209.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="292.864,-5212.82 302.933,-5209.52 293.007,-5205.82 292.864,-5212.82"/>
+</g>
+<!-- 19,2->18,4 -->
+<g id="edge640" class="edge"><title>19,2->18,4</title>
+<path fill="none" stroke="#ebb035" d="M227.425,-5180.84C249.091,-5114.18 294,-4976 294,-4976 294,-4976 540,-4801 540,-4801 540,-4801 562,-4513 562,-4513 562,-4513 569.684,-4506.14 579.359,-4497.5"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="581.923,-4499.9 587.052,-4490.63 577.261,-4494.68 581.923,-4499.9"/>
+</g>
+<!-- 19,3->24,1 -->
+<g id="edge644" class="edge"><title>19,3->24,1</title>
+<path fill="none" stroke="#dd1e2f" d="M383,-5190C399.5,-5177 416,-5164 416,-5164 416,-5164 426.392,-5157.28 439.051,-5149.08"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="441.117,-5151.92 447.611,-5143.55 437.314,-5146.04 441.117,-5151.92"/>
+</g>
+<!-- 19,3->20,1 -->
+<g id="edge648" class="edge"><title>19,3->20,1</title>
+<path fill="none" stroke="#06a2cb" d="M336.444,-5181.14C319.871,-5138.52 294,-5072 294,-5072 294,-5072 264,-4415 264,-4415 264,-4415 236.799,-4244.43 223.35,-4160.09"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="226.773,-4159.33 221.742,-4150.01 219.861,-4160.44 226.773,-4159.33"/>
+</g>
+<!-- 19,3->19,2 -->
+<g id="edge650" class="edge"><title>19,3->19,2</title>
+<path fill="none" stroke="#218559" d="M302.933,-5222.48C293.29,-5222.75 283.041,-5222.82 273.091,-5222.68"/>
+<polygon fill="#218559" stroke="#218559" points="272.937,-5219.18 262.867,-5222.47 272.792,-5226.18 272.937,-5219.18"/>
+</g>
+<!-- 19,3->19,4 -->
+<g id="edge646" class="edge"><title>19,3->19,4</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-5209.53C406.501,-5209.25 416.748,-5209.18 426.703,-5209.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-5212.82 436.933,-5209.52 427.007,-5205.82 426.864,-5212.82"/>
+</g>
+<!-- 19,4->24,2 -->
+<g id="edge654" class="edge"><title>19,4->24,2</title>
+<path fill="none" stroke="#dd1e2f" d="M531.615,-5217.44C542.023,-5217.76 550,-5218 550,-5218 550,-5218 552,-5218 552,-5218 552,-5218 572.47,-5187.6 590.61,-5160.67"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593.706,-5162.34 596.389,-5152.09 587.9,-5158.43 593.706,-5162.34"/>
+</g>
+<!-- 19,4->23,1 -->
+<g id="edge658" class="edge"><title>19,4->23,1</title>
+<path fill="none" stroke="#ebb035" d="M452.4,-5188.91C439.697,-5178.03 428,-5168 428,-5168 428,-5168 398,-4976 398,-4976 398,-4976 372.635,-4843.36 358.782,-4770.92"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="362.142,-4769.86 356.826,-4760.7 355.267,-4771.18 362.142,-4769.86"/>
+</g>
+<!-- 19,4->8,3 -->
+<g id="edge652" class="edge"><title>19,4->8,3</title>
+<path fill="none" stroke="#dd1e2f" d="M515.6,-5188.91C528.303,-5178.03 540,-5168 540,-5168 540,-5168 562,-5072 562,-5072 562,-5072 569.944,-5065.19 579.849,-5056.7"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="582.391,-5059.13 587.705,-5049.97 577.835,-5053.82 582.391,-5059.13"/>
+</g>
+<!-- 19,4->19,3 -->
+<g id="edge660" class="edge"><title>19,4->19,3</title>
+<path fill="none" stroke="#218559" d="M436.933,-5222.48C427.29,-5222.75 417.041,-5222.82 407.091,-5222.68"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-5219.18 396.867,-5222.47 406.792,-5226.18 406.937,-5219.18"/>
+</g>
+<!-- 19,4->18,2 -->
+<g id="edge656" class="edge"><title>19,4->18,2</title>
+<path fill="none" stroke="#ebb035" d="M452.4,-5188.91C439.697,-5178.03 428,-5168 428,-5168 428,-5168 398,-4677 398,-4677 398,-4677 374.399,-4571.78 360.296,-4508.9"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="363.638,-4507.81 358.034,-4498.82 356.808,-4509.34 363.638,-4507.81"/>
+</g>
+<!-- 18,1->23,1 -->
+<g id="edge206" class="edge"><title>18,1->23,1</title>
+<path fill="none" stroke="#dd1e2f" d="M233.244,-4496.72C257.516,-4544.17 301.748,-4630.66 328.249,-4682.47"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="325.153,-4684.1 332.822,-4691.41 331.385,-4680.92 325.153,-4684.1"/>
+</g>
+<!-- 18,1->17,3 -->
+<g id="edge210" class="edge"><title>18,1->17,3</title>
+<path fill="none" stroke="#06a2cb" d="M221.742,-4426.99C234.483,-4347.1 264,-4162 264,-4162 264,-4162 294,-3812 294,-3812 294,-3812 428,-3405 428,-3405 428,-3405 552,-3405 552,-3405 552,-3405 562,-3697 562,-3697 562,-3697 591.714,-3830.18 607.853,-3902.52"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="604.442,-3903.31 610.036,-3912.3 611.274,-3901.78 604.442,-3903.31"/>
+</g>
+<!-- 18,1->18,2 -->
+<g id="edge208" class="edge"><title>18,1->18,2</title>
+<path fill="none" stroke="#dd1e2f" d="M262.867,-4456.53C272.501,-4456.25 282.748,-4456.18 292.703,-4456.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="292.864,-4459.82 302.933,-4456.52 293.007,-4452.82 292.864,-4459.82"/>
+</g>
+<!-- 18,2->7,1 -->
+<g id="edge212" class="edge"><title>18,2->7,1</title>
+<path fill="none" stroke="#dd1e2f" d="M368.035,-4429.39C387.576,-4392.97 416,-4340 416,-4340 416,-4340 418,-4340 418,-4340 418,-4340 428,-4511 428,-4511 428,-4511 447.673,-4552.45 463.699,-4586.22"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="460.653,-4587.97 468.102,-4595.5 466.977,-4584.97 460.653,-4587.97"/>
+</g>
+<!-- 18,2->19,4 -->
+<g id="edge216" class="edge"><title>18,2->19,4</title>
+<path fill="none" stroke="#ebb035" d="M371.611,-4430.91C390.859,-4402.33 416,-4365 416,-4365 416,-4365 418,-4365 418,-4365 418,-4365 428,-5168 428,-5168 428,-5168 435.283,-5174.24 444.596,-5182.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="442.529,-5185.06 452.4,-5188.91 447.085,-5179.75 442.529,-5185.06"/>
+</g>
+<!-- 18,2->18,1 -->
+<g id="edge218" class="edge"><title>18,2->18,1</title>
+<path fill="none" stroke="#218559" d="M302.933,-4469.48C293.29,-4469.75 283.041,-4469.82 273.091,-4469.68"/>
+<polygon fill="#218559" stroke="#218559" points="272.937,-4466.18 262.867,-4469.47 272.792,-4473.18 272.937,-4466.18"/>
+</g>
+<!-- 18,2->18,3 -->
+<g id="edge214" class="edge"><title>18,2->18,3</title>
+<path fill="none" stroke="#dd1e2f" d="M396.867,-4456.53C406.501,-4456.25 416.748,-4456.18 426.703,-4456.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="426.864,-4459.82 436.933,-4456.52 427.007,-4452.82 426.864,-4459.82"/>
+</g>
+<!-- 18,3->17,1 -->
+<g id="edge222" class="edge"><title>18,3->17,1</title>
+<path fill="none" stroke="#06a2cb" d="M477.356,-4427.29C462.522,-4347.56 428,-4162 428,-4162 428,-4162 418,-3750 418,-3750 418,-3750 416,-3750 416,-3750 416,-3750 398,-3840 398,-3840 398,-3840 382.733,-3874.35 369.42,-3904.31"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="366.051,-3903.27 365.188,-3913.83 372.447,-3906.11 366.051,-3903.27"/>
+</g>
+<!-- 18,3->18,2 -->
+<g id="edge224" class="edge"><title>18,3->18,2</title>
+<path fill="none" stroke="#218559" d="M436.933,-4469.48C427.29,-4469.75 417.041,-4469.82 407.091,-4469.68"/>
+<polygon fill="#218559" stroke="#218559" points="406.937,-4466.18 396.867,-4469.47 406.792,-4473.18 406.937,-4466.18"/>
+</g>
+<!-- 18,3->18,4 -->
+<g id="edge220" class="edge"><title>18,3->18,4</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-4456.53C540.501,-4456.25 550.748,-4456.18 560.703,-4456.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-4459.82 570.933,-4456.52 561.007,-4452.82 560.864,-4459.82"/>
+</g>
+<!-- 18,4->24,1 -->
+<g id="edge228" class="edge"><title>18,4->24,1</title>
+<path fill="none" stroke="#06a2cb" d="M601.802,-4497.13C585.264,-4531.98 562,-4581 562,-4581 562,-4581 550,-4848 550,-4848 550,-4848 540,-5072 540,-5072 540,-5072 532.717,-5078.24 523.404,-5086.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="520.915,-5083.75 515.6,-5092.91 525.471,-5089.06 520.915,-5083.75"/>
+</g>
+<!-- 18,4->19,2 -->
+<g id="edge226" class="edge"><title>18,4->19,2</title>
+<path fill="none" stroke="#ebb035" d="M587.052,-4490.63C574.1,-4502.2 562,-4513 562,-4513 562,-4513 550,-4556 550,-4556 550,-4556 540,-4773 540,-4773 540,-4773 282,-4836 282,-4836 282,-4836 241.471,-5069.35 223.932,-5170.33"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="220.472,-5169.8 222.209,-5180.25 227.369,-5171 220.472,-5169.8"/>
+</g>
+<!-- 18,4->18,3 -->
+<g id="edge230" class="edge"><title>18,4->18,3</title>
+<path fill="none" stroke="#218559" d="M570.933,-4469.48C561.29,-4469.75 551.041,-4469.82 541.091,-4469.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-4466.18 530.867,-4469.47 540.792,-4473.18 540.937,-4466.18"/>
+</g>
+<!-- 31,1 -->
+<g id="node159" class="node"><title>31,1</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-1732" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-1743.17" font-family="Times Roman,serif" font-size="10.00">31,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-1724 593,-1738 644,-1738 644,-1724 593,-1724"/>
+<text text-anchor="start" x="602" y="-1728.67" font-family="Times Roman,serif" font-size="10.00">GCGTC</text>
+<polygon fill="#218559" stroke="#218559" points="593,-1710 593,-1724 644,-1724 644,-1710 593,-1710"/>
+<text text-anchor="start" x="600.5" y="-1714.67" font-family="Times Roman,serif" font-size="10.00">GACGC</text>
+</g>
+<!-- 31,2 -->
+<g id="node160" class="node"><title>31,2</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-1732" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-1743.17" font-family="Times Roman,serif" font-size="10.00">31,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-1724 727,-1738 778,-1738 778,-1724 727,-1724"/>
+<text text-anchor="start" x="736" y="-1728.67" font-family="Times Roman,serif" font-size="10.00">CGTCA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-1710 727,-1724 778,-1724 778,-1710 727,-1710"/>
+<text text-anchor="start" x="735.5" y="-1714.67" font-family="Times Roman,serif" font-size="10.00">TGACG</text>
+</g>
+<!-- 31,1->31,2 -->
+<g id="edge786" class="edge"><title>31,1->31,2</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-1725.53C674.501,-1725.25 684.748,-1725.18 694.703,-1725.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-1728.82 704.933,-1725.52 695.007,-1721.82 694.864,-1728.82"/>
+</g>
+<!-- 32,2 -->
+<g id="node195" class="node"><title>32,2</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-1974" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-1985.17" font-family="Times Roman,serif" font-size="10.00">32,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-1966 727,-1980 778,-1980 778,-1966 727,-1966"/>
+<text text-anchor="start" x="735.5" y="-1970.67" font-family="Times Roman,serif" font-size="10.00">AGCGT</text>
+<polygon fill="#218559" stroke="#218559" points="727,-1952 727,-1966 778,-1966 778,-1952 727,-1952"/>
+<text text-anchor="start" x="736" y="-1956.67" font-family="Times Roman,serif" font-size="10.00">ACGCT</text>
+</g>
+<!-- 31,1->32,2 -->
+<g id="edge788" class="edge"><title>31,1->32,2</title>
+<path fill="none" stroke="#218559" d="M636.491,-1765.39C660.503,-1808.76 702.432,-1884.48 728.624,-1931.78"/>
+<polygon fill="#218559" stroke="#218559" points="725.577,-1933.51 733.483,-1940.56 731.701,-1930.12 725.577,-1933.51"/>
+</g>
+<!-- 31,2->31,1 -->
+<g id="edge796" class="edge"><title>31,2->31,1</title>
+<path fill="none" stroke="#218559" d="M704.933,-1738.48C695.29,-1738.75 685.041,-1738.82 675.091,-1738.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-1735.18 664.867,-1738.47 674.792,-1742.18 674.937,-1735.18"/>
+</g>
+<!-- 31,2->31,3 -->
+<g id="edge792" class="edge"><title>31,2->31,3</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-1725.53C808.501,-1725.25 818.748,-1725.18 828.703,-1725.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-1728.82 838.933,-1725.52 829.007,-1721.82 828.864,-1728.82"/>
+</g>
+<!-- 31,2->30,1 -->
+<g id="edge790" class="edge"><title>31,2->30,1</title>
+<path fill="none" stroke="#dd1e2f" d="M740.329,-1767.36C721.134,-1825.52 685,-1935 685,-1935 685,-1935 685,-2013 685,-2013 685,-2013 709.962,-2060.32 729.413,-2097.19"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="726.459,-2099.09 734.221,-2106.3 732.651,-2095.82 726.459,-2099.09"/>
+</g>
+<!-- 31,2->30,4 -->
+<g id="edge794" class="edge"><title>31,2->30,4</title>
+<path fill="none" stroke="#ebb035" d="M783.6,-1704.91C796.303,-1694.03 808,-1684 808,-1684 808,-1684 820,-1354 820,-1354 820,-1354 830,-1026 830,-1026 830,-1026 1076,-1026 1076,-1026 1076,-1026 1106,-1492 1106,-1492 1106,-1492 1139.58,-1945.29 1150.57,-2093.63"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1147.09,-2094.06 1151.32,-2103.77 1154.07,-2093.54 1147.09,-2094.06"/>
+</g>
+<!-- 31,3->28,4 -->
+<g id="edge802" class="edge"><title>31,3->28,4</title>
+<path fill="none" stroke="#218559" d="M896.271,-1767.58C912.438,-1823.59 942,-1926 942,-1926 942,-1926 964,-2546 964,-2546 964,-2546 990.308,-2637.14 1006.86,-2694.48"/>
+<polygon fill="#218559" stroke="#218559" points="1003.59,-2695.78 1009.73,-2704.42 1010.32,-2693.84 1003.59,-2695.78"/>
+</g>
+<!-- 31,3->31,2 -->
+<g id="edge804" class="edge"><title>31,3->31,2</title>
+<path fill="none" stroke="#218559" d="M838.933,-1738.48C829.29,-1738.75 819.041,-1738.82 809.091,-1738.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-1735.18 798.867,-1738.47 808.792,-1742.18 808.937,-1735.18"/>
+</g>
+<!-- 31,4 -->
+<g id="node162" class="node"><title>31,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-1732" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-1743.17" font-family="Times Roman,serif" font-size="10.00">31,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-1724 995,-1738 1046,-1738 1046,-1724 995,-1724"/>
+<text text-anchor="start" x="1005" y="-1728.67" font-family="Times Roman,serif" font-size="10.00">TCATG</text>
+<polygon fill="#218559" stroke="#218559" points="995,-1710 995,-1724 1046,-1724 1046,-1710 995,-1710"/>
+<text text-anchor="start" x="1004" y="-1714.67" font-family="Times Roman,serif" font-size="10.00">CATGA</text>
+</g>
+<!-- 31,3->31,4 -->
+<g id="edge798" class="edge"><title>31,3->31,4</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-1725.53C942.501,-1725.25 952.748,-1725.18 962.703,-1725.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-1728.82 972.933,-1725.52 963.007,-1721.82 962.864,-1728.82"/>
+</g>
+<!-- 31,3->32,4 -->
+<g id="edge800" class="edge"><title>31,3->32,4</title>
+<path fill="none" stroke="#218559" d="M917.6,-1704.91C930.303,-1694.03 942,-1684 942,-1684 942,-1684 952,-1293 952,-1293 952,-1293 954,-1293 954,-1293 954,-1293 964,-1780 964,-1780 964,-1780 990.308,-1871.14 1006.86,-1928.48"/>
+<polygon fill="#218559" stroke="#218559" points="1003.59,-1929.78 1009.73,-1938.42 1010.32,-1927.84 1003.59,-1929.78"/>
+</g>
+<!-- 31,4->31,3 -->
+<g id="edge810" class="edge"><title>31,4->31,3</title>
+<path fill="none" stroke="#218559" d="M972.933,-1738.48C963.29,-1738.75 953.041,-1738.82 943.091,-1738.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-1735.18 932.867,-1738.47 942.792,-1742.18 942.937,-1735.18"/>
+</g>
+<!-- 31,4->30,2 -->
+<g id="edge808" class="edge"><title>31,4->30,2</title>
+<path fill="none" stroke="#ebb035" d="M1009.73,-1767.58C993.562,-1823.59 964,-1926 964,-1926 964,-1926 942,-2022 942,-2022 942,-2022 922.563,-2062.96 906.591,-2096.61"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="903.323,-2095.33 902.198,-2105.87 909.647,-2098.34 903.323,-2095.33"/>
+</g>
+<!-- 31,4->30,3 -->
+<g id="edge806" class="edge"><title>31,4->30,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1008.33,-1767.36C989.134,-1825.52 953,-1935 953,-1935 953,-1935 953,-2013 953,-2013 953,-2013 977.962,-2060.32 997.413,-2097.19"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="994.459,-2099.09 1002.22,-2106.3 1000.65,-2095.82 994.459,-2099.09"/>
+</g>
+<!-- 30,1->29,3 -->
+<g id="edge358" class="edge"><title>30,1->29,3</title>
+<path fill="none" stroke="#06a2cb" d="M782.948,-2112.37C795.9,-2100.8 808,-2090 808,-2090 808,-2090 818,-1896 818,-1896 818,-1896 942,-1848 942,-1848 942,-1848 952,-1368 952,-1368 952,-1368 954,-1368 954,-1368 954,-1368 964,-2188 964,-2188 964,-2188 983.437,-2228.96 999.409,-2262.61"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="996.353,-2264.34 1003.8,-2271.87 1002.68,-2261.33 996.353,-2264.34"/>
+</g>
+<!-- 30,1->31,2 -->
+<g id="edge360" class="edge"><title>30,1->31,2</title>
+<path fill="none" stroke="#218559" d="M734.221,-2106.3C714.372,-2068.68 685,-2013 685,-2013 685,-2013 685,-1935 685,-1935 685,-1935 717.423,-1836.76 737.159,-1776.97"/>
+<polygon fill="#218559" stroke="#218559" points="740.519,-1777.95 740.329,-1767.36 733.872,-1775.76 740.519,-1777.95"/>
+</g>
+<!-- 30,1->30,2 -->
+<g id="edge356" class="edge"><title>30,1->30,2</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-2133.53C808.501,-2133.25 818.748,-2133.18 828.703,-2133.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-2136.82 838.933,-2133.52 829.007,-2129.82 828.864,-2136.82"/>
+</g>
+<!-- 30,2->29,1 -->
+<g id="edge362" class="edge"><title>30,2->29,1</title>
+<path fill="none" stroke="#dd1e2f" d="M861.016,-2170.95C839.056,-2198.15 807,-2237.87 783.329,-2267.19"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="780.448,-2265.19 776.89,-2275.17 785.895,-2269.58 780.448,-2265.19"/>
+</g>
+<!-- 30,2->31,4 -->
+<g id="edge366" class="edge"><title>30,2->31,4</title>
+<path fill="none" stroke="#ebb035" d="M902.198,-2105.87C918.736,-2071.02 942,-2022 942,-2022 942,-2022 952,-1898 952,-1898 952,-1898 981.876,-1825.07 1002.02,-1775.89"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1005.36,-1776.98 1005.91,-1766.4 998.881,-1774.32 1005.36,-1776.98"/>
+</g>
+<!-- 30,2->30,1 -->
+<g id="edge368" class="edge"><title>30,2->30,1</title>
+<path fill="none" stroke="#218559" d="M838.933,-2146.48C829.29,-2146.75 819.041,-2146.82 809.091,-2146.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-2143.18 798.867,-2146.47 808.792,-2150.18 808.937,-2143.18"/>
+</g>
+<!-- 30,2->30,3 -->
+<g id="edge364" class="edge"><title>30,2->30,3</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-2133.53C942.501,-2133.25 952.748,-2133.18 962.703,-2133.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-2136.82 972.933,-2133.52 963.007,-2129.82 962.864,-2136.82"/>
+</g>
+<!-- 30,3->29,1 -->
+<g id="edge372" class="edge"><title>30,3->29,1</title>
+<path fill="none" stroke="#06a2cb" d="M1003.8,-2105.87C987.264,-2071.02 964,-2022 964,-2022 964,-2022 954,-1876 954,-1876 954,-1876 952,-1876 952,-1876 952,-1876 942,-2022 942,-2022 942,-2022 830,-2062 830,-2062 830,-2062 808,-2188 808,-2188 808,-2188 788.563,-2228.96 772.591,-2262.61"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="769.323,-2261.33 768.198,-2271.87 775.647,-2264.34 769.323,-2261.33"/>
+</g>
+<!-- 30,3->31,4 -->
+<g id="edge374" class="edge"><title>30,3->31,4</title>
+<path fill="none" stroke="#218559" d="M1002.22,-2106.3C982.372,-2068.68 953,-2013 953,-2013 953,-2013 953,-1935 953,-1935 953,-1935 985.423,-1836.76 1005.16,-1776.97"/>
+<polygon fill="#218559" stroke="#218559" points="1008.52,-1777.95 1008.33,-1767.36 1001.87,-1775.76 1008.52,-1777.95"/>
+</g>
+<!-- 30,3->30,2 -->
+<g id="edge376" class="edge"><title>30,3->30,2</title>
+<path fill="none" stroke="#218559" d="M972.933,-2146.48C963.29,-2146.75 953.041,-2146.82 943.091,-2146.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-2143.18 932.867,-2146.47 942.792,-2150.18 942.937,-2143.18"/>
+</g>
+<!-- 30,3->30,4 -->
+<g id="edge370" class="edge"><title>30,3->30,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1066.87,-2133.53C1076.5,-2133.25 1086.75,-2133.18 1096.7,-2133.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1096.86,-2136.82 1106.93,-2133.52 1097.01,-2129.82 1096.86,-2136.82"/>
+</g>
+<!-- 30,4->29,3 -->
+<g id="edge378" class="edge"><title>30,4->29,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1129.02,-2170.95C1107.06,-2198.15 1075,-2237.87 1051.33,-2267.19"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1048.45,-2265.19 1044.89,-2275.17 1053.89,-2269.58 1048.45,-2265.19"/>
+</g>
+<!-- 30,4->31,2 -->
+<g id="edge380" class="edge"><title>30,4->31,2</title>
+<path fill="none" stroke="#ebb035" d="M1151.32,-2103.77C1141.21,-1967.31 1106,-1492 1106,-1492 1106,-1492 1088,-996 1088,-996 1088,-996 830,-996 830,-996 830,-996 808,-1684 808,-1684 808,-1684 800.717,-1690.24 791.404,-1698.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="788.915,-1695.75 783.6,-1704.91 793.471,-1701.06 788.915,-1695.75"/>
+</g>
+<!-- 30,4->30,3 -->
+<g id="edge382" class="edge"><title>30,4->30,3</title>
+<path fill="none" stroke="#218559" d="M1106.93,-2146.48C1097.29,-2146.75 1087.04,-2146.82 1077.09,-2146.68"/>
+<polygon fill="#218559" stroke="#218559" points="1076.94,-2143.18 1066.87,-2146.47 1076.79,-2150.18 1076.94,-2143.18"/>
+</g>
+<!-- 37,1 -->
+<g id="node169" class="node"><title>37,1</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">37,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-652 995,-666 1046,-666 1046,-652 995,-652"/>
+<text text-anchor="start" x="1003.5" y="-656.667" font-family="Times Roman,serif" font-size="10.00">CCACA</text>
+<polygon fill="#218559" stroke="#218559" points="995,-638 995,-652 1046,-652 1046,-638 995,-638"/>
+<text text-anchor="start" x="1004" y="-642.667" font-family="Times Roman,serif" font-size="10.00">TGTGG</text>
+</g>
+<!-- 37,2 -->
+<g id="node170" class="node"><title>37,2</title>
+<ellipse fill="none" stroke="black" cx="1154" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1131.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">37,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1129,-652 1129,-666 1180,-666 1180,-652 1129,-652"/>
+<text text-anchor="start" x="1137.5" y="-656.667" font-family="Times Roman,serif" font-size="10.00">CACAC</text>
+<polygon fill="#218559" stroke="#218559" points="1129,-638 1129,-652 1180,-652 1180,-638 1129,-638"/>
+<text text-anchor="start" x="1138" y="-642.667" font-family="Times Roman,serif" font-size="10.00">GTGTG</text>
+</g>
+<!-- 37,1->37,2 -->
+<g id="edge860" class="edge"><title>37,1->37,2</title>
+<path fill="none" stroke="#dd1e2f" d="M1066.87,-653.529C1076.5,-653.254 1086.75,-653.183 1096.7,-653.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1096.86,-656.817 1106.93,-653.523 1097.01,-649.819 1096.86,-656.817"/>
+</g>
+<!-- 37,2->37,1 -->
+<g id="edge866" class="edge"><title>37,2->37,1</title>
+<path fill="none" stroke="#218559" d="M1106.93,-666.477C1097.29,-666.749 1087.04,-666.817 1077.09,-666.684"/>
+<polygon fill="#218559" stroke="#218559" points="1076.94,-663.18 1066.87,-666.471 1076.79,-670.179 1076.94,-663.18"/>
+</g>
+<!-- 37,3 -->
+<g id="node171" class="node"><title>37,3</title>
+<ellipse fill="none" stroke="black" cx="1288" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1265.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">37,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1263,-652 1263,-666 1314,-666 1314,-652 1263,-652"/>
+<text text-anchor="start" x="1271" y="-656.667" font-family="Times Roman,serif" font-size="10.00">ACACG</text>
+<polygon fill="#218559" stroke="#218559" points="1263,-638 1263,-652 1314,-652 1314,-638 1263,-638"/>
+<text text-anchor="start" x="1272.5" y="-642.667" font-family="Times Roman,serif" font-size="10.00">CGTGT</text>
+</g>
+<!-- 37,2->37,3 -->
+<g id="edge862" class="edge"><title>37,2->37,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1200.87,-653.529C1210.5,-653.254 1220.75,-653.183 1230.7,-653.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1230.86,-656.817 1240.93,-653.523 1231.01,-649.819 1230.86,-656.817"/>
+</g>
+<!-- 36,4 -->
+<g id="node177" class="node"><title>36,4</title>
+<ellipse fill="none" stroke="black" cx="1288" cy="-1074" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1265.5" y="-1085.17" font-family="Times Roman,serif" font-size="10.00">36,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1263,-1066 1263,-1080 1314,-1080 1314,-1066 1263,-1066"/>
+<text text-anchor="start" x="1272.5" y="-1070.67" font-family="Times Roman,serif" font-size="10.00">CGTGT</text>
+<polygon fill="#218559" stroke="#218559" points="1263,-1052 1263,-1066 1314,-1066 1314,-1052 1263,-1052"/>
+<text text-anchor="start" x="1271" y="-1056.67" font-family="Times Roman,serif" font-size="10.00">ACACG</text>
+</g>
+<!-- 37,2->36,4 -->
+<g id="edge864" class="edge"><title>37,2->36,4</title>
+<path fill="none" stroke="#ebb035" d="M1165.42,-695.297C1189.75,-770.458 1246.64,-946.204 1273.48,-1029.15"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1270.19,-1030.36 1276.6,-1038.79 1276.85,-1028.2 1270.19,-1030.36"/>
+</g>
+<!-- 37,3->37,2 -->
+<g id="edge870" class="edge"><title>37,3->37,2</title>
+<path fill="none" stroke="#218559" d="M1240.93,-666.477C1231.29,-666.749 1221.04,-666.817 1211.09,-666.684"/>
+<polygon fill="#218559" stroke="#218559" points="1210.94,-663.18 1200.87,-666.471 1210.79,-670.179 1210.94,-663.18"/>
+</g>
+<!-- 37,4 -->
+<g id="node172" class="node"><title>37,4</title>
+<ellipse fill="none" stroke="black" cx="1422" cy="-660" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1399.5" y="-671.167" font-family="Times Roman,serif" font-size="10.00">37,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1397,-652 1397,-666 1448,-666 1448,-652 1397,-652"/>
+<text text-anchor="start" x="1405" y="-656.667" font-family="Times Roman,serif" font-size="10.00">CACGC</text>
+<polygon fill="#218559" stroke="#218559" points="1397,-638 1397,-652 1448,-652 1448,-638 1397,-638"/>
+<text text-anchor="start" x="1405" y="-642.667" font-family="Times Roman,serif" font-size="10.00">GCGTG</text>
+</g>
+<!-- 37,3->37,4 -->
+<g id="edge868" class="edge"><title>37,3->37,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1334.87,-653.529C1344.5,-653.254 1354.75,-653.183 1364.7,-653.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1364.86,-656.817 1374.93,-653.523 1365.01,-649.819 1364.86,-656.817"/>
+</g>
+<!-- 37,4->37,3 -->
+<g id="edge874" class="edge"><title>37,4->37,3</title>
+<path fill="none" stroke="#218559" d="M1374.93,-666.477C1365.29,-666.749 1355.04,-666.817 1345.09,-666.684"/>
+<polygon fill="#218559" stroke="#218559" points="1344.94,-663.18 1334.87,-666.471 1344.79,-670.179 1344.94,-663.18"/>
+</g>
+<!-- 36,2 -->
+<g id="node175" class="node"><title>36,2</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-1074" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-1085.17" font-family="Times Roman,serif" font-size="10.00">36,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-1066 995,-1080 1046,-1080 1046,-1066 995,-1066"/>
+<text text-anchor="start" x="1003.5" y="-1070.67" font-family="Times Roman,serif" font-size="10.00">AGCGT</text>
+<polygon fill="#218559" stroke="#218559" points="995,-1052 995,-1066 1046,-1066 1046,-1052 995,-1052"/>
+<text text-anchor="start" x="1004" y="-1056.67" font-family="Times Roman,serif" font-size="10.00">ACGCT</text>
+</g>
+<!-- 37,4->36,2 -->
+<g id="edge872" class="edge"><title>37,4->36,2</title>
+<path fill="none" stroke="#ebb035" d="M1404.3,-693.583C1381.29,-737.25 1344,-808 1344,-808 1344,-808 1210,-946 1210,-946 1210,-946 1120.23,-1006.48 1064.1,-1044.29"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1062.08,-1041.43 1055.74,-1049.92 1065.99,-1047.24 1062.08,-1041.43"/>
+</g>
+<!-- 36,1 -->
+<g id="node174" class="node"><title>36,1</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1074" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-1085.17" font-family="Times Roman,serif" font-size="10.00">36,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-1066 861,-1080 912,-1080 912,-1066 861,-1066"/>
+<text text-anchor="start" x="868.5" y="-1070.67" font-family="Times Roman,serif" font-size="10.00">AAGCG</text>
+<polygon fill="#218559" stroke="#218559" points="861,-1052 861,-1066 912,-1066 912,-1052 861,-1052"/>
+<text text-anchor="start" x="870.5" y="-1056.67" font-family="Times Roman,serif" font-size="10.00">CGCTT</text>
+</g>
+<!-- 36,1->36,2 -->
+<g id="edge434" class="edge"><title>36,1->36,2</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-1067.53C942.501,-1067.25 952.748,-1067.18 962.703,-1067.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-1070.82 972.933,-1067.52 963.007,-1063.82 962.864,-1070.82"/>
+</g>
+<!-- 35,3 -->
+<g id="node181" class="node"><title>35,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-1636" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-1647.17" font-family="Times Roman,serif" font-size="10.00">35,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-1628 727,-1642 778,-1642 778,-1628 727,-1628"/>
+<text text-anchor="start" x="737" y="-1632.67" font-family="Times Roman,serif" font-size="10.00">GCTTA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-1614 727,-1628 778,-1628 778,-1614 727,-1614"/>
+<text text-anchor="start" x="736" y="-1618.67" font-family="Times Roman,serif" font-size="10.00">TAAGC</text>
+</g>
+<!-- 36,1->35,3 -->
+<g id="edge438" class="edge"><title>36,1->35,3</title>
+<path fill="none" stroke="#06a2cb" d="M854.4,-1101.09C841.697,-1111.97 830,-1122 830,-1122 830,-1122 818,-1321 818,-1321 818,-1321 808,-1588 808,-1588 808,-1588 800.717,-1594.24 791.404,-1602.22"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="788.915,-1599.75 783.6,-1608.91 793.471,-1605.06 788.915,-1599.75"/>
+</g>
+<!-- 33,3 -->
+<g id="node191" class="node"><title>33,3</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-1540" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-1551.17" font-family="Times Roman,serif" font-size="10.00">33,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-1532 727,-1546 778,-1546 778,-1532 727,-1532"/>
+<text text-anchor="start" x="737" y="-1536.67" font-family="Times Roman,serif" font-size="10.00">GCTTA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-1518 727,-1532 778,-1532 778,-1518 727,-1518"/>
+<text text-anchor="start" x="736" y="-1522.67" font-family="Times Roman,serif" font-size="10.00">TAAGC</text>
+</g>
+<!-- 36,1->33,3 -->
+<g id="edge436" class="edge"><title>36,1->33,3</title>
+<path fill="none" stroke="#06a2cb" d="M854.4,-1101.09C841.697,-1111.97 830,-1122 830,-1122 830,-1122 780.859,-1385.34 760.587,-1493.98"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="757.112,-1493.53 758.718,-1504 763.993,-1494.81 757.112,-1493.53"/>
+</g>
+<!-- 36,2->37,4 -->
+<g id="edge442" class="edge"><title>36,2->37,4</title>
+<path fill="none" stroke="#ebb035" d="M1048.27,-1044.89C1119.77,-971.25 1307.74,-777.671 1386.49,-696.569"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1389.31,-698.692 1393.76,-689.08 1384.29,-693.816 1389.31,-698.692"/>
+</g>
+<!-- 36,2->36,1 -->
+<g id="edge444" class="edge"><title>36,2->36,1</title>
+<path fill="none" stroke="#218559" d="M972.933,-1080.48C963.29,-1080.75 953.041,-1080.82 943.091,-1080.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-1077.18 932.867,-1080.47 942.792,-1084.18 942.937,-1077.18"/>
+</g>
+<!-- 36,3 -->
+<g id="node176" class="node"><title>36,3</title>
+<ellipse fill="none" stroke="black" cx="1154" cy="-1074" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="1131.5" y="-1085.17" font-family="Times Roman,serif" font-size="10.00">36,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1129,-1066 1129,-1080 1180,-1080 1180,-1066 1129,-1066"/>
+<text text-anchor="start" x="1137" y="-1070.67" font-family="Times Roman,serif" font-size="10.00">GCGTG</text>
+<polygon fill="#218559" stroke="#218559" points="1129,-1052 1129,-1066 1180,-1066 1180,-1052 1129,-1052"/>
+<text text-anchor="start" x="1137" y="-1056.67" font-family="Times Roman,serif" font-size="10.00">CACGC</text>
+</g>
+<!-- 36,2->36,3 -->
+<g id="edge440" class="edge"><title>36,2->36,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1066.87,-1067.53C1076.5,-1067.25 1086.75,-1067.18 1096.7,-1067.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1096.86,-1070.82 1106.93,-1067.52 1097.01,-1063.82 1096.86,-1070.82"/>
+</g>
+<!-- 36,3->36,2 -->
+<g id="edge448" class="edge"><title>36,3->36,2</title>
+<path fill="none" stroke="#218559" d="M1106.93,-1080.48C1097.29,-1080.75 1087.04,-1080.82 1077.09,-1080.68"/>
+<polygon fill="#218559" stroke="#218559" points="1076.94,-1077.18 1066.87,-1080.47 1076.79,-1084.18 1076.94,-1077.18"/>
+</g>
+<!-- 36,3->36,4 -->
+<g id="edge446" class="edge"><title>36,3->36,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1200.87,-1067.53C1210.5,-1067.25 1220.75,-1067.18 1230.7,-1067.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1230.86,-1070.82 1240.93,-1067.52 1231.01,-1063.82 1230.86,-1070.82"/>
+</g>
+<!-- 36,4->37,2 -->
+<g id="edge450" class="edge"><title>36,4->37,2</title>
+<path fill="none" stroke="#ebb035" d="M1274.73,-1039.06C1254.94,-986.97 1220,-895 1220,-895 1220,-895 1185.83,-773.346 1166.67,-705.097"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1170.03,-704.117 1163.95,-695.436 1163.29,-706.01 1170.03,-704.117"/>
+</g>
+<!-- 36,4->36,3 -->
+<g id="edge452" class="edge"><title>36,4->36,3</title>
+<path fill="none" stroke="#218559" d="M1240.93,-1080.48C1231.29,-1080.75 1221.04,-1080.82 1211.09,-1080.68"/>
+<polygon fill="#218559" stroke="#218559" points="1210.94,-1077.18 1200.87,-1080.47 1210.79,-1084.18 1210.94,-1077.18"/>
+</g>
+<!-- 35,1 -->
+<g id="node179" class="node"><title>35,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-1636" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-1647.17" font-family="Times Roman,serif" font-size="10.00">35,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-1628 459,-1642 510,-1642 510,-1628 459,-1628"/>
+<text text-anchor="start" x="469" y="-1632.67" font-family="Times Roman,serif" font-size="10.00">TCGCT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-1614 459,-1628 510,-1628 510,-1614 459,-1614"/>
+<text text-anchor="start" x="466.5" y="-1618.67" font-family="Times Roman,serif" font-size="10.00">AGCGA</text>
+</g>
+<!-- 35,2 -->
+<g id="node180" class="node"><title>35,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-1636" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-1647.17" font-family="Times Roman,serif" font-size="10.00">35,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-1628 593,-1642 644,-1642 644,-1628 593,-1628"/>
+<text text-anchor="start" x="602.5" y="-1632.67" font-family="Times Roman,serif" font-size="10.00">CGCTT</text>
+<polygon fill="#218559" stroke="#218559" points="593,-1614 593,-1628 644,-1628 644,-1614 593,-1614"/>
+<text text-anchor="start" x="600.5" y="-1618.67" font-family="Times Roman,serif" font-size="10.00">AAGCG</text>
+</g>
+<!-- 35,1->35,2 -->
+<g id="edge836" class="edge"><title>35,1->35,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-1629.53C540.501,-1629.25 550.748,-1629.18 560.703,-1629.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-1632.82 570.933,-1629.52 561.007,-1625.82 560.864,-1632.82"/>
+</g>
+<!-- 35,2->35,1 -->
+<g id="edge844" class="edge"><title>35,2->35,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-1642.48C561.29,-1642.75 551.041,-1642.82 541.091,-1642.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-1639.18 530.867,-1642.47 540.792,-1646.18 540.937,-1639.18"/>
+</g>
+<!-- 35,2->35,3 -->
+<g id="edge840" class="edge"><title>35,2->35,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-1629.53C674.501,-1629.25 684.748,-1629.18 694.703,-1629.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-1632.82 704.933,-1629.52 695.007,-1625.82 694.864,-1632.82"/>
+</g>
+<!-- 34,1 -->
+<g id="node184" class="node"><title>34,1</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-756" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-767.167" font-family="Times Roman,serif" font-size="10.00">34,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-748 593,-762 644,-762 644,-748 593,-748"/>
+<text text-anchor="start" x="603" y="-752.667" font-family="Times Roman,serif" font-size="10.00">GCTTA</text>
+<polygon fill="#218559" stroke="#218559" points="593,-734 593,-748 644,-748 644,-734 593,-734"/>
+<text text-anchor="start" x="602" y="-738.667" font-family="Times Roman,serif" font-size="10.00">TAAGC</text>
+</g>
+<!-- 35,2->34,1 -->
+<g id="edge838" class="edge"><title>35,2->34,1</title>
+<path fill="none" stroke="#dd1e2f" d="M586.07,-1608.84C568.813,-1594.15 551,-1579 551,-1579 551,-1579 551,-1131 551,-1131 551,-1131 591.863,-902.29 609.764,-802.095"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="613.262,-802.416 611.576,-791.957 606.371,-801.185 613.262,-802.416"/>
+</g>
+<!-- 34,4 -->
+<g id="node187" class="node"><title>34,4</title>
+<ellipse fill="none" stroke="black" cx="1020" cy="-756" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="997.5" y="-767.167" font-family="Times Roman,serif" font-size="10.00">34,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="995,-748 995,-762 1046,-762 1046,-748 995,-748"/>
+<text text-anchor="start" x="1004" y="-752.667" font-family="Times Roman,serif" font-size="10.00">TAAGC</text>
+<polygon fill="#218559" stroke="#218559" points="995,-734 995,-748 1046,-748 1046,-734 995,-734"/>
+<text text-anchor="start" x="1005" y="-738.667" font-family="Times Roman,serif" font-size="10.00">GCTTA</text>
+</g>
+<!-- 35,2->34,4 -->
+<g id="edge842" class="edge"><title>35,2->34,4</title>
+<path fill="none" stroke="#ebb035" d="M649.6,-1608.91C662.303,-1598.03 674,-1588 674,-1588 674,-1588 696,-1122 696,-1122 696,-1122 808,-1060 808,-1060 808,-1060 830,-992 830,-992 830,-992 942,-911 942,-911 942,-911 975.191,-845.043 998.271,-799.18"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="1001.51,-800.521 1002.88,-790.015 995.261,-797.375 1001.51,-800.521"/>
+</g>
+<!-- 35,3->36,1 -->
+<g id="edge848" class="edge"><title>35,3->36,1</title>
+<path fill="none" stroke="#06a2cb" d="M783.6,-1608.91C796.303,-1598.03 808,-1588 808,-1588 808,-1588 830,-1122 830,-1122 830,-1122 837.283,-1115.76 846.596,-1107.78"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="849.085,-1110.25 854.4,-1101.09 844.529,-1104.94 849.085,-1110.25"/>
+</g>
+<!-- 35,3->35,2 -->
+<g id="edge852" class="edge"><title>35,3->35,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-1642.48C695.29,-1642.75 685.041,-1642.82 675.091,-1642.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-1639.18 664.867,-1642.47 674.792,-1646.18 674.937,-1639.18"/>
+</g>
+<!-- 35,4 -->
+<g id="node182" class="node"><title>35,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1636" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-1647.17" font-family="Times Roman,serif" font-size="10.00">35,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-1628 861,-1642 912,-1642 912,-1628 861,-1628"/>
+<text text-anchor="start" x="871" y="-1632.67" font-family="Times Roman,serif" font-size="10.00">CTTAA</text>
+<polygon fill="#218559" stroke="#218559" points="861,-1614 861,-1628 912,-1628 912,-1614 861,-1614"/>
+<text text-anchor="start" x="871" y="-1618.67" font-family="Times Roman,serif" font-size="10.00">TTAAG</text>
+</g>
+<!-- 35,3->35,4 -->
+<g id="edge846" class="edge"><title>35,3->35,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-1629.53C808.501,-1629.25 818.748,-1629.18 828.703,-1629.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-1632.82 838.933,-1629.52 829.007,-1625.82 828.864,-1632.82"/>
+</g>
+<!-- 32,1 -->
+<g id="node194" class="node"><title>32,1</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-1974" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-1985.17" font-family="Times Roman,serif" font-size="10.00">32,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-1966 593,-1980 644,-1980 644,-1966 593,-1966"/>
+<text text-anchor="start" x="600.5" y="-1970.67" font-family="Times Roman,serif" font-size="10.00">AAGCG</text>
+<polygon fill="#218559" stroke="#218559" points="593,-1952 593,-1966 644,-1966 644,-1952 593,-1952"/>
+<text text-anchor="start" x="602.5" y="-1956.67" font-family="Times Roman,serif" font-size="10.00">CGCTT</text>
+</g>
+<!-- 35,3->32,1 -->
+<g id="edge850" class="edge"><title>35,3->32,1</title>
+<path fill="none" stroke="#06a2cb" d="M720.4,-1663.09C707.697,-1673.97 696,-1684 696,-1684 696,-1684 686,-1878 686,-1878 686,-1878 665.176,-1907.4 646.575,-1933.66"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="643.567,-1931.85 640.643,-1942.03 649.279,-1935.9 643.567,-1931.85"/>
+</g>
+<!-- 35,4->35,3 -->
+<g id="edge858" class="edge"><title>35,4->35,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-1642.48C829.29,-1642.75 819.041,-1642.82 809.091,-1642.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-1639.18 798.867,-1642.47 808.792,-1646.18 808.937,-1639.18"/>
+</g>
+<!-- 34,2 -->
+<g id="node185" class="node"><title>34,2</title>
+<ellipse fill="none" stroke="black" cx="752" cy="-756" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="729.5" y="-767.167" font-family="Times Roman,serif" font-size="10.00">34,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="727,-748 727,-762 778,-762 778,-748 727,-748"/>
+<text text-anchor="start" x="737" y="-752.667" font-family="Times Roman,serif" font-size="10.00">CTTAA</text>
+<polygon fill="#218559" stroke="#218559" points="727,-734 727,-748 778,-748 778,-734 727,-734"/>
+<text text-anchor="start" x="737" y="-738.667" font-family="Times Roman,serif" font-size="10.00">TTAAG</text>
+</g>
+<!-- 35,4->34,2 -->
+<g id="edge856" class="edge"><title>35,4->34,2</title>
+<path fill="none" stroke="#ebb035" d="M854.4,-1608.91C841.697,-1598.03 830,-1588 830,-1588 830,-1588 820,-1343 820,-1343 820,-1343 808,-1122 808,-1122 808,-1122 774.093,-900.394 759.047,-802.057"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="762.495,-801.451 757.523,-792.095 755.576,-802.509 762.495,-801.451"/>
+</g>
+<!-- 34,3 -->
+<g id="node186" class="node"><title>34,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-756" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-767.167" font-family="Times Roman,serif" font-size="10.00">34,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-748 861,-762 912,-762 912,-748 861,-748"/>
+<text text-anchor="start" x="871" y="-752.667" font-family="Times Roman,serif" font-size="10.00">TTAAG</text>
+<polygon fill="#218559" stroke="#218559" points="861,-734 861,-748 912,-748 912,-734 861,-734"/>
+<text text-anchor="start" x="871" y="-738.667" font-family="Times Roman,serif" font-size="10.00">CTTAA</text>
+</g>
+<!-- 35,4->34,3 -->
+<g id="edge854" class="edge"><title>35,4->34,3</title>
+<path fill="none" stroke="#dd1e2f" d="M854.07,-1608.84C836.813,-1594.15 819,-1579 819,-1579 819,-1579 819,-1035 819,-1035 819,-1035 855.988,-880.975 874.999,-801.809"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="878.514,-802.16 877.446,-791.619 871.708,-800.525 878.514,-802.16"/>
+</g>
+<!-- 34,1->35,2 -->
+<g id="edge410" class="edge"><title>34,1->35,2</title>
+<path fill="none" stroke="#218559" d="M611.576,-791.957C594.806,-885.816 551,-1131 551,-1131 551,-1131 551,-1579 551,-1579 551,-1579 563.917,-1589.99 578.409,-1602.32"/>
+<polygon fill="#218559" stroke="#218559" points="576.185,-1605.02 586.07,-1608.84 580.721,-1599.69 576.185,-1605.02"/>
+</g>
+<!-- 34,1->34,2 -->
+<g id="edge406" class="edge"><title>34,1->34,2</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-749.529C674.501,-749.254 684.748,-749.183 694.703,-749.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-752.817 704.933,-749.523 695.007,-745.819 694.864,-752.817"/>
+</g>
+<!-- 33,2 -->
+<g id="node190" class="node"><title>33,2</title>
+<ellipse fill="none" stroke="black" cx="618" cy="-1540" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="595.5" y="-1551.17" font-family="Times Roman,serif" font-size="10.00">33,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="593,-1532 593,-1546 644,-1546 644,-1532 593,-1532"/>
+<text text-anchor="start" x="602.5" y="-1536.67" font-family="Times Roman,serif" font-size="10.00">CGCTT</text>
+<polygon fill="#218559" stroke="#218559" points="593,-1518 593,-1532 644,-1532 644,-1518 593,-1518"/>
+<text text-anchor="start" x="600.5" y="-1522.67" font-family="Times Roman,serif" font-size="10.00">AAGCG</text>
+</g>
+<!-- 34,1->33,2 -->
+<g id="edge408" class="edge"><title>34,1->33,2</title>
+<path fill="none" stroke="#218559" d="M611.576,-791.957C594.806,-885.816 551,-1131 551,-1131 551,-1131 551,-1209 551,-1209 551,-1209 590.35,-1403.4 608.737,-1494.24"/>
+<polygon fill="#218559" stroke="#218559" points="605.352,-1495.16 610.766,-1504.26 612.213,-1493.77 605.352,-1495.16"/>
+</g>
+<!-- 34,2->35,4 -->
+<g id="edge414" class="edge"><title>34,2->35,4</title>
+<path fill="none" stroke="#ebb035" d="M759.934,-791.701C777.871,-872.42 820,-1062 820,-1062 820,-1062 830,-1588 830,-1588 830,-1588 837.283,-1594.24 846.596,-1602.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="844.529,-1605.06 854.4,-1608.91 849.085,-1599.75 844.529,-1605.06"/>
+</g>
+<!-- 34,2->34,1 -->
+<g id="edge418" class="edge"><title>34,2->34,1</title>
+<path fill="none" stroke="#218559" d="M704.933,-762.477C695.29,-762.749 685.041,-762.817 675.091,-762.684"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-759.18 664.867,-762.471 674.792,-766.179 674.937,-759.18"/>
+</g>
+<!-- 34,2->34,3 -->
+<g id="edge412" class="edge"><title>34,2->34,3</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-749.529C808.501,-749.254 818.748,-749.183 828.703,-749.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-752.817 838.933,-749.523 829.007,-745.819 828.864,-752.817"/>
+</g>
+<!-- 33,4 -->
+<g id="node192" class="node"><title>33,4</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1540" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-1551.17" font-family="Times Roman,serif" font-size="10.00">33,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-1532 861,-1546 912,-1546 912,-1532 861,-1532"/>
+<text text-anchor="start" x="871" y="-1536.67" font-family="Times Roman,serif" font-size="10.00">CTTAA</text>
+<polygon fill="#218559" stroke="#218559" points="861,-1518 861,-1532 912,-1532 912,-1518 861,-1518"/>
+<text text-anchor="start" x="871" y="-1522.67" font-family="Times Roman,serif" font-size="10.00">TTAAG</text>
+</g>
+<!-- 34,2->33,4 -->
+<g id="edge416" class="edge"><title>34,2->33,4</title>
+<path fill="none" stroke="#ebb035" d="M762.034,-791.413C780.949,-858.173 820,-996 820,-996 820,-996 830,-1492 830,-1492 830,-1492 837.283,-1498.24 846.596,-1506.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="844.529,-1509.06 854.4,-1512.91 849.085,-1503.75 844.529,-1509.06"/>
+</g>
+<!-- 34,3->35,4 -->
+<g id="edge422" class="edge"><title>34,3->35,4</title>
+<path fill="none" stroke="#218559" d="M877.446,-791.619C859.384,-866.834 819,-1035 819,-1035 819,-1035 819,-1579 819,-1579 819,-1579 831.917,-1589.99 846.409,-1602.32"/>
+<polygon fill="#218559" stroke="#218559" points="844.185,-1605.02 854.07,-1608.84 848.721,-1599.69 844.185,-1605.02"/>
+</g>
+<!-- 34,3->34,2 -->
+<g id="edge426" class="edge"><title>34,3->34,2</title>
+<path fill="none" stroke="#218559" d="M838.933,-762.477C829.29,-762.749 819.041,-762.817 809.091,-762.684"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-759.18 798.867,-762.471 808.792,-766.179 808.937,-759.18"/>
+</g>
+<!-- 34,3->34,4 -->
+<g id="edge420" class="edge"><title>34,3->34,4</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-749.529C942.501,-749.254 952.748,-749.183 962.703,-749.313"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-752.817 972.933,-749.523 963.007,-745.819 962.864,-752.817"/>
+</g>
+<!-- 34,3->33,4 -->
+<g id="edge424" class="edge"><title>34,3->33,4</title>
+<path fill="none" stroke="#218559" d="M877.446,-791.619C859.384,-866.834 819,-1035 819,-1035 819,-1035 819,-1483 819,-1483 819,-1483 831.917,-1493.99 846.409,-1506.32"/>
+<polygon fill="#218559" stroke="#218559" points="844.185,-1509.02 854.07,-1512.84 848.721,-1503.69 844.185,-1509.02"/>
+</g>
+<!-- 34,4->35,2 -->
+<g id="edge430" class="edge"><title>34,4->35,2</title>
+<path fill="none" stroke="#ebb035" d="M998.48,-788.28C975.392,-822.912 942,-873 942,-873 942,-873 830,-920 830,-920 830,-920 808,-1000 808,-1000 808,-1000 696,-1091 696,-1091 696,-1091 686,-1262 686,-1262 686,-1262 684,-1296 684,-1296 684,-1296 674,-1588 674,-1588 674,-1588 666.717,-1594.24 657.404,-1602.22"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="654.915,-1599.75 649.6,-1608.91 659.471,-1605.06 654.915,-1599.75"/>
+</g>
+<!-- 34,4->34,3 -->
+<g id="edge432" class="edge"><title>34,4->34,3</title>
+<path fill="none" stroke="#218559" d="M972.933,-762.477C963.29,-762.749 953.041,-762.817 943.091,-762.684"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-759.18 932.867,-762.471 942.792,-766.179 942.937,-759.18"/>
+</g>
+<!-- 34,4->33,2 -->
+<g id="edge428" class="edge"><title>34,4->33,2</title>
+<path fill="none" stroke="#ebb035" d="M994.294,-786.38C975.762,-808.281 954,-834 954,-834 954,-834 818,-883 818,-883 818,-883 808,-929 808,-929 808,-929 696,-1042 696,-1042 696,-1042 686,-1243 686,-1243 686,-1243 647.466,-1411.3 628.413,-1494.52"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="624.955,-1493.94 626.135,-1504.47 631.778,-1495.5 624.955,-1493.94"/>
+</g>
+<!-- 33,1 -->
+<g id="node189" class="node"><title>33,1</title>
+<ellipse fill="none" stroke="black" cx="484" cy="-1540" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="461.5" y="-1551.17" font-family="Times Roman,serif" font-size="10.00">33,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="459,-1532 459,-1546 510,-1546 510,-1532 459,-1532"/>
+<text text-anchor="start" x="469" y="-1536.67" font-family="Times Roman,serif" font-size="10.00">TCGCT</text>
+<polygon fill="#218559" stroke="#218559" points="459,-1518 459,-1532 510,-1532 510,-1518 459,-1518"/>
+<text text-anchor="start" x="466.5" y="-1522.67" font-family="Times Roman,serif" font-size="10.00">AGCGA</text>
+</g>
+<!-- 33,1->33,2 -->
+<g id="edge812" class="edge"><title>33,1->33,2</title>
+<path fill="none" stroke="#dd1e2f" d="M530.867,-1533.53C540.501,-1533.25 550.748,-1533.18 560.703,-1533.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="560.864,-1536.82 570.933,-1533.52 561.007,-1529.82 560.864,-1536.82"/>
+</g>
+<!-- 33,2->34,1 -->
+<g id="edge814" class="edge"><title>33,2->34,1</title>
+<path fill="none" stroke="#dd1e2f" d="M610.766,-1504.26C593.441,-1418.67 551,-1209 551,-1209 551,-1209 551,-1131 551,-1131 551,-1131 591.863,-902.29 609.764,-802.095"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="613.262,-802.416 611.576,-791.957 606.371,-801.185 613.262,-802.416"/>
+</g>
+<!-- 33,2->34,4 -->
+<g id="edge818" class="edge"><title>33,2->34,4</title>
+<path fill="none" stroke="#ebb035" d="M624.261,-1504C638.874,-1419.98 674,-1218 674,-1218 674,-1218 696,-929 696,-929 696,-929 830,-835 830,-835 830,-835 942,-801 942,-801 942,-801 956.586,-792.585 973.143,-783.033"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="975.001,-786.002 981.914,-777.973 971.503,-779.938 975.001,-786.002"/>
+</g>
+<!-- 33,2->33,1 -->
+<g id="edge820" class="edge"><title>33,2->33,1</title>
+<path fill="none" stroke="#218559" d="M570.933,-1546.48C561.29,-1546.75 551.041,-1546.82 541.091,-1546.68"/>
+<polygon fill="#218559" stroke="#218559" points="540.937,-1543.18 530.867,-1546.47 540.792,-1550.18 540.937,-1543.18"/>
+</g>
+<!-- 33,2->33,3 -->
+<g id="edge816" class="edge"><title>33,2->33,3</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-1533.53C674.501,-1533.25 684.748,-1533.18 694.703,-1533.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-1536.82 704.933,-1533.52 695.007,-1529.82 694.864,-1536.82"/>
+</g>
+<!-- 33,3->36,1 -->
+<g id="edge824" class="edge"><title>33,3->36,1</title>
+<path fill="none" stroke="#06a2cb" d="M759.085,-1504.26C776.501,-1416.41 820,-1197 820,-1197 820,-1197 830,-1122 830,-1122 830,-1122 837.283,-1115.76 846.596,-1107.78"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="849.085,-1110.25 854.4,-1101.09 844.529,-1104.94 849.085,-1110.25"/>
+</g>
+<!-- 33,3->33,2 -->
+<g id="edge828" class="edge"><title>33,3->33,2</title>
+<path fill="none" stroke="#218559" d="M704.933,-1546.48C695.29,-1546.75 685.041,-1546.82 675.091,-1546.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-1543.18 664.867,-1546.47 674.792,-1550.18 674.937,-1543.18"/>
+</g>
+<!-- 33,3->33,4 -->
+<g id="edge822" class="edge"><title>33,3->33,4</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-1533.53C808.501,-1533.25 818.748,-1533.18 828.703,-1533.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-1536.82 838.933,-1533.52 829.007,-1529.82 828.864,-1536.82"/>
+</g>
+<!-- 33,3->32,1 -->
+<g id="edge826" class="edge"><title>33,3->32,1</title>
+<path fill="none" stroke="#06a2cb" d="M720.4,-1567.09C707.697,-1577.97 696,-1588 696,-1588 696,-1588 686,-1698 686,-1698 686,-1698 674,-1780 674,-1780 674,-1780 647.692,-1871.14 631.139,-1928.48"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="627.682,-1927.84 628.271,-1938.42 634.407,-1929.78 627.682,-1927.84"/>
+</g>
+<!-- 33,4->34,2 -->
+<g id="edge832" class="edge"><title>33,4->34,2</title>
+<path fill="none" stroke="#ebb035" d="M854.4,-1512.91C841.697,-1502.03 830,-1492 830,-1492 830,-1492 820,-1032 820,-1032 820,-1032 782.554,-880.014 763.238,-801.612"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="766.541,-800.389 760.751,-791.517 759.745,-802.064 766.541,-800.389"/>
+</g>
+<!-- 33,4->34,3 -->
+<g id="edge830" class="edge"><title>33,4->34,3</title>
+<path fill="none" stroke="#dd1e2f" d="M854.07,-1512.84C836.813,-1498.15 819,-1483 819,-1483 819,-1483 819,-1035 819,-1035 819,-1035 855.988,-880.975 874.999,-801.809"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="878.514,-802.16 877.446,-791.619 871.708,-800.525 878.514,-802.16"/>
+</g>
+<!-- 33,4->33,3 -->
+<g id="edge834" class="edge"><title>33,4->33,3</title>
+<path fill="none" stroke="#218559" d="M838.933,-1546.48C829.29,-1546.75 819.041,-1546.82 809.091,-1546.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-1543.18 798.867,-1546.47 808.792,-1550.18 808.937,-1543.18"/>
+</g>
+<!-- 32,1->35,3 -->
+<g id="edge388" class="edge"><title>32,1->35,3</title>
+<path fill="none" stroke="#06a2cb" d="M638.034,-1941C658.093,-1907.96 686,-1862 686,-1862 686,-1862 696,-1684 696,-1684 696,-1684 703.283,-1677.76 712.596,-1669.78"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="715.085,-1672.25 720.4,-1663.09 710.529,-1666.94 715.085,-1672.25"/>
+</g>
+<!-- 32,1->33,3 -->
+<g id="edge386" class="edge"><title>32,1->33,3</title>
+<path fill="none" stroke="#06a2cb" d="M628.271,-1938.42C644.438,-1882.41 674,-1780 674,-1780 674,-1780 696,-1588 696,-1588 696,-1588 703.283,-1581.76 712.596,-1573.78"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="715.085,-1576.25 720.4,-1567.09 710.529,-1570.94 715.085,-1576.25"/>
+</g>
+<!-- 32,1->32,2 -->
+<g id="edge384" class="edge"><title>32,1->32,2</title>
+<path fill="none" stroke="#dd1e2f" d="M664.867,-1967.53C674.501,-1967.25 684.748,-1967.18 694.703,-1967.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="694.864,-1970.82 704.933,-1967.52 695.007,-1963.82 694.864,-1970.82"/>
+</g>
+<!-- 32,2->31,1 -->
+<g id="edge390" class="edge"><title>32,2->31,1</title>
+<path fill="none" stroke="#dd1e2f" d="M719.991,-1947.17C702.354,-1932.39 684,-1917 684,-1917 684,-1917 653.327,-1831.02 633.801,-1776.29"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="637.071,-1775.04 630.414,-1766.8 630.478,-1777.39 637.071,-1775.04"/>
+</g>
+<!-- 32,2->32,1 -->
+<g id="edge394" class="edge"><title>32,2->32,1</title>
+<path fill="none" stroke="#218559" d="M704.933,-1980.48C695.29,-1980.75 685.041,-1980.82 675.091,-1980.68"/>
+<polygon fill="#218559" stroke="#218559" points="674.937,-1977.18 664.867,-1980.47 674.792,-1984.18 674.937,-1977.18"/>
+</g>
+<!-- 32,3 -->
+<g id="node196" class="node"><title>32,3</title>
+<ellipse fill="none" stroke="black" cx="886" cy="-1974" rx="46.8775" ry="36.0624"/>
+<text text-anchor="start" x="863.5" y="-1985.17" font-family="Times Roman,serif" font-size="10.00">32,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="861,-1966 861,-1980 912,-1980 912,-1966 861,-1966"/>
+<text text-anchor="start" x="870" y="-1970.67" font-family="Times Roman,serif" font-size="10.00">GCGTC</text>
+<polygon fill="#218559" stroke="#218559" points="861,-1952 861,-1966 912,-1966 912,-1952 861,-1952"/>
+<text text-anchor="start" x="868.5" y="-1956.67" font-family="Times Roman,serif" font-size="10.00">GACGC</text>
+</g>
+<!-- 32,2->32,3 -->
+<g id="edge392" class="edge"><title>32,2->32,3</title>
+<path fill="none" stroke="#dd1e2f" d="M798.867,-1967.53C808.501,-1967.25 818.748,-1967.18 828.703,-1967.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="828.864,-1970.82 838.933,-1967.52 829.007,-1963.82 828.864,-1970.82"/>
+</g>
+<!-- 32,3->32,2 -->
+<g id="edge398" class="edge"><title>32,3->32,2</title>
+<path fill="none" stroke="#218559" d="M838.933,-1980.48C829.29,-1980.75 819.041,-1980.82 809.091,-1980.68"/>
+<polygon fill="#218559" stroke="#218559" points="808.937,-1977.18 798.867,-1980.47 808.792,-1984.18 808.937,-1977.18"/>
+</g>
+<!-- 32,3->32,4 -->
+<g id="edge396" class="edge"><title>32,3->32,4</title>
+<path fill="none" stroke="#dd1e2f" d="M932.867,-1967.53C942.501,-1967.25 952.748,-1967.18 962.703,-1967.31"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="962.864,-1970.82 972.933,-1967.52 963.007,-1963.82 962.864,-1970.82"/>
+</g>
+<!-- 32,4->29,2 -->
+<g id="edge402" class="edge"><title>32,4->29,2</title>
+<path fill="none" stroke="#ebb035" d="M1003.8,-2008.13C987.264,-2042.98 964,-2092 964,-2092 964,-2092 942,-2188 942,-2188 942,-2188 922.563,-2228.96 906.591,-2262.61"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="903.323,-2261.33 902.198,-2271.87 909.647,-2264.34 903.323,-2261.33"/>
+</g>
+<!-- 32,4->31,3 -->
+<g id="edge400" class="edge"><title>32,4->31,3</title>
+<path fill="none" stroke="#dd1e2f" d="M1009.73,-1938.42C993.562,-1882.41 964,-1780 964,-1780 964,-1780 954,-1246 954,-1246 954,-1246 952,-1246 952,-1246 952,-1246 942,-1684 942,-1684 942,-1684 934.717,-1690.24 925.404,-1698.22"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="922.915,-1695.75 917.6,-1704.91 927.471,-1701.06 922.915,-1695.75"/>
+</g>
+<!-- 32,4->32,3 -->
+<g id="edge404" class="edge"><title>32,4->32,3</title>
+<path fill="none" stroke="#218559" d="M972.933,-1980.48C963.29,-1980.75 953.041,-1980.82 943.091,-1980.68"/>
+<polygon fill="#218559" stroke="#218559" points="942.937,-1977.18 932.867,-1980.47 942.792,-1984.18 942.937,-1977.18"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1/.part-0.crc b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1/.part-0.crc
new file mode 100644
index 0000000..32c39ff
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1/.part-1.crc b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1/.part-1.crc
new file mode 100644
index 0000000..72c5a6b
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1/part-0 b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1/part-0
new file mode 100755
index 0000000..7db79d9
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1/part-1 b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1/part-1
new file mode 100755
index 0000000..54cde64
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq1/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2.txt b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2.txt
new file mode 100644
index 0000000..474c862
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2.txt
@@ -0,0 +1,24 @@
+((2,1) [(2,2)] [] [(3,3)] [] CTAGC) (null)
+((2,2) [(6,1),(2,3)] [(1,4)] [] [(2,1)] TAGCG) (null)
+((2,3) [(2,4)] [] [(3,1)] [(2,2)] AGCGC) (null)
+((2,4) [(6,3)] [(1,2)] [] [(2,3)] GCGCA) (null)
+((4,1) [(4,2)] [] [] [(3,2)] CTAGG) (null)
+((4,2) [(5,1),(4,3)] [] [] [(4,1)] TAGGA) (null)
+((4,3) [(4,4)] [] [] [(3,4),(4,2)] AGGAG) (null)
+((4,4) [(5,3)] [] [] [(4,3)] GGAGT) (null)
+((6,1) [(6,2)] [] [] [(2,2)] AGCGC) (null)
+((6,2) [(6,3)] [] [] [(6,1)] GCGCA) (null)
+((6,3) [(6,4)] [] [] [(2,4),(6,2)] CGCAT) (null)
+((6,4) [] [] [] [(6,3)] GCATT) (null)
+((1,1) [(1,2)] [] [] [] AATGC) (null)
+((1,2) [(1,3)] [(2,4)] [] [(1,1)] ATGCG) (null)
+((1,3) [(1,4)] [] [] [(1,2)] TGCGC) (null)
+((1,4) [] [(2,2)] [] [(1,3)] GCGCT) (null)
+((3,1) [(3,2)] [] [(2,3)] [] CGCTA) (null)
+((3,2) [(4,1),(3,3)] [] [] [(3,1)] GCTAG) (null)
+((3,3) [(3,4)] [] [(2,1)] [(3,2)] CTAGG) (null)
+((3,4) [(4,3)] [] [] [(3,3)] TAGGA) (null)
+((5,1) [(5,2)] [] [] [(4,2)] AGGAG) (null)
+((5,2) [(5,3)] [] [] [(5,1)] GGAGT) (null)
+((5,3) [(5,4)] [] [] [(4,4),(5,2)] GAGTT) (null)
+((5,4) [] [] [] [(5,3)] AGTTG) (null)
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2.txt.svg b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2.txt.svg
new file mode 100644
index 0000000..70ffb5c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2.txt.svg
@@ -0,0 +1,587 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: walk_random_seq2_txt Pages: 1 -->
+<svg width="1260pt" height="669pt"
+ viewBox="0.00 0.00 1260.00 669.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 665)">
+<title>walk_random_seq2_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-665 1257,-665 1257,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-130 49,-331 211,-331 211,-130 49,-130"/>
+<text text-anchor="middle" x="130" y="-314.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="8,-558 8,-646 500,-646 500,-558 8,-558"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_3</title>
+<polygon fill="none" stroke="black" points="274,-200 274,-288 748,-288 748,-200 274,-200"/>
+</g>
+<g id="graph5" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="150,-462 150,-550 624,-550 624,-462 150,-462"/>
+</g>
+<g id="graph6" class="cluster"><title>cluster_5</title>
+<polygon fill="none" stroke="black" points="770,-8 770,-96 1244,-96 1244,-8 770,-8"/>
+</g>
+<g id="graph7" class="cluster"><title>cluster_4</title>
+<polygon fill="none" stroke="black" points="522,-104 522,-192 996,-192 996,-104 522,-104"/>
+</g>
+<g id="graph8" class="cluster"><title>cluster_6</title>
+<polygon fill="none" stroke="black" points="398,-366 398,-454 872,-454 872,-366 398,-366"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-279" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-279" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-279C74.8673,-279 156.744,-279 188.46,-279"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-282.5 198.862,-279 188.861,-275.5 188.862,-282.5"/>
+<text text-anchor="middle" x="130" y="-284.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-238" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-238" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-238C74.8673,-238 156.744,-238 188.46,-238"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-241.5 198.862,-238 188.861,-234.5 188.862,-241.5"/>
+<text text-anchor="middle" x="130" y="-243.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-197" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-197" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-197C74.8673,-197 156.744,-197 188.46,-197"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-200.5 198.862,-197 188.861,-193.5 188.862,-200.5"/>
+<text text-anchor="middle" x="130" y="-202.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-156" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-156" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-156C74.8673,-156 156.744,-156 188.46,-156"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-159.5 198.862,-156 188.861,-152.5 188.862,-159.5"/>
+<text text-anchor="middle" x="130" y="-161.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-602" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-613.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-594 37,-608 82,-608 82,-594 37,-594"/>
+<text text-anchor="start" x="43" y="-598.667" font-family="Times Roman,serif" font-size="10.00">AATGC</text>
+<polygon fill="#218559" stroke="#218559" points="37,-580 37,-594 82,-594 82,-580 37,-580"/>
+<text text-anchor="start" x="44" y="-584.667" font-family="Times Roman,serif" font-size="10.00">GCATT</text>
+</g>
+<!-- 1,2 -->
+<g id="node16" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-602" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-613.167" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-594 179,-608 224,-608 224,-594 179,-594"/>
+<text text-anchor="start" x="185" y="-598.667" font-family="Times Roman,serif" font-size="10.00">ATGCG</text>
+<polygon fill="#218559" stroke="#218559" points="179,-580 179,-594 224,-594 224,-580 179,-580"/>
+<text text-anchor="start" x="185" y="-584.667" font-family="Times Roman,serif" font-size="10.00">CGCAT</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge77" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-595.755C116.207,-595.208 132.729,-595.105 148.049,-595.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-598.957 158.425,-595.756 148.534,-591.96 148.326,-598.957"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge83" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-608.244C143.825,-608.792 127.305,-608.895 111.982,-608.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-605.043 101.605,-608.245 111.497,-612.04 111.704,-605.043"/>
+</g>
+<!-- 1,3 -->
+<g id="node17" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-602" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-613.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-594 303,-608 348,-608 348,-594 303,-594"/>
+<text text-anchor="start" x="308.5" y="-598.667" font-family="Times Roman,serif" font-size="10.00">TGCGC</text>
+<polygon fill="#218559" stroke="#218559" points="303,-580 303,-594 348,-594 348,-580 303,-580"/>
+<text text-anchor="start" x="307.5" y="-584.667" font-family="Times Roman,serif" font-size="10.00">GCGCA</text>
+</g>
+<!-- 1,2->1,3 -->
+<g id="edge79" class="edge"><title>1,2->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-595.562C252.601,-595.258 262.531,-595.176 272.159,-595.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-598.822 282.443,-595.554 272.527,-591.823 272.364,-598.822"/>
+</g>
+<!-- 2,4 -->
+<g id="node28" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-506" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-517.167" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-498 551,-512 596,-512 596,-498 551,-498"/>
+<text text-anchor="start" x="555.5" y="-502.667" font-family="Times Roman,serif" font-size="10.00">GCGCA</text>
+<polygon fill="#218559" stroke="#218559" points="551,-484 551,-498 596,-498 596,-484 551,-484"/>
+<text text-anchor="start" x="556.5" y="-488.667" font-family="Times Roman,serif" font-size="10.00">TGCGC</text>
+</g>
+<!-- 1,2->2,4 -->
+<g id="edge81" class="edge"><title>1,2->2,4</title>
+<path fill="none" stroke="#ebb035" d="M231.143,-627.695C246.511,-640.796 262,-654 262,-654 262,-654 274,-661 274,-661 274,-661 500,-661 500,-661 500,-661 512,-639 512,-639 512,-639 535.266,-588.272 553.061,-549.474"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="556.462,-550.454 557.449,-539.906 550.099,-547.536 556.462,-550.454"/>
+</g>
+<!-- 1,3->1,2 -->
+<g id="edge87" class="edge"><title>1,3->1,2</title>
+<path fill="none" stroke="#218559" d="M282.443,-608.446C273.158,-608.745 263.225,-608.824 253.602,-608.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-605.175 243.327,-608.438 253.242,-612.173 253.407,-605.175"/>
+</g>
+<!-- 1,4 -->
+<g id="node18" class="node"><title>1,4</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-602" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-613.167" font-family="Times Roman,serif" font-size="10.00">1,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-594 427,-608 472,-608 472,-594 427,-594"/>
+<text text-anchor="start" x="432.5" y="-598.667" font-family="Times Roman,serif" font-size="10.00">GCGCT</text>
+<polygon fill="#218559" stroke="#218559" points="427,-580 427,-594 472,-594 472,-580 427,-580"/>
+<text text-anchor="start" x="431.5" y="-584.667" font-family="Times Roman,serif" font-size="10.00">AGCGC</text>
+</g>
+<!-- 1,3->1,4 -->
+<g id="edge85" class="edge"><title>1,3->1,4</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-595.562C376.601,-595.258 386.531,-595.176 396.159,-595.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-598.822 406.443,-595.554 396.527,-591.823 396.364,-598.822"/>
+</g>
+<!-- 1,4->1,3 -->
+<g id="edge91" class="edge"><title>1,4->1,3</title>
+<path fill="none" stroke="#218559" d="M406.443,-608.446C397.158,-608.745 387.225,-608.824 377.602,-608.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-605.175 367.327,-608.438 377.242,-612.173 377.407,-605.175"/>
+</g>
+<!-- 2,2 -->
+<g id="node26" class="node"><title>2,2</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-506" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-517.167" font-family="Times Roman,serif" font-size="10.00">2,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-498 303,-512 348,-512 348,-498 303,-498"/>
+<text text-anchor="start" x="309" y="-502.667" font-family="Times Roman,serif" font-size="10.00">TAGCG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-484 303,-498 348,-498 348,-484 303,-484"/>
+<text text-anchor="start" x="309" y="-488.667" font-family="Times Roman,serif" font-size="10.00">CGCTA</text>
+</g>
+<!-- 1,4->2,2 -->
+<g id="edge89" class="edge"><title>1,4->2,2</title>
+<path fill="none" stroke="#ebb035" d="M414.551,-580.127C399.843,-570.789 386,-562 386,-562 386,-562 374.667,-551.596 361.771,-539.757"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="363.804,-536.872 354.071,-532.688 359.07,-542.029 363.804,-536.872"/>
+</g>
+<!-- 3,1 -->
+<g id="node20" class="node"><title>3,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-244" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-255.167" font-family="Times Roman,serif" font-size="10.00">3,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-236 303,-250 348,-250 348,-236 303,-236"/>
+<text text-anchor="start" x="309" y="-240.667" font-family="Times Roman,serif" font-size="10.00">CGCTA</text>
+<polygon fill="#218559" stroke="#218559" points="303,-222 303,-236 348,-236 348,-222 303,-222"/>
+<text text-anchor="start" x="309" y="-226.667" font-family="Times Roman,serif" font-size="10.00">TAGCG</text>
+</g>
+<!-- 3,2 -->
+<g id="node21" class="node"><title>3,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-244" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-255.167" font-family="Times Roman,serif" font-size="10.00">3,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-236 427,-250 472,-250 472,-236 427,-236"/>
+<text text-anchor="start" x="433" y="-240.667" font-family="Times Roman,serif" font-size="10.00">GCTAG</text>
+<polygon fill="#218559" stroke="#218559" points="427,-222 427,-236 472,-236 472,-222 427,-222"/>
+<text text-anchor="start" x="433" y="-226.667" font-family="Times Roman,serif" font-size="10.00">CTAGC</text>
+</g>
+<!-- 3,1->3,2 -->
+<g id="edge93" class="edge"><title>3,1->3,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-237.562C376.601,-237.258 386.531,-237.176 396.159,-237.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-240.822 406.443,-237.554 396.527,-233.823 396.364,-240.822"/>
+</g>
+<!-- 2,3 -->
+<g id="node27" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-506" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-517.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-498 427,-512 472,-512 472,-498 427,-498"/>
+<text text-anchor="start" x="431.5" y="-502.667" font-family="Times Roman,serif" font-size="10.00">AGCGC</text>
+<polygon fill="#218559" stroke="#218559" points="427,-484 427,-498 472,-498 472,-484 427,-484"/>
+<text text-anchor="start" x="432.5" y="-488.667" font-family="Times Roman,serif" font-size="10.00">GCGCT</text>
+</g>
+<!-- 3,1->2,3 -->
+<g id="edge95" class="edge"><title>3,1->2,3</title>
+<path fill="none" stroke="#06a2cb" d="M336.891,-278.859C357.577,-339.499 398,-458 398,-458 398,-458 404.502,-464.119 412.863,-471.988"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="410.54,-474.609 420.221,-478.914 415.338,-469.511 410.54,-474.609"/>
+</g>
+<!-- 3,2->3,1 -->
+<g id="edge101" class="edge"><title>3,2->3,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-250.446C397.158,-250.745 387.225,-250.824 377.602,-250.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-247.175 367.327,-250.438 377.242,-254.173 377.407,-247.175"/>
+</g>
+<!-- 3,3 -->
+<g id="node22" class="node"><title>3,3</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-244" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-255.167" font-family="Times Roman,serif" font-size="10.00">3,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-236 551,-250 596,-250 596,-236 551,-236"/>
+<text text-anchor="start" x="557" y="-240.667" font-family="Times Roman,serif" font-size="10.00">CTAGG</text>
+<polygon fill="#218559" stroke="#218559" points="551,-222 551,-236 596,-236 596,-222 551,-222"/>
+<text text-anchor="start" x="557" y="-226.667" font-family="Times Roman,serif" font-size="10.00">CCTAG</text>
+</g>
+<!-- 3,2->3,3 -->
+<g id="edge99" class="edge"><title>3,2->3,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-237.562C500.601,-237.258 510.531,-237.176 520.159,-237.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-240.822 530.443,-237.554 520.527,-233.823 520.364,-240.822"/>
+</g>
+<!-- 4,1 -->
+<g id="node35" class="node"><title>4,1</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-148" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">4,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-140 551,-154 596,-154 596,-140 551,-140"/>
+<text text-anchor="start" x="557" y="-144.667" font-family="Times Roman,serif" font-size="10.00">CTAGG</text>
+<polygon fill="#218559" stroke="#218559" points="551,-126 551,-140 596,-140 596,-126 551,-126"/>
+<text text-anchor="start" x="557" y="-130.667" font-family="Times Roman,serif" font-size="10.00">CCTAG</text>
+</g>
+<!-- 3,2->4,1 -->
+<g id="edge97" class="edge"><title>3,2->4,1</title>
+<path fill="none" stroke="#dd1e2f" d="M479.143,-218.305C494.511,-205.204 510,-192 510,-192 510,-192 519.601,-185.295 531.307,-177.119"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="533.461,-179.884 539.655,-171.288 529.453,-174.145 533.461,-179.884"/>
+</g>
+<!-- 3,3->3,2 -->
+<g id="edge107" class="edge"><title>3,3->3,2</title>
+<path fill="none" stroke="#218559" d="M530.443,-250.446C521.158,-250.745 511.225,-250.824 501.602,-250.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-247.175 491.327,-250.438 501.242,-254.173 501.407,-247.175"/>
+</g>
+<!-- 3,4 -->
+<g id="node23" class="node"><title>3,4</title>
+<ellipse fill="none" stroke="black" cx="697" cy="-244" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="677.5" y="-255.167" font-family="Times Roman,serif" font-size="10.00">3,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="675,-236 675,-250 720,-250 720,-236 675,-236"/>
+<text text-anchor="start" x="681" y="-240.667" font-family="Times Roman,serif" font-size="10.00">TAGGA</text>
+<polygon fill="#218559" stroke="#218559" points="675,-222 675,-236 720,-236 720,-222 675,-222"/>
+<text text-anchor="start" x="682.5" y="-226.667" font-family="Times Roman,serif" font-size="10.00">TCCTA</text>
+</g>
+<!-- 3,3->3,4 -->
+<g id="edge103" class="edge"><title>3,3->3,4</title>
+<path fill="none" stroke="#dd1e2f" d="M615.327,-237.562C624.601,-237.258 634.531,-237.176 644.159,-237.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="644.364,-240.822 654.443,-237.554 644.527,-233.823 644.364,-240.822"/>
+</g>
+<!-- 2,1 -->
+<g id="node25" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-506" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-517.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-498 179,-512 224,-512 224,-498 179,-498"/>
+<text text-anchor="start" x="185" y="-502.667" font-family="Times Roman,serif" font-size="10.00">CTAGC</text>
+<polygon fill="#218559" stroke="#218559" points="179,-484 179,-498 224,-498 224,-484 179,-484"/>
+<text text-anchor="start" x="185" y="-488.667" font-family="Times Roman,serif" font-size="10.00">GCTAG</text>
+</g>
+<!-- 3,3->2,1 -->
+<g id="edge105" class="edge"><title>3,3->2,1</title>
+<path fill="none" stroke="#06a2cb" d="M544.148,-271.271C523.744,-290.557 500,-313 500,-313 500,-313 398,-362 398,-362 398,-362 299.419,-434.059 241.729,-476.229"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="239.538,-473.494 233.531,-482.221 243.669,-479.146 239.538,-473.494"/>
+</g>
+<!-- 3,4->3,3 -->
+<g id="edge111" class="edge"><title>3,4->3,3</title>
+<path fill="none" stroke="#218559" d="M654.443,-250.446C645.158,-250.745 635.225,-250.824 625.602,-250.681"/>
+<polygon fill="#218559" stroke="#218559" points="625.407,-247.175 615.327,-250.438 625.242,-254.173 625.407,-247.175"/>
+</g>
+<!-- 4,3 -->
+<g id="node37" class="node"><title>4,3</title>
+<ellipse fill="none" stroke="black" cx="821" cy="-148" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="801.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">4,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="799,-140 799,-154 844,-154 844,-140 799,-140"/>
+<text text-anchor="start" x="803.5" y="-144.667" font-family="Times Roman,serif" font-size="10.00">AGGAG</text>
+<polygon fill="#218559" stroke="#218559" points="799,-126 799,-140 844,-140 844,-126 799,-126"/>
+<text text-anchor="start" x="806" y="-130.667" font-family="Times Roman,serif" font-size="10.00">CTCCT</text>
+</g>
+<!-- 3,4->4,3 -->
+<g id="edge109" class="edge"><title>3,4->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M733.282,-224.419C747.287,-216.861 760,-210 760,-210 760,-210 772.484,-197.311 786.157,-183.414"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="788.837,-185.681 793.356,-176.098 783.847,-180.771 788.837,-185.681"/>
+</g>
+<!-- 2,1->3,3 -->
+<g id="edge19" class="edge"><title>2,1->3,3</title>
+<path fill="none" stroke="#06a2cb" d="M213.198,-471.207C230.999,-420.429 262,-332 262,-332 262,-332 500,-289 500,-289 500,-289 513.817,-280.483 529.429,-270.859"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="531.534,-273.673 538.21,-265.446 527.861,-267.714 531.534,-273.673"/>
+</g>
+<!-- 2,1->2,2 -->
+<g id="edge17" class="edge"><title>2,1->2,2</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-499.562C252.601,-499.258 262.531,-499.176 272.159,-499.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-502.822 282.443,-499.554 272.527,-495.823 272.364,-502.822"/>
+</g>
+<!-- 2,2->1,4 -->
+<g id="edge25" class="edge"><title>2,2->1,4</title>
+<path fill="none" stroke="#ebb035" d="M356.611,-530.473C372.521,-542.79 391.976,-557.853 408.924,-570.973"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="406.965,-573.883 417.015,-577.238 411.251,-568.348 406.965,-573.883"/>
+</g>
+<!-- 2,2->2,1 -->
+<g id="edge27" class="edge"><title>2,2->2,1</title>
+<path fill="none" stroke="#218559" d="M282.443,-512.446C273.158,-512.745 263.225,-512.824 253.602,-512.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-509.175 243.327,-512.438 253.242,-516.173 253.407,-509.175"/>
+</g>
+<!-- 2,2->2,3 -->
+<g id="edge23" class="edge"><title>2,2->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-499.562C376.601,-499.258 386.531,-499.176 396.159,-499.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-502.822 406.443,-499.554 396.527,-495.823 396.364,-502.822"/>
+</g>
+<!-- 6,1 -->
+<g id="node40" class="node"><title>6,1</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-410" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-421.167" font-family="Times Roman,serif" font-size="10.00">6,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-402 427,-416 472,-416 472,-402 427,-402"/>
+<text text-anchor="start" x="431.5" y="-406.667" font-family="Times Roman,serif" font-size="10.00">AGCGC</text>
+<polygon fill="#218559" stroke="#218559" points="427,-388 427,-402 472,-402 472,-388 427,-388"/>
+<text text-anchor="start" x="432.5" y="-392.667" font-family="Times Roman,serif" font-size="10.00">GCGCT</text>
+</g>
+<!-- 2,2->6,1 -->
+<g id="edge21" class="edge"><title>2,2->6,1</title>
+<path fill="none" stroke="#dd1e2f" d="M356.611,-481.527C372.521,-469.21 391.976,-454.147 408.924,-441.027"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="411.251,-443.652 417.015,-434.762 406.965,-438.117 411.251,-443.652"/>
+</g>
+<!-- 2,3->3,1 -->
+<g id="edge31" class="edge"><title>2,3->3,1</title>
+<path fill="none" stroke="#06a2cb" d="M419.713,-479.502C403.261,-464.617 386,-449 386,-449 386,-449 356.323,-349.267 338.369,-288.928"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="341.693,-287.827 335.486,-279.241 334.984,-289.824 341.693,-287.827"/>
+</g>
+<!-- 2,3->2,2 -->
+<g id="edge33" class="edge"><title>2,3->2,2</title>
+<path fill="none" stroke="#218559" d="M406.443,-512.446C397.158,-512.745 387.225,-512.824 377.602,-512.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-509.175 367.327,-512.438 377.242,-516.173 377.407,-509.175"/>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge29" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-499.562C500.601,-499.258 510.531,-499.176 520.159,-499.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-502.822 530.443,-499.554 520.527,-495.823 520.364,-502.822"/>
+</g>
+<!-- 2,4->1,2 -->
+<g id="edge37" class="edge"><title>2,4->1,2</title>
+<path fill="none" stroke="#ebb035" d="M555.885,-539.058C534.308,-580.734 500,-647 500,-647 500,-647 274,-647 274,-647 274,-647 260.183,-638.483 244.571,-628.859"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="246.139,-625.714 235.79,-623.446 242.466,-631.673 246.139,-625.714"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge39" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M530.443,-512.446C521.158,-512.745 511.225,-512.824 501.602,-512.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-509.175 491.327,-512.438 501.242,-516.173 501.407,-509.175"/>
+</g>
+<!-- 6,3 -->
+<g id="node42" class="node"><title>6,3</title>
+<ellipse fill="none" stroke="black" cx="697" cy="-410" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="677.5" y="-421.167" font-family="Times Roman,serif" font-size="10.00">6,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="675,-402 675,-416 720,-416 720,-402 675,-402"/>
+<text text-anchor="start" x="681" y="-406.667" font-family="Times Roman,serif" font-size="10.00">CGCAT</text>
+<polygon fill="#218559" stroke="#218559" points="675,-388 675,-402 720,-402 720,-388 675,-388"/>
+<text text-anchor="start" x="681" y="-392.667" font-family="Times Roman,serif" font-size="10.00">ATGCG</text>
+</g>
+<!-- 2,4->6,3 -->
+<g id="edge35" class="edge"><title>2,4->6,3</title>
+<path fill="none" stroke="#dd1e2f" d="M607.817,-484.447C622.387,-475.427 636,-467 636,-467 636,-467 647.721,-456.047 660.888,-443.744"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="663.369,-446.216 668.286,-436.831 658.59,-441.101 663.369,-446.216"/>
+</g>
+<!-- 5,1 -->
+<g id="node30" class="node"><title>5,1</title>
+<ellipse fill="none" stroke="black" cx="821" cy="-52" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="801.5" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">5,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="799,-44 799,-58 844,-58 844,-44 799,-44"/>
+<text text-anchor="start" x="803.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">AGGAG</text>
+<polygon fill="#218559" stroke="#218559" points="799,-30 799,-44 844,-44 844,-30 799,-30"/>
+<text text-anchor="start" x="806" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">CTCCT</text>
+</g>
+<!-- 5,2 -->
+<g id="node31" class="node"><title>5,2</title>
+<ellipse fill="none" stroke="black" cx="945" cy="-52" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="925.5" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">5,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="923,-44 923,-58 968,-58 968,-44 923,-44"/>
+<text text-anchor="start" x="928.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">GGAGT</text>
+<polygon fill="#218559" stroke="#218559" points="923,-30 923,-44 968,-44 968,-30 923,-30"/>
+<text text-anchor="start" x="929.5" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">ACTCC</text>
+</g>
+<!-- 5,1->5,2 -->
+<g id="edge113" class="edge"><title>5,1->5,2</title>
+<path fill="none" stroke="#dd1e2f" d="M863.327,-45.5616C872.601,-45.2583 882.531,-45.1764 892.159,-45.3159"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="892.364,-48.8215 902.443,-45.5541 892.527,-41.8234 892.364,-48.8215"/>
+</g>
+<!-- 4,2 -->
+<g id="node36" class="node"><title>4,2</title>
+<ellipse fill="none" stroke="black" cx="697" cy="-148" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="677.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">4,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="675,-140 675,-154 720,-154 720,-140 675,-140"/>
+<text text-anchor="start" x="681" y="-144.667" font-family="Times Roman,serif" font-size="10.00">TAGGA</text>
+<polygon fill="#218559" stroke="#218559" points="675,-126 675,-140 720,-140 720,-126 675,-126"/>
+<text text-anchor="start" x="682.5" y="-130.667" font-family="Times Roman,serif" font-size="10.00">TCCTA</text>
+</g>
+<!-- 5,1->4,2 -->
+<g id="edge115" class="edge"><title>5,1->4,2</title>
+<path fill="none" stroke="#218559" d="M790.857,-77.6953C775.489,-90.7961 760,-104 760,-104 760,-104 750.399,-110.705 738.693,-118.881"/>
+<polygon fill="#218559" stroke="#218559" points="736.539,-116.116 730.345,-124.712 740.547,-121.855 736.539,-116.116"/>
+</g>
+<!-- 5,2->5,1 -->
+<g id="edge119" class="edge"><title>5,2->5,1</title>
+<path fill="none" stroke="#218559" d="M902.443,-58.4459C893.158,-58.7455 883.225,-58.8237 873.602,-58.6806"/>
+<polygon fill="#218559" stroke="#218559" points="873.407,-55.1751 863.327,-58.4384 873.242,-62.1732 873.407,-55.1751"/>
+</g>
+<!-- 5,3 -->
+<g id="node32" class="node"><title>5,3</title>
+<ellipse fill="none" stroke="black" cx="1069" cy="-52" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1049.5" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">5,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1047,-44 1047,-58 1092,-58 1092,-44 1047,-44"/>
+<text text-anchor="start" x="1053.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">GAGTT</text>
+<polygon fill="#218559" stroke="#218559" points="1047,-30 1047,-44 1092,-44 1092,-30 1047,-30"/>
+<text text-anchor="start" x="1053.5" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">AACTC</text>
+</g>
+<!-- 5,2->5,3 -->
+<g id="edge117" class="edge"><title>5,2->5,3</title>
+<path fill="none" stroke="#dd1e2f" d="M987.327,-45.5616C996.601,-45.2583 1006.53,-45.1764 1016.16,-45.3159"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1016.36,-48.8215 1026.44,-45.5541 1016.53,-41.8234 1016.36,-48.8215"/>
+</g>
+<!-- 5,3->5,2 -->
+<g id="edge125" class="edge"><title>5,3->5,2</title>
+<path fill="none" stroke="#218559" d="M1026.44,-58.4459C1017.16,-58.7455 1007.23,-58.8237 997.602,-58.6806"/>
+<polygon fill="#218559" stroke="#218559" points="997.407,-55.1751 987.327,-58.4384 997.242,-62.1732 997.407,-55.1751"/>
+</g>
+<!-- 5,4 -->
+<g id="node33" class="node"><title>5,4</title>
+<ellipse fill="none" stroke="black" cx="1193" cy="-52" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="1173.5" y="-63.1667" font-family="Times Roman,serif" font-size="10.00">5,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1171,-44 1171,-58 1216,-58 1216,-44 1171,-44"/>
+<text text-anchor="start" x="1177.5" y="-48.6667" font-family="Times Roman,serif" font-size="10.00">AGTTG</text>
+<polygon fill="#218559" stroke="#218559" points="1171,-30 1171,-44 1216,-44 1216,-30 1171,-30"/>
+<text text-anchor="start" x="1177" y="-34.6667" font-family="Times Roman,serif" font-size="10.00">CAACT</text>
+</g>
+<!-- 5,3->5,4 -->
+<g id="edge121" class="edge"><title>5,3->5,4</title>
+<path fill="none" stroke="#dd1e2f" d="M1111.33,-45.5616C1120.6,-45.2583 1130.53,-45.1764 1140.16,-45.3159"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1140.36,-48.8215 1150.44,-45.5541 1140.53,-41.8234 1140.36,-48.8215"/>
+</g>
+<!-- 4,4 -->
+<g id="node38" class="node"><title>4,4</title>
+<ellipse fill="none" stroke="black" cx="945" cy="-148" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="925.5" y="-159.167" font-family="Times Roman,serif" font-size="10.00">4,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="923,-140 923,-154 968,-154 968,-140 923,-140"/>
+<text text-anchor="start" x="928.5" y="-144.667" font-family="Times Roman,serif" font-size="10.00">GGAGT</text>
+<polygon fill="#218559" stroke="#218559" points="923,-126 923,-140 968,-140 968,-126 923,-126"/>
+<text text-anchor="start" x="929.5" y="-130.667" font-family="Times Roman,serif" font-size="10.00">ACTCC</text>
+</g>
+<!-- 5,3->4,4 -->
+<g id="edge123" class="edge"><title>5,3->4,4</title>
+<path fill="none" stroke="#218559" d="M1041.36,-80.0975C1025.22,-96.502 1008,-114 1008,-114 1008,-114 1000.24,-118.188 990.168,-123.624"/>
+<polygon fill="#218559" stroke="#218559" points="988.42,-120.59 981.282,-128.419 991.745,-126.75 988.42,-120.59"/>
+</g>
+<!-- 5,4->5,3 -->
+<g id="edge127" class="edge"><title>5,4->5,3</title>
+<path fill="none" stroke="#218559" d="M1150.44,-58.4459C1141.16,-58.7455 1131.23,-58.8237 1121.6,-58.6806"/>
+<polygon fill="#218559" stroke="#218559" points="1121.41,-55.1751 1111.33,-58.4384 1121.24,-62.1732 1121.41,-55.1751"/>
+</g>
+<!-- 4,1->3,2 -->
+<g id="edge43" class="edge"><title>4,1->3,2</title>
+<path fill="none" stroke="#218559" d="M541.015,-172.762C525.051,-185.122 505.587,-200.19 488.68,-213.28"/>
+<polygon fill="#218559" stroke="#218559" points="486.376,-210.637 480.611,-219.527 490.661,-216.173 486.376,-210.637"/>
+</g>
+<!-- 4,1->4,2 -->
+<g id="edge41" class="edge"><title>4,1->4,2</title>
+<path fill="none" stroke="#dd1e2f" d="M615.327,-141.562C624.601,-141.258 634.531,-141.176 644.159,-141.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="644.364,-144.822 654.443,-141.554 644.527,-137.823 644.364,-144.822"/>
+</g>
+<!-- 4,2->5,1 -->
+<g id="edge45" class="edge"><title>4,2->5,1</title>
+<path fill="none" stroke="#dd1e2f" d="M728.611,-123.527C744.521,-111.21 763.976,-96.1473 780.924,-83.0268"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="783.251,-85.6517 789.015,-76.7624 778.965,-80.1166 783.251,-85.6517"/>
+</g>
+<!-- 4,2->4,1 -->
+<g id="edge49" class="edge"><title>4,2->4,1</title>
+<path fill="none" stroke="#218559" d="M654.443,-154.446C645.158,-154.745 635.225,-154.824 625.602,-154.681"/>
+<polygon fill="#218559" stroke="#218559" points="625.407,-151.175 615.327,-154.438 625.242,-158.173 625.407,-151.175"/>
+</g>
+<!-- 4,2->4,3 -->
+<g id="edge47" class="edge"><title>4,2->4,3</title>
+<path fill="none" stroke="#dd1e2f" d="M739.327,-141.562C748.601,-141.258 758.531,-141.176 768.159,-141.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="768.364,-144.822 778.443,-141.554 768.527,-137.823 768.364,-144.822"/>
+</g>
+<!-- 4,3->3,4 -->
+<g id="edge53" class="edge"><title>4,3->3,4</title>
+<path fill="none" stroke="#218559" d="M789.015,-172.762C773.051,-185.122 753.587,-200.19 736.68,-213.28"/>
+<polygon fill="#218559" stroke="#218559" points="734.376,-210.637 728.611,-219.527 738.661,-216.173 734.376,-210.637"/>
+</g>
+<!-- 4,3->4,2 -->
+<g id="edge55" class="edge"><title>4,3->4,2</title>
+<path fill="none" stroke="#218559" d="M778.443,-154.446C769.158,-154.745 759.225,-154.824 749.602,-154.681"/>
+<polygon fill="#218559" stroke="#218559" points="749.407,-151.175 739.327,-154.438 749.242,-158.173 749.407,-151.175"/>
+</g>
+<!-- 4,3->4,4 -->
+<g id="edge51" class="edge"><title>4,3->4,4</title>
+<path fill="none" stroke="#dd1e2f" d="M863.327,-141.562C872.601,-141.258 882.531,-141.176 892.159,-141.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="892.364,-144.822 902.443,-141.554 892.527,-137.823 892.364,-144.822"/>
+</g>
+<!-- 4,4->5,3 -->
+<g id="edge57" class="edge"><title>4,4->5,3</title>
+<path fill="none" stroke="#dd1e2f" d="M976.611,-123.527C992.521,-111.21 1011.98,-96.1473 1028.92,-83.0268"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="1031.25,-85.6517 1037.02,-76.7624 1026.97,-80.1166 1031.25,-85.6517"/>
+</g>
+<!-- 4,4->4,3 -->
+<g id="edge59" class="edge"><title>4,4->4,3</title>
+<path fill="none" stroke="#218559" d="M902.443,-154.446C893.158,-154.745 883.225,-154.824 873.602,-154.681"/>
+<polygon fill="#218559" stroke="#218559" points="873.407,-151.175 863.327,-154.438 873.242,-158.173 873.407,-151.175"/>
+</g>
+<!-- 6,1->2,2 -->
+<g id="edge63" class="edge"><title>6,1->2,2</title>
+<path fill="none" stroke="#218559" d="M415.655,-433.288C400.544,-443.843 386,-454 386,-454 386,-454 375.378,-463.055 362.998,-473.608"/>
+<polygon fill="#218559" stroke="#218559" points="360.482,-471.154 355.143,-480.305 365.023,-476.481 360.482,-471.154"/>
+</g>
+<!-- 6,2 -->
+<g id="node41" class="node"><title>6,2</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-410" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-421.167" font-family="Times Roman,serif" font-size="10.00">6,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-402 551,-416 596,-416 596,-402 551,-402"/>
+<text text-anchor="start" x="555.5" y="-406.667" font-family="Times Roman,serif" font-size="10.00">GCGCA</text>
+<polygon fill="#218559" stroke="#218559" points="551,-388 551,-402 596,-402 596,-388 551,-388"/>
+<text text-anchor="start" x="556.5" y="-392.667" font-family="Times Roman,serif" font-size="10.00">TGCGC</text>
+</g>
+<!-- 6,1->6,2 -->
+<g id="edge61" class="edge"><title>6,1->6,2</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-403.562C500.601,-403.258 510.531,-403.176 520.159,-403.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-406.822 530.443,-403.554 520.527,-399.823 520.364,-406.822"/>
+</g>
+<!-- 6,2->6,1 -->
+<g id="edge67" class="edge"><title>6,2->6,1</title>
+<path fill="none" stroke="#218559" d="M530.443,-416.446C521.158,-416.745 511.225,-416.824 501.602,-416.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-413.175 491.327,-416.438 501.242,-420.173 501.407,-413.175"/>
+</g>
+<!-- 6,2->6,3 -->
+<g id="edge65" class="edge"><title>6,2->6,3</title>
+<path fill="none" stroke="#dd1e2f" d="M615.327,-403.562C624.601,-403.258 634.531,-403.176 644.159,-403.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="644.364,-406.822 654.443,-403.554 644.527,-399.823 644.364,-406.822"/>
+</g>
+<!-- 6,3->2,4 -->
+<g id="edge71" class="edge"><title>6,3->2,4</title>
+<path fill="none" stroke="#218559" d="M665.015,-434.762C649.051,-447.122 629.587,-462.19 612.68,-475.28"/>
+<polygon fill="#218559" stroke="#218559" points="610.376,-472.637 604.611,-481.527 614.661,-478.173 610.376,-472.637"/>
+</g>
+<!-- 6,3->6,2 -->
+<g id="edge73" class="edge"><title>6,3->6,2</title>
+<path fill="none" stroke="#218559" d="M654.443,-416.446C645.158,-416.745 635.225,-416.824 625.602,-416.681"/>
+<polygon fill="#218559" stroke="#218559" points="625.407,-413.175 615.327,-416.438 625.242,-420.173 625.407,-413.175"/>
+</g>
+<!-- 6,4 -->
+<g id="node43" class="node"><title>6,4</title>
+<ellipse fill="none" stroke="black" cx="821" cy="-410" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="801.5" y="-421.167" font-family="Times Roman,serif" font-size="10.00">6,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="799,-402 799,-416 844,-416 844,-402 799,-402"/>
+<text text-anchor="start" x="806" y="-406.667" font-family="Times Roman,serif" font-size="10.00">GCATT</text>
+<polygon fill="#218559" stroke="#218559" points="799,-388 799,-402 844,-402 844,-388 799,-388"/>
+<text text-anchor="start" x="805" y="-392.667" font-family="Times Roman,serif" font-size="10.00">AATGC</text>
+</g>
+<!-- 6,3->6,4 -->
+<g id="edge69" class="edge"><title>6,3->6,4</title>
+<path fill="none" stroke="#dd1e2f" d="M739.327,-403.562C748.601,-403.258 758.531,-403.176 768.159,-403.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="768.364,-406.822 778.443,-403.554 768.527,-399.823 768.364,-406.822"/>
+</g>
+<!-- 6,4->6,3 -->
+<g id="edge75" class="edge"><title>6,4->6,3</title>
+<path fill="none" stroke="#218559" d="M778.443,-416.446C769.158,-416.745 759.225,-416.824 749.602,-416.681"/>
+<polygon fill="#218559" stroke="#218559" points="749.407,-413.175 739.327,-416.438 749.242,-420.173 749.407,-413.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2/.part-0.crc b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2/.part-0.crc
new file mode 100644
index 0000000..2372965
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2/.part-1.crc b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2/.part-1.crc
new file mode 100644
index 0000000..dc540d5
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2/part-0 b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2/part-0
new file mode 100755
index 0000000..f05a165
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2/part-1 b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2/part-1
new file mode 100755
index 0000000..54201ec
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/walk_random_seq2/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip.txt b/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip.txt
new file mode 100644
index 0000000..84f6828
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip.txt
@@ -0,0 +1,6 @@
+((2,1) [(2,3)] [] [] [] GGAATA) (null)
+((2,3) [(2,4)] [] [] [(2,1)] AATAC) (null)
+((2,4) [] [(1,2)] [] [(2,3)] ATACG) (null)
+((1,1) [(1,2)] [] [] [] AACGT) (null)
+((1,2) [(1,3)] [(2,4)] [] [(1,1)] ACGTA) (null)
+((1,3) [] [] [] [(1,2)] CGTATA) (null)
diff --git a/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip.txt.svg b/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip.txt.svg
new file mode 100644
index 0000000..9f13c3f
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip.txt.svg
@@ -0,0 +1,183 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.26.3 (20100126.1600)
+ -->
+<!-- Title: fr_with_tip_txt Pages: 1 -->
+<svg width="640pt" height="417pt"
+ viewBox="0.00 0.00 640.00 417.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph1" class="graph" transform="scale(1 1) rotate(0) translate(4 413)">
+<title>fr_with_tip_txt</title>
+<polygon fill="white" stroke="white" points="-4,5 -4,-413 637,-413 637,5 -4,5"/>
+<g id="graph2" class="cluster"><title>cluster_legend</title>
+<polygon fill="none" stroke="black" points="49,-8 49,-209 211,-209 211,-8 49,-8"/>
+<text text-anchor="middle" x="130" y="-192.4" font-family="Times Roman,serif" font-size="14.00">legend</text>
+</g>
+<g id="graph3" class="cluster"><title>cluster_1</title>
+<polygon fill="none" stroke="black" points="274,-313 274,-401 624,-401 624,-313 274,-313"/>
+</g>
+<g id="graph4" class="cluster"><title>cluster_2</title>
+<polygon fill="none" stroke="black" points="8,-217 8,-305 376,-305 376,-217 8,-217"/>
+</g>
+<!-- legend_0_0 -->
+<g id="node2" class="node"><title>legend_0_0</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_0 -->
+<g id="node3" class="node"><title>legend_1_0</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-157" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_0->legend_1_0 -->
+<g id="edge3" class="edge"><title>legend_0_0->legend_1_0</title>
+<path fill="none" stroke="#dd1e2f" d="M61.0074,-157C74.8673,-157 156.744,-157 188.46,-157"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="188.862,-160.5 198.862,-157 188.861,-153.5 188.862,-160.5"/>
+<text text-anchor="middle" x="130" y="-162.4" font-family="Times Roman,serif" font-size="14.00">FF</text>
+</g>
+<!-- legend_0_1 -->
+<g id="node5" class="node"><title>legend_0_1</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_1 -->
+<g id="node6" class="node"><title>legend_1_1</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-116" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_1->legend_1_1 -->
+<g id="edge5" class="edge"><title>legend_0_1->legend_1_1</title>
+<path fill="none" stroke="#ebb035" d="M61.0074,-116C74.8673,-116 156.744,-116 188.46,-116"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="188.862,-119.5 198.862,-116 188.861,-112.5 188.862,-119.5"/>
+<text text-anchor="middle" x="130" y="-121.4" font-family="Times Roman,serif" font-size="14.00">FR</text>
+</g>
+<!-- legend_0_2 -->
+<g id="node8" class="node"><title>legend_0_2</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_2 -->
+<g id="node9" class="node"><title>legend_1_2</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-75" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_2->legend_1_2 -->
+<g id="edge7" class="edge"><title>legend_0_2->legend_1_2</title>
+<path fill="none" stroke="#06a2cb" d="M61.0074,-75C74.8673,-75 156.744,-75 188.46,-75"/>
+<polygon fill="#06a2cb" stroke="#06a2cb" points="188.862,-78.5001 198.862,-75 188.861,-71.5001 188.862,-78.5001"/>
+<text text-anchor="middle" x="130" y="-80.4" font-family="Times Roman,serif" font-size="14.00">RF</text>
+</g>
+<!-- legend_0_3 -->
+<g id="node11" class="node"><title>legend_0_3</title>
+<ellipse fill="black" stroke="black" cx="59" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_1_3 -->
+<g id="node12" class="node"><title>legend_1_3</title>
+<ellipse fill="black" stroke="black" cx="201" cy="-34" rx="1.8" ry="1.8"/>
+</g>
+<!-- legend_0_3->legend_1_3 -->
+<g id="edge9" class="edge"><title>legend_0_3->legend_1_3</title>
+<path fill="none" stroke="#218559" d="M61.0074,-34C74.8673,-34 156.744,-34 188.46,-34"/>
+<polygon fill="#218559" stroke="#218559" points="188.862,-37.5001 198.862,-34 188.861,-30.5001 188.862,-37.5001"/>
+<text text-anchor="middle" x="130" y="-39.4" font-family="Times Roman,serif" font-size="14.00">RR</text>
+</g>
+<!-- 1,1 -->
+<g id="node15" class="node"><title>1,1</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">1,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-349 303,-363 348,-363 348,-349 303,-349"/>
+<text text-anchor="start" x="309" y="-353.667" font-family="Times Roman,serif" font-size="10.00">AACGT</text>
+<polygon fill="#218559" stroke="#218559" points="303,-335 303,-349 348,-349 348,-335 303,-335"/>
+<text text-anchor="start" x="310" y="-339.667" font-family="Times Roman,serif" font-size="10.00">ACGTT</text>
+</g>
+<!-- 1,2 -->
+<g id="node16" class="node"><title>1,2</title>
+<ellipse fill="none" stroke="black" cx="449" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="429.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">1,2--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="427,-349 427,-363 472,-363 472,-349 427,-349"/>
+<text text-anchor="start" x="433.5" y="-353.667" font-family="Times Roman,serif" font-size="10.00">ACGTA</text>
+<polygon fill="#218559" stroke="#218559" points="427,-335 427,-349 472,-349 472,-335 427,-335"/>
+<text text-anchor="start" x="434" y="-339.667" font-family="Times Roman,serif" font-size="10.00">TACGT</text>
+</g>
+<!-- 1,1->1,2 -->
+<g id="edge23" class="edge"><title>1,1->1,2</title>
+<path fill="none" stroke="#dd1e2f" d="M367.327,-350.562C376.601,-350.258 386.531,-350.176 396.159,-350.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="396.364,-353.822 406.443,-350.554 396.527,-346.823 396.364,-353.822"/>
+</g>
+<!-- 1,2->1,1 -->
+<g id="edge29" class="edge"><title>1,2->1,1</title>
+<path fill="none" stroke="#218559" d="M406.443,-363.446C397.158,-363.745 387.225,-363.824 377.602,-363.681"/>
+<polygon fill="#218559" stroke="#218559" points="377.407,-360.175 367.327,-363.438 377.242,-367.173 377.407,-360.175"/>
+</g>
+<!-- 1,3 -->
+<g id="node17" class="node"><title>1,3</title>
+<ellipse fill="none" stroke="black" cx="573" cy="-357" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="553.5" y="-368.167" font-family="Times Roman,serif" font-size="10.00">1,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="551,-349 551,-363 596,-363 596,-349 551,-349"/>
+<text text-anchor="start" x="555.5" y="-353.667" font-family="Times Roman,serif" font-size="10.00">CGTATA</text>
+<polygon fill="#218559" stroke="#218559" points="551,-335 551,-349 596,-349 596,-335 551,-335"/>
+<text text-anchor="start" x="555.5" y="-339.667" font-family="Times Roman,serif" font-size="10.00">TATACG</text>
+</g>
+<!-- 1,2->1,3 -->
+<g id="edge25" class="edge"><title>1,2->1,3</title>
+<path fill="none" stroke="#dd1e2f" d="M491.327,-350.562C500.601,-350.258 510.531,-350.176 520.159,-350.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="520.364,-353.822 530.443,-350.554 520.527,-346.823 520.364,-353.822"/>
+</g>
+<!-- 2,4 -->
+<g id="node21" class="node"><title>2,4</title>
+<ellipse fill="none" stroke="black" cx="325" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="305.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,4--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="303,-253 303,-267 348,-267 348,-253 303,-253"/>
+<text text-anchor="start" x="309.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">ATACG</text>
+<polygon fill="#218559" stroke="#218559" points="303,-239 303,-253 348,-253 348,-239 303,-239"/>
+<text text-anchor="start" x="310.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">CGTAT</text>
+</g>
+<!-- 1,2->2,4 -->
+<g id="edge27" class="edge"><title>1,2->2,4</title>
+<path fill="none" stroke="#ebb035" d="M415.655,-333.712C400.544,-323.157 386,-313 386,-313 386,-313 375.378,-303.945 362.998,-293.392"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="365.023,-290.519 355.143,-286.695 360.482,-295.846 365.023,-290.519"/>
+</g>
+<!-- 1,3->1,2 -->
+<g id="edge31" class="edge"><title>1,3->1,2</title>
+<path fill="none" stroke="#218559" d="M530.443,-363.446C521.158,-363.745 511.225,-363.824 501.602,-363.681"/>
+<polygon fill="#218559" stroke="#218559" points="501.407,-360.175 491.327,-363.438 501.242,-367.173 501.407,-360.175"/>
+</g>
+<!-- 2,1 -->
+<g id="node19" class="node"><title>2,1</title>
+<ellipse fill="none" stroke="black" cx="59" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="39.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,1--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="37,-253 37,-267 82,-267 82,-253 37,-253"/>
+<text text-anchor="start" x="39.5" y="-257.667" font-family="Times Roman,serif" font-size="10.00">GGAATA</text>
+<polygon fill="#218559" stroke="#218559" points="37,-239 37,-253 82,-253 82,-239 37,-239"/>
+<text text-anchor="start" x="42.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">TATTCC</text>
+</g>
+<!-- 2,3 -->
+<g id="node20" class="node"><title>2,3</title>
+<ellipse fill="none" stroke="black" cx="201" cy="-261" rx="43.1335" ry="36.0624"/>
+<text text-anchor="start" x="181.5" y="-272.167" font-family="Times Roman,serif" font-size="10.00">2,3--null</text>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="179,-253 179,-267 224,-267 224,-253 179,-253"/>
+<text text-anchor="start" x="186" y="-257.667" font-family="Times Roman,serif" font-size="10.00">AATAC</text>
+<polygon fill="#218559" stroke="#218559" points="179,-239 179,-253 224,-253 224,-239 179,-239"/>
+<text text-anchor="start" x="187.5" y="-243.667" font-family="Times Roman,serif" font-size="10.00">GTATT</text>
+</g>
+<!-- 2,1->2,3 -->
+<g id="edge13" class="edge"><title>2,1->2,3</title>
+<path fill="none" stroke="#dd1e2f" d="M101.605,-254.755C116.207,-254.208 132.729,-254.105 148.049,-254.448"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="148.326,-257.957 158.425,-254.756 148.534,-250.96 148.326,-257.957"/>
+</g>
+<!-- 2,3->2,1 -->
+<g id="edge17" class="edge"><title>2,3->2,1</title>
+<path fill="none" stroke="#218559" d="M158.425,-267.244C143.825,-267.792 127.305,-267.895 111.982,-267.553"/>
+<polygon fill="#218559" stroke="#218559" points="111.704,-264.043 101.605,-267.245 111.497,-271.04 111.704,-264.043"/>
+</g>
+<!-- 2,3->2,4 -->
+<g id="edge15" class="edge"><title>2,3->2,4</title>
+<path fill="none" stroke="#dd1e2f" d="M243.327,-254.562C252.601,-254.258 262.531,-254.176 272.159,-254.316"/>
+<polygon fill="#dd1e2f" stroke="#dd1e2f" points="272.364,-257.822 282.443,-254.554 272.527,-250.823 272.364,-257.822"/>
+</g>
+<!-- 2,4->1,2 -->
+<g id="edge19" class="edge"><title>2,4->1,2</title>
+<path fill="none" stroke="#ebb035" d="M356.611,-285.473C372.521,-297.79 391.976,-312.853 408.924,-325.973"/>
+<polygon fill="#ebb035" stroke="#ebb035" points="406.965,-328.883 417.015,-332.238 411.251,-323.348 406.965,-328.883"/>
+</g>
+<!-- 2,4->2,3 -->
+<g id="edge21" class="edge"><title>2,4->2,3</title>
+<path fill="none" stroke="#218559" d="M282.443,-267.446C273.158,-267.745 263.225,-267.824 253.602,-267.681"/>
+<polygon fill="#218559" stroke="#218559" points="253.407,-264.175 243.327,-267.438 253.242,-271.173 253.407,-264.175"/>
+</g>
+</g>
+</svg>
diff --git a/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip/.part-0.crc b/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip/.part-0.crc
new file mode 100644
index 0000000..35486b9
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip/.part-0.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip/.part-1.crc b/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip/.part-1.crc
new file mode 100644
index 0000000..53a0483
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip/.part-1.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip/part-0 b/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip/part-0
new file mode 100755
index 0000000..6077d6e
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip/part-1 b/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip/part-1
new file mode 100755
index 0000000..5e8d3f9
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/tipremove/fr_with_tip/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/reads/bubblemerge/five_ff_bubbles.txt b/genomix/genomix-pregelix/data/input/reads/bubblemerge/five_ff_bubbles.txt
new file mode 100644
index 0000000..63a8e55
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/reads/bubblemerge/five_ff_bubbles.txt
@@ -0,0 +1,4 @@
+1 ACGTCCTT
+2 CGTCCTTA
+3 GTCCTTAG
+4 GTCCTTAG
diff --git a/genomix/genomix-pregelix/data/input/reads/bubblemerge/five_length1_bubbles.txt b/genomix/genomix-pregelix/data/input/reads/bubblemerge/five_length1_bubbles.txt
new file mode 100644
index 0000000..13190dd
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/reads/bubblemerge/five_length1_bubbles.txt
@@ -0,0 +1,6 @@
+1 AATAGAAG
+2 AATAGAAG
+3 AATAGAAG
+4 AATAGAAG
+5 AATAGAAG
+6 AGAAGAAG
diff --git a/genomix/genomix-pregelix/data/input/reads/bubblemerge/fr_bubble.txt b/genomix/genomix-pregelix/data/input/reads/bubblemerge/fr_bubble.txt
new file mode 100644
index 0000000..4026c2c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/reads/bubblemerge/fr_bubble.txt
@@ -0,0 +1,2 @@
+1 AAACGTAT
+2 GGAATACG
diff --git a/genomix/genomix-pregelix/data/input/reads/bubblemerge/fr_bubble_and_ff_bubble.txt b/genomix/genomix-pregelix/data/input/reads/bubblemerge/fr_bubble_and_ff_bubble.txt
new file mode 100644
index 0000000..e166418
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/reads/bubblemerge/fr_bubble_and_ff_bubble.txt
@@ -0,0 +1,3 @@
+1 AAACGTAT
+2 CGTATTCC
+3 GGAATACG
diff --git a/genomix/genomix-pregelix/data/input/reads/bubblemerge/rf_bubble.txt b/genomix/genomix-pregelix/data/input/reads/bubblemerge/rf_bubble.txt
new file mode 100644
index 0000000..154dc8c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/reads/bubblemerge/rf_bubble.txt
@@ -0,0 +1,3 @@
+1 ACGGTGTA
+2 ACCGTGGT
+
diff --git a/genomix/genomix-pregelix/data/input/reads/bubblemerge/small_bubble.txt b/genomix/genomix-pregelix/data/input/reads/bubblemerge/small_bubble.txt
new file mode 100644
index 0000000..3e3bf7b
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/reads/bubblemerge/small_bubble.txt
@@ -0,0 +1,2 @@
+1 AATAGAAG
+2 AGAAGCCC
diff --git a/genomix/genomix-pregelix/data/input/reads/bubblemerge/tip_and_bubble.txt b/genomix/genomix-pregelix/data/input/reads/bubblemerge/tip_and_bubble.txt
new file mode 100644
index 0000000..958ccff
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/reads/bubblemerge/tip_and_bubble.txt
@@ -0,0 +1,3 @@
+1 AATAGAAG
+2 ATAGACTA
+3 TAGACTAC
diff --git a/genomix/genomix-pregelix/data/input/reads/pathmerge/singleread.txt b/genomix/genomix-pregelix/data/input/reads/pathmerge/singleread.txt
new file mode 100644
index 0000000..63a95ad
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/reads/pathmerge/singleread.txt
@@ -0,0 +1 @@
+1 AATAGAAG
diff --git a/genomix/genomix-pregelix/data/input/reads/synthetic/walk_random_seq1.txt b/genomix/genomix-pregelix/data/input/reads/synthetic/walk_random_seq1.txt
new file mode 100644
index 0000000..35f1c49
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/reads/synthetic/walk_random_seq1.txt
@@ -0,0 +1,37 @@
+1 TAGTGCGA
+2 CCTCGCAC
+3 GCTAGGGT
+4 GAGGGTTG
+5 AGCAACCC
+6 GTTGCTGA
+7 TTTCAGCA
+8 CTGAAATC
+9 CAGATTTC
+10 GGCAGATT
+11 CTGGCAGA
+12 CTCTGGCA
+13 ATCTCTGG
+14 GCATCTCT
+15 CGGCATCT
+16 AACGGCAT
+17 GAAACGGC
+18 CGTTTCAA
+19 TATTGAAA
+20 TCAATACG
+21 AATACGTG
+22 TACGTGAA
+23 GTTTCACG
+24 TGAAACTA
+25 AAACTATT
+26 GTAATAGT
+27 TATTACGT
+28 TTACGTCA
+29 CATGACGT
+30 GTCATGAC
+31 GCGTCATG
+32 AAGCGTCA
+33 TCGCTTAA
+34 GCTTAAGC
+35 TCGCTTAA
+36 AAGCGTGT
+37 CCACACGC
diff --git a/genomix/genomix-pregelix/data/input/reads/synthetic/walk_random_seq2.txt b/genomix/genomix-pregelix/data/input/reads/synthetic/walk_random_seq2.txt
new file mode 100644
index 0000000..d65f7c0
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/reads/synthetic/walk_random_seq2.txt
@@ -0,0 +1,6 @@
+1 AATGCGCT
+2 CTAGCGCA
+3 CGCTAGGA
+4 CTAGGAGT
+5 AGGAGTTG
+6 AGCGCATT
diff --git a/genomix/genomix-pregelix/data/input/reads/tipremove/fr_with_tip.txt b/genomix/genomix-pregelix/data/input/reads/tipremove/fr_with_tip.txt
new file mode 100644
index 0000000..b6e1640
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/reads/tipremove/fr_with_tip.txt
@@ -0,0 +1,2 @@
+1 AACGTATA
+2 GGAATACG
diff --git a/genomix/genomix-pregelix/data/input2/.out.crc b/genomix/genomix-pregelix/data/input2/.out.crc
new file mode 100644
index 0000000..422fddb
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/.out.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/read/part-0 b/genomix/genomix-pregelix/data/input2/read/part-0
new file mode 100755
index 0000000..d702695
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/read/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/read/part-1 b/genomix/genomix-pregelix/data/input2/read/part-1
new file mode 100755
index 0000000..5bc30e8
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/read/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/read/part-2 b/genomix/genomix-pregelix/data/input2/read/part-2
new file mode 100755
index 0000000..474af09
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/read/part-2
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/read/part-3 b/genomix/genomix-pregelix/data/input2/read/part-3
new file mode 100755
index 0000000..98326a6
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/read/part-3
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/read2/part-0 b/genomix/genomix-pregelix/data/input2/read2/part-0
new file mode 100755
index 0000000..1620187
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/read2/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/read2/part-1 b/genomix/genomix-pregelix/data/input2/read2/part-1
new file mode 100755
index 0000000..d2e2476
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/read2/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/read2/part-2 b/genomix/genomix-pregelix/data/input2/read2/part-2
new file mode 100755
index 0000000..7f3575e
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/read2/part-2
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/read2/part-3 b/genomix/genomix-pregelix/data/input2/read2/part-3
new file mode 100755
index 0000000..03e23e1
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/read2/part-3
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/singleread b/genomix/genomix-pregelix/data/input2/singleread
new file mode 100644
index 0000000..f99f2a9
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/singleread
@@ -0,0 +1 @@
+1 CCGTACGC
diff --git a/genomix/genomix-pregelix/data/input2/tworeads/part-0 b/genomix/genomix-pregelix/data/input2/tworeads/part-0
new file mode 100755
index 0000000..b9cd4fb
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/tworeads/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/tworeads/part-1 b/genomix/genomix-pregelix/data/input2/tworeads/part-1
new file mode 100755
index 0000000..69638f9
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/tworeads/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/tworeads/part-2 b/genomix/genomix-pregelix/data/input2/tworeads/part-2
new file mode 100755
index 0000000..e69ce75
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/tworeads/part-2
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/tworeads/part-3 b/genomix/genomix-pregelix/data/input2/tworeads/part-3
new file mode 100755
index 0000000..77f352b
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/tworeads/part-3
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/tworeads_6 b/genomix/genomix-pregelix/data/input2/tworeads_6
new file mode 100644
index 0000000..6cea299
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/tworeads_6
@@ -0,0 +1,2 @@
+1 AATAGAAC
+2 CCGTACGC
diff --git a/genomix/genomix-pregelix/data/input2/unmerge_read/part-0 b/genomix/genomix-pregelix/data/input2/unmerge_read/part-0
new file mode 100755
index 0000000..aab2f64
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/unmerge_read/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/unmerge_read/part-1 b/genomix/genomix-pregelix/data/input2/unmerge_read/part-1
new file mode 100755
index 0000000..d7b24b3
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/unmerge_read/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/unmerge_read/part-2 b/genomix/genomix-pregelix/data/input2/unmerge_read/part-2
new file mode 100755
index 0000000..f82775c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/unmerge_read/part-2
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input2/unmerge_read/part-3 b/genomix/genomix-pregelix/data/input2/unmerge_read/part-3
new file mode 100755
index 0000000..b7a3925
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input2/unmerge_read/part-3
Binary files differ
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/2/result.ps b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/2/result.ps
new file mode 100644
index 0000000..19f638a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/2/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 108 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 72 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+32 19 31.82 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17.5 15.4 moveto 29 (AAT) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 108 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/3/result.ps b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/3/result.ps
new file mode 100644
index 0000000..aa8e89c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/3/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 106 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 70 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% ATA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+31 19 31.11 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17 15.4 moveto 28 (ATA) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 106 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/4/result.ps b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/4/result.ps
new file mode 100644
index 0000000..aa8e89c
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/4/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 106 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 70 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% ATA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+31 19 31.11 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17 15.4 moveto 28 (ATA) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 106 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/5/result.ps b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/5/result.ps
new file mode 100644
index 0000000..f22a610
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/5/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 110 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 74 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AGA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+33 19 33.23 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17.5 15.4 moveto 31 (AGA) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 110 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/6/result.ps b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/6/result.ps
new file mode 100644
index 0000000..c293727
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/6/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 106 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 70 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% CTA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+31 19 31.11 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17 15.4 moveto 28 (CTA) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 106 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/7/result.ps b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/7/result.ps
new file mode 100644
index 0000000..f22a610
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/7/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 110 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 74 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AGA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+33 19 33.23 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17.5 15.4 moveto 31 (AGA) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 110 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/8/result.ps b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/8/result.ps
new file mode 100644
index 0000000..f22a610
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/8/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 110 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 74 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AGA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+33 19 33.23 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17.5 15.4 moveto 31 (AGA) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 110 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/9/result.ps b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/9/result.ps
new file mode 100644
index 0000000..f22a610
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/graphviz/9/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 110 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 74 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AGA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+33 19 33.23 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17.5 15.4 moveto 31 (AGA) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 110 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.2.crc b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.2.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.2.crc
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.3.crc b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.3.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.3.crc
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.4.crc b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.4.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.4.crc
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.5.crc b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.5.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.5.crc
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.6.crc b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.6.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.6.crc
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.7.crc b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.7.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.7.crc
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.8.crc b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.8.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.8.crc
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.9.crc b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.9.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/.9.crc
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/2 b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/2
new file mode 100755
index 0000000..f00dc00
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/2
@@ -0,0 +1 @@
+AAT {[(1-1_0)] [] [] [] [] AATA}
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/3 b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/3
new file mode 100755
index 0000000..b32ce12
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/3
@@ -0,0 +1 @@
+ATA {[(1-2_0)] [] [] [] [] AATAG}
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/4 b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/4
new file mode 100755
index 0000000..7d695cd
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/4
@@ -0,0 +1 @@
+ATA {[(1-2_0)] [] [] [] [] AATAGA}
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/5 b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/5
new file mode 100755
index 0000000..dd2c522
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/5
@@ -0,0 +1 @@
+AGA {[(1-4_0)] [] [] [] [] AATAGAA}
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/6 b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/6
new file mode 100755
index 0000000..9daadca
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/6
@@ -0,0 +1 @@
+CTA {[(1-3_0)] [] [] [] [] GTTCTATT}
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/7 b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/7
new file mode 100755
index 0000000..72c2cd5
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/7
@@ -0,0 +1 @@
+AGA {[(1-4_0)] [] [] [] [] AATAGAACT}
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/8 b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/8
new file mode 100755
index 0000000..6ee8b4f
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/8
@@ -0,0 +1 @@
+AGA {[(1-4_0)] [] [] [] [] AATAGAACTT}
diff --git a/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/9 b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/9
new file mode 100755
index 0000000..1225238
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/LogAlgorithmForMergeGraph/txt/9
@@ -0,0 +1 @@
+AGA {[(1-4_0)] [] [] [] [] AATAGAACTTA}
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/2/result.ps b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/2/result.ps
new file mode 100644
index 0000000..19f638a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/2/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 108 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 72 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+32 19 31.82 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17.5 15.4 moveto 29 (AAT) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 108 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/3/result.ps b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/3/result.ps
new file mode 100644
index 0000000..19f638a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/3/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 108 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 72 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+32 19 31.82 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17.5 15.4 moveto 29 (AAT) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 108 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/4/result.ps b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/4/result.ps
new file mode 100644
index 0000000..19f638a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/4/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 108 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 72 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+32 19 31.82 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17.5 15.4 moveto 29 (AAT) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 108 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/5/result.ps b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/5/result.ps
new file mode 100644
index 0000000..19f638a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/5/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 108 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 72 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+32 19 31.82 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17.5 15.4 moveto 29 (AAT) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 108 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/6/result.ps b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/6/result.ps
new file mode 100644
index 0000000..19f638a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/6/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 108 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 72 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+32 19 31.82 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17.5 15.4 moveto 29 (AAT) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 108 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/7/result.ps b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/7/result.ps
new file mode 100644
index 0000000..19f638a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/7/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 108 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 72 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+32 19 31.82 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17.5 15.4 moveto 29 (AAT) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 108 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/8/result.ps b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/8/result.ps
new file mode 100644
index 0000000..19f638a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/8/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 108 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 72 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+32 19 31.82 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17.5 15.4 moveto 29 (AAT) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 108 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/9/result.ps b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/9/result.ps
new file mode 100644
index 0000000..19f638a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/graphviz/9/result.ps
@@ -0,0 +1,206 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 108 82
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 72 46 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+32 19 31.82 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17.5 15.4 moveto 29 (AAT) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 108 82
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.2.crc b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.2.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.2.crc
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.3.crc b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.3.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.3.crc
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.4.crc b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.4.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.4.crc
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.5.crc b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.5.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.5.crc
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.6.crc b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.6.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.6.crc
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.7.crc b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.7.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.7.crc
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.8.crc b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.8.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.8.crc
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.9.crc b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.9.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/.9.crc
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/2 b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/2
new file mode 100755
index 0000000..f00dc00
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/2
@@ -0,0 +1 @@
+AAT {[(1-1_0)] [] [] [] [] AATA}
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/3 b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/3
new file mode 100755
index 0000000..588691b
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/3
@@ -0,0 +1 @@
+AAT {[(1-1_0)] [] [] [] [] AATAG}
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/4 b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/4
new file mode 100755
index 0000000..15476c2
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/4
@@ -0,0 +1 @@
+AAT {[(1-1_0)] [] [] [] [] AATAGA}
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/5 b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/5
new file mode 100755
index 0000000..33b2666
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/5
@@ -0,0 +1 @@
+AAT {[(1-1_0)] [] [] [] [] AATAGAA}
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/6 b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/6
new file mode 100755
index 0000000..c6ca9b5
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/6
@@ -0,0 +1 @@
+AAT {[(1-1_0)] [] [] [] [] AATAGAAC}
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/7 b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/7
new file mode 100755
index 0000000..bc487c7
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/7
@@ -0,0 +1 @@
+AAT {[(1-1_0)] [] [] [] [] AATAGAACT}
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/8 b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/8
new file mode 100755
index 0000000..f85f319
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/8
@@ -0,0 +1 @@
+AAT {[(1-1_0)] [] [] [] [] AATAGAACTT}
diff --git a/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/9 b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/9
new file mode 100755
index 0000000..30986e1
--- /dev/null
+++ b/genomix/genomix-pregelix/data/result/P4ForMergeGraph/txt/9
@@ -0,0 +1 @@
+AAT {[(1-1_0)] [] [] [] [] AATAGAACTTA}
diff --git a/genomix/genomix-pregelix/graph/BridgePath b/genomix/genomix-pregelix/graph/BridgePath
new file mode 100644
index 0000000..c10ea60
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/BridgePath
@@ -0,0 +1,14 @@
+ACCCC C|G 1
+CCCCG A|T 1
+CTCCG A|T 1
+TTCCA T|C 2
+ACTCC C|G 1
+CCGTG CT| 2
+TCCAC T|CT 2
+CCACC T|C 1
+CCACT T|C 1
+CACCC C|C 1
+TTTCC |A 2
+CCCGT C|G 1
+TCCGT C|G 1
+CACTC C|C 1
diff --git a/genomix/genomix-pregelix/graph/BridgePath_out.ps b/genomix/genomix-pregelix/graph/BridgePath_out.ps
new file mode 100644
index 0000000..9f66e44
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/BridgePath_out.ps
@@ -0,0 +1,603 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 248 674
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 212 638 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% ACCCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 241 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 237.4 moveto 49 (ACCCC) alignedtext
+grestore
+% CCCCG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 167 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+21.5 163.4 moveto 51 (CCCCG) alignedtext
+grestore
+% ACCCC->CCCCG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 222.33 moveto
+47 214.26 47 204.65 47 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 195.67 moveto
+47 185.67 lineto
+43.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 195.67 moveto
+47 185.67 lineto
+43.5 195.67 lineto
+closepath stroke
+grestore
+% CCCGT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 93 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 89.4 moveto 49 (CCCGT) alignedtext
+grestore
+% CCCCG->CCCGT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47.25 148.33 moveto
+47.36 140.26 47.49 130.65 47.61 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.11 121.71 moveto
+47.75 111.67 lineto
+44.11 121.62 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.11 121.71 moveto
+47.75 111.67 lineto
+44.11 121.62 lineto
+closepath stroke
+grestore
+% CCGTG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+102 19 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+76.5 15.4 moveto 51 (CCGTG) alignedtext
+grestore
+% CCCGT->CCGTG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 61.07 75.09 moveto
+67.59 66.15 75.6 55.18 82.8 45.31 curveto
+stroke
+0 0 0 edgecolor
+newpath 85.79 47.15 moveto
+88.86 37 lineto
+80.14 43.02 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 85.79 47.15 moveto
+88.86 37 lineto
+80.14 43.02 lineto
+closepath stroke
+grestore
+% CTCCG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+158 167 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+133.5 163.4 moveto 49 (CTCCG) alignedtext
+grestore
+% TCCGT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+157 93 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+133 89.4 moveto 48 (TCCGT) alignedtext
+grestore
+% CTCCG->TCCGT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 157.75 148.33 moveto
+157.64 140.26 157.51 130.65 157.39 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 160.89 121.62 moveto
+157.25 111.67 lineto
+153.89 121.71 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 160.89 121.62 moveto
+157.25 111.67 lineto
+153.89 121.71 lineto
+closepath stroke
+grestore
+% TCCGT->CCGTG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 143.69 75.09 moveto
+137.05 66.15 128.89 55.18 121.56 45.31 curveto
+stroke
+0 0 0 edgecolor
+newpath 124.16 42.94 moveto
+115.38 37 lineto
+118.54 47.12 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 124.16 42.94 moveto
+115.38 37 lineto
+118.54 47.12 lineto
+closepath stroke
+grestore
+% TTCCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+102 537 44.05 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+78.5 533.4 moveto 47 (TTCCA) alignedtext
+grestore
+% TCCAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+102 463 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+78 459.4 moveto 48 (TCCAC) alignedtext
+grestore
+% TTCCA->TCCAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 102 518.33 moveto
+102 510.26 102 500.65 102 491.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 105.5 491.67 moveto
+102 481.67 lineto
+98.5 491.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 105.5 491.67 moveto
+102 481.67 lineto
+98.5 491.67 lineto
+closepath stroke
+grestore
+% CCACC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 389 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 385.4 moveto 49 (CCACC) alignedtext
+grestore
+% TCCAC->CCACC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 88.69 445.09 moveto
+81.93 436 73.6 424.79 66.17 414.79 curveto
+stroke
+0 0 0 edgecolor
+newpath 68.97 412.69 moveto
+60.19 406.75 lineto
+63.35 416.86 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 68.97 412.69 moveto
+60.19 406.75 lineto
+63.35 416.86 lineto
+closepath stroke
+grestore
+% CCACT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+157 389 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+132.5 385.4 moveto 49 (CCACT) alignedtext
+grestore
+% TCCAC->CCACT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 115.31 445.09 moveto
+122.07 436 130.4 424.79 137.83 414.79 curveto
+stroke
+0 0 0 edgecolor
+newpath 140.65 416.86 moveto
+143.81 406.75 lineto
+135.03 412.69 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 140.65 416.86 moveto
+143.81 406.75 lineto
+135.03 412.69 lineto
+closepath stroke
+grestore
+% ACTCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+157 241 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+133 237.4 moveto 48 (ACTCC) alignedtext
+grestore
+% ACTCC->CTCCG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 157.25 222.33 moveto
+157.36 214.26 157.49 204.65 157.61 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 161.11 195.71 moveto
+157.75 185.67 lineto
+154.11 195.62 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 161.11 195.71 moveto
+157.75 185.67 lineto
+154.11 195.62 lineto
+closepath stroke
+grestore
+% CACCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 315 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 311.4 moveto 49 (CACCC) alignedtext
+grestore
+% CCACC->CACCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 370.33 moveto
+47 362.26 47 352.65 47 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 343.67 moveto
+47 333.67 lineto
+43.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 343.67 moveto
+47 333.67 lineto
+43.5 343.67 lineto
+closepath stroke
+grestore
+% CACTC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+157 315 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+133 311.4 moveto 48 (CACTC) alignedtext
+grestore
+% CCACT->CACTC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 157 370.33 moveto
+157 362.26 157 352.65 157 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 160.5 343.67 moveto
+157 333.67 lineto
+153.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 160.5 343.67 moveto
+157 333.67 lineto
+153.5 343.67 lineto
+closepath stroke
+grestore
+% CACCC->ACCCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 296.33 moveto
+47 288.26 47 278.65 47 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 269.67 moveto
+47 259.67 lineto
+43.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 269.67 moveto
+47 259.67 lineto
+43.5 269.67 lineto
+closepath stroke
+grestore
+% CACTC->ACTCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 157 296.33 moveto
+157 288.26 157 278.65 157 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 160.5 269.67 moveto
+157 259.67 lineto
+153.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 160.5 269.67 moveto
+157 259.67 lineto
+153.5 269.67 lineto
+closepath stroke
+grestore
+% TTTCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+102 611 43.84 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+79 607.4 moveto 46 (TTTCC) alignedtext
+grestore
+% TTTCC->TTCCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 102 592.33 moveto
+102 584.26 102 574.65 102 565.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 105.5 565.67 moveto
+102 555.67 lineto
+98.5 565.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 105.5 565.67 moveto
+102 555.67 lineto
+98.5 565.67 lineto
+closepath stroke
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 248 674
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/graph/CyclePath b/genomix/genomix-pregelix/graph/CyclePath
new file mode 100644
index 0000000..db30e3a
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/CyclePath
@@ -0,0 +1,10 @@
+GCAAC |T 1
+CATCA T|A 1
+CTTCA A|T 1
+AACTT C|C 1
+ACTTC A|A 1
+TCAAC A|T 1
+ATCAA C|C 1
+TTCAT C|C 1
+CAACT GT|T 2
+TCATC T|A 1
diff --git a/genomix/genomix-pregelix/graph/CyclePath_out.ps b/genomix/genomix-pregelix/graph/CyclePath_out.ps
new file mode 100644
index 0000000..1b13ecb
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/CyclePath_out.ps
@@ -0,0 +1,489 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 175 748
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 139 712 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% GCAAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+83 685 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+57 681.4 moveto 52 (GCAAC) alignedtext
+grestore
+% CAACT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+83 611 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+58.5 607.4 moveto 49 (CAACT) alignedtext
+grestore
+% GCAAC->CAACT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 83 666.33 moveto
+83 658.26 83 648.65 83 639.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 86.5 639.67 moveto
+83 629.67 lineto
+79.5 639.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 86.5 639.67 moveto
+83 629.67 lineto
+79.5 639.67 lineto
+closepath stroke
+grestore
+% AACTT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+46 537 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+21.5 533.4 moveto 49 (AACTT) alignedtext
+grestore
+% CAACT->AACTT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 73.85 592.71 moveto
+69.58 584.17 64.41 573.83 59.69 564.38 curveto
+stroke
+0 0 0 edgecolor
+newpath 62.77 562.72 moveto
+55.17 555.34 lineto
+56.51 565.85 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 62.77 562.72 moveto
+55.17 555.34 lineto
+56.51 565.85 lineto
+closepath stroke
+grestore
+% CATCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 167 44.05 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 163.4 moveto 47 (CATCA) alignedtext
+grestore
+% ATCAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 93 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23 89.4 moveto 48 (ATCAA) alignedtext
+grestore
+% CATCA->ATCAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 148.33 moveto
+47 140.26 47 130.65 47 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 121.67 moveto
+47 111.67 lineto
+43.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 121.67 moveto
+47 111.67 lineto
+43.5 121.67 lineto
+closepath stroke
+grestore
+% TCAAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+83 19 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+58.5 15.4 moveto 49 (TCAAC) alignedtext
+grestore
+% ATCAA->TCAAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 55.9 74.71 moveto
+60.05 66.17 65.08 55.83 69.68 46.38 curveto
+stroke
+0 0 0 edgecolor
+newpath 72.85 47.86 moveto
+74.08 37.34 lineto
+66.56 44.8 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 72.85 47.86 moveto
+74.08 37.34 lineto
+66.56 44.8 lineto
+closepath stroke
+grestore
+% CTTCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 389 44.05 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 385.4 moveto 47 (CTTCA) alignedtext
+grestore
+% TTCAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 315 43.84 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+24 311.4 moveto 46 (TTCAT) alignedtext
+grestore
+% CTTCA->TTCAT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 370.33 moveto
+47 362.26 47 352.65 47 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 343.67 moveto
+47 333.67 lineto
+43.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 343.67 moveto
+47 333.67 lineto
+43.5 343.67 lineto
+closepath stroke
+grestore
+% TCATC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 241 43.84 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+24 237.4 moveto 46 (TCATC) alignedtext
+grestore
+% TTCAT->TCATC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 296.33 moveto
+47 288.26 47 278.65 47 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 269.67 moveto
+47 259.67 lineto
+43.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 269.67 moveto
+47 259.67 lineto
+43.5 269.67 lineto
+closepath stroke
+grestore
+% ACTTC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+47 463 44.05 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 459.4 moveto 47 (ACTTC) alignedtext
+grestore
+% AACTT->ACTTC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 46.25 518.33 moveto
+46.36 510.26 46.49 500.65 46.61 491.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.11 491.71 moveto
+46.75 481.67 lineto
+43.11 491.62 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.11 491.71 moveto
+46.75 481.67 lineto
+43.11 491.62 lineto
+closepath stroke
+grestore
+% ACTTC->CTTCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 444.33 moveto
+47 436.26 47 426.65 47 417.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 417.67 moveto
+47 407.67 lineto
+43.5 417.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 417.67 moveto
+47 407.67 lineto
+43.5 417.67 lineto
+closepath stroke
+grestore
+% TCAAC->CAACT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 90.67 37.32 moveto
+101.5 64.93 120 119.06 120 167 curveto
+120 463 120 463 120 463 curveto
+120 505.88 105.2 553.72 94.32 583.1 curveto
+stroke
+0 0 0 edgecolor
+newpath 90.96 582.09 moveto
+90.67 592.68 lineto
+97.5 584.58 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 90.96 582.09 moveto
+90.67 592.68 lineto
+97.5 584.58 lineto
+closepath stroke
+grestore
+% TCATC->CATCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 47 222.33 moveto
+47 214.26 47 204.65 47 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 50.5 195.67 moveto
+47 185.67 lineto
+43.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 50.5 195.67 moveto
+47 185.67 lineto
+43.5 195.67 lineto
+closepath stroke
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 175 748
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/graph/LongPath b/genomix/genomix-pregelix/graph/LongPath
new file mode 100644
index 0000000..82c0298
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/LongPath
@@ -0,0 +1,13 @@
+CTCAG C|T 1
+AGTAC C|G 1
+GGCCT |C 1
+ACGCC T|C 1
+CCTCA G|G 1
+CCCGG G| 1
+GCCTC G|A 1
+CAGTA T|C 1
+GTACG A|C 1
+GCCCG C|G 1
+CGCCC A|G 1
+TCAGT C|A 1
+TACGC G|C 1
diff --git a/genomix/genomix-pregelix/graph/LongPath_out.ps b/genomix/genomix-pregelix/graph/LongPath_out.ps
new file mode 100644
index 0000000..1bef623
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/LongPath_out.ps
@@ -0,0 +1,554 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 140 970
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 104 934 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% CTCAG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 685 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 681.4 moveto 49 (CTCAG) alignedtext
+grestore
+% TCAGT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 611 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+24 607.4 moveto 48 (TCAGT) alignedtext
+grestore
+% CTCAG->TCAGT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 666.33 moveto
+48 658.26 48 648.65 48 639.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 639.67 moveto
+48 629.67 lineto
+44.5 639.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 639.67 moveto
+48 629.67 lineto
+44.5 639.67 lineto
+closepath stroke
+grestore
+% CAGTA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 537 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 533.4 moveto 49 (CAGTA) alignedtext
+grestore
+% TCAGT->CAGTA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 592.33 moveto
+48 584.26 48 574.65 48 565.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 565.67 moveto
+48 555.67 lineto
+44.5 565.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 565.67 moveto
+48 555.67 lineto
+44.5 565.67 lineto
+closepath stroke
+grestore
+% AGTAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 463 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 459.4 moveto 49 (AGTAC) alignedtext
+grestore
+% GTACG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 389 46.17 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23 385.4 moveto 50 (GTACG) alignedtext
+grestore
+% AGTAC->GTACG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 444.33 moveto
+48 436.26 48 426.65 48 417.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 417.67 moveto
+48 407.67 lineto
+44.5 417.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 417.67 moveto
+48 407.67 lineto
+44.5 417.67 lineto
+closepath stroke
+grestore
+% TACGC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 315 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 311.4 moveto 49 (TACGC) alignedtext
+grestore
+% GTACG->TACGC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 370.33 moveto
+48 362.26 48 352.65 48 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath stroke
+grestore
+% GGCCT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 907 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 903.4 moveto 52 (GGCCT) alignedtext
+grestore
+% GCCTC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 833 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 829.4 moveto 49 (GCCTC) alignedtext
+grestore
+% GGCCT->GCCTC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 888.33 moveto
+48 880.26 48 870.65 48 861.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 861.67 moveto
+48 851.67 lineto
+44.5 861.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 861.67 moveto
+48 851.67 lineto
+44.5 861.67 lineto
+closepath stroke
+grestore
+% CCTCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 759 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+24 755.4 moveto 48 (CCTCA) alignedtext
+grestore
+% GCCTC->CCTCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 814.33 moveto
+48 806.26 48 796.65 48 787.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 787.67 moveto
+48 777.67 lineto
+44.5 787.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 787.67 moveto
+48 777.67 lineto
+44.5 787.67 lineto
+closepath stroke
+grestore
+% ACGCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 241 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 237.4 moveto 51 (ACGCC) alignedtext
+grestore
+% CGCCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 167 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 163.4 moveto 51 (CGCCC) alignedtext
+grestore
+% ACGCC->CGCCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 222.33 moveto
+48 214.26 48 204.65 48 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath stroke
+grestore
+% GCCCG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 93 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 89.4 moveto 52 (GCCCG) alignedtext
+grestore
+% CGCCC->GCCCG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 148.33 moveto
+48 140.26 48 130.65 48 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath stroke
+grestore
+% CCTCA->CTCAG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 740.33 moveto
+48 732.26 48 722.65 48 713.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 713.67 moveto
+48 703.67 lineto
+44.5 713.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 713.67 moveto
+48 703.67 lineto
+44.5 713.67 lineto
+closepath stroke
+grestore
+% CAGTA->AGTAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 518.33 moveto
+48 510.26 48 500.65 48 491.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 491.67 moveto
+48 481.67 lineto
+44.5 491.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 491.67 moveto
+48 481.67 lineto
+44.5 491.67 lineto
+closepath stroke
+grestore
+% TACGC->ACGCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 296.33 moveto
+48 288.26 48 278.65 48 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath stroke
+grestore
+% CCCGG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 19 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 15.4 moveto 52 (CCCGG) alignedtext
+grestore
+% GCCCG->CCCGG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 74.33 moveto
+48 66.26 48 56.65 48 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath stroke
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 140 970
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/graph/Path b/genomix/genomix-pregelix/graph/Path
new file mode 100644
index 0000000..67d55ca
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/Path
@@ -0,0 +1,8 @@
+CTCAG C|T 1
+AGTAC C|G 1
+GGCCT |C 1
+CCTCA G|G 1
+GCCTC G|A 1
+CAGTA T|C 1
+GTACG A| 1
+TCAGT C|A 1
diff --git a/genomix/genomix-pregelix/graph/Path_out.ps b/genomix/genomix-pregelix/graph/Path_out.ps
new file mode 100644
index 0000000..43f733f
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/Path_out.ps
@@ -0,0 +1,409 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 140 600
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 104 564 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% CTCAG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 315 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 311.4 moveto 49 (CTCAG) alignedtext
+grestore
+% TCAGT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 241 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+24 237.4 moveto 48 (TCAGT) alignedtext
+grestore
+% CTCAG->TCAGT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 296.33 moveto
+48 288.26 48 278.65 48 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath stroke
+grestore
+% CAGTA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 167 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 163.4 moveto 49 (CAGTA) alignedtext
+grestore
+% TCAGT->CAGTA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 222.33 moveto
+48 214.26 48 204.65 48 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath stroke
+grestore
+% AGTAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 93 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 89.4 moveto 49 (AGTAC) alignedtext
+grestore
+% GTACG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 19 46.17 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23 15.4 moveto 50 (GTACG) alignedtext
+grestore
+% AGTAC->GTACG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 74.33 moveto
+48 66.26 48 56.65 48 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath stroke
+grestore
+% GGCCT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 537 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 533.4 moveto 52 (GGCCT) alignedtext
+grestore
+% GCCTC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 463 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 459.4 moveto 49 (GCCTC) alignedtext
+grestore
+% GGCCT->GCCTC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 518.33 moveto
+48 510.26 48 500.65 48 491.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 491.67 moveto
+48 481.67 lineto
+44.5 491.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 491.67 moveto
+48 481.67 lineto
+44.5 491.67 lineto
+closepath stroke
+grestore
+% CCTCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 389 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+24 385.4 moveto 48 (CCTCA) alignedtext
+grestore
+% GCCTC->CCTCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 444.33 moveto
+48 436.26 48 426.65 48 417.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 417.67 moveto
+48 407.67 lineto
+44.5 417.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 417.67 moveto
+48 407.67 lineto
+44.5 417.67 lineto
+closepath stroke
+grestore
+% CCTCA->CTCAG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 370.33 moveto
+48 362.26 48 352.65 48 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath stroke
+grestore
+% CAGTA->AGTAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 148.33 moveto
+48 140.26 48 130.65 48 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath stroke
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 140 600
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/graph/SimplePath b/genomix/genomix-pregelix/graph/SimplePath
new file mode 100644
index 0000000..2e0667e
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/SimplePath
@@ -0,0 +1,18 @@
+AGCAC C| 1
+AAGAC |A 1
+CAGCA A|C 1
+TCGCA A|T 1
+CGGCA G|A 1
+TATCG A|C 1
+CAAGA G|A 1
+ACAGC G|A 1
+ATCGC T|A 1
+GCGGC |A 1
+GCATC C| 1
+ATATC |G 1
+GCAAG G|A 1
+GACAG A|C 1
+CGCAT T|C 1
+GGCAA C|G 1
+AAGAA C| 1
+AGACA A|G 1
diff --git a/genomix/genomix-pregelix/graph/SimplePath_out.ps b/genomix/genomix-pregelix/graph/SimplePath_out.ps
new file mode 100644
index 0000000..3b3bf39
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/SimplePath_out.ps
@@ -0,0 +1,659 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 366 452
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 330 416 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% AAGAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 389 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 385.4 moveto 52 (AAGAC) alignedtext
+grestore
+% AGACA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 315 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 311.4 moveto 51 (AGACA) alignedtext
+grestore
+% AAGAC->AGACA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 370.33 moveto
+48 362.26 48 352.65 48 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath stroke
+grestore
+% GACAG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 241 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 237.4 moveto 52 (GACAG) alignedtext
+grestore
+% AGACA->GACAG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 296.33 moveto
+48 288.26 48 278.65 48 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath stroke
+grestore
+% CAGCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 93 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 89.4 moveto 51 (CAGCA) alignedtext
+grestore
+% AGCAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 19 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 15.4 moveto 51 (AGCAC) alignedtext
+grestore
+% CAGCA->AGCAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 74.33 moveto
+48 66.26 48 56.65 48 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath stroke
+grestore
+% TCGCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 167 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+135.5 163.4 moveto 49 (TCGCA) alignedtext
+grestore
+% CGCAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 93 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+135.5 89.4 moveto 49 (CGCAT) alignedtext
+grestore
+% TCGCA->CGCAT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 160 148.33 moveto
+160 140.26 160 130.65 160 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.5 121.67 moveto
+160 111.67 lineto
+156.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.5 121.67 moveto
+160 111.67 lineto
+156.5 121.67 lineto
+closepath stroke
+grestore
+% GCATC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 19 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+135.5 15.4 moveto 49 (GCATC) alignedtext
+grestore
+% CGCAT->GCATC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 160 74.33 moveto
+160 66.26 160 56.65 160 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.5 47.67 moveto
+160 37.67 lineto
+156.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.5 47.67 moveto
+160 37.67 lineto
+156.5 47.67 lineto
+closepath stroke
+grestore
+% CGGCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+273 315 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+247 311.4 moveto 52 (CGGCA) alignedtext
+grestore
+% GGCAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+273 241 48.79 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+246.5 237.4 moveto 53 (GGCAA) alignedtext
+grestore
+% CGGCA->GGCAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 273 296.33 moveto
+273 288.26 273 278.65 273 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 276.5 269.67 moveto
+273 259.67 lineto
+269.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 276.5 269.67 moveto
+273 259.67 lineto
+269.5 269.67 lineto
+closepath stroke
+grestore
+% GCAAG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+273 167 48.79 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+246.5 163.4 moveto 53 (GCAAG) alignedtext
+grestore
+% GGCAA->GCAAG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 273 222.33 moveto
+273 214.26 273 204.65 273 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 276.5 195.67 moveto
+273 185.67 lineto
+269.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 276.5 195.67 moveto
+273 185.67 lineto
+269.5 195.67 lineto
+closepath stroke
+grestore
+% TATCG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 315 44.05 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+136.5 311.4 moveto 47 (TATCG) alignedtext
+grestore
+% ATCGC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 241 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+135.5 237.4 moveto 49 (ATCGC) alignedtext
+grestore
+% TATCG->ATCGC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 160 296.33 moveto
+160 288.26 160 278.65 160 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.5 269.67 moveto
+160 259.67 lineto
+156.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.5 269.67 moveto
+160 259.67 lineto
+156.5 269.67 lineto
+closepath stroke
+grestore
+% ATCGC->TCGCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 160 222.33 moveto
+160 214.26 160 204.65 160 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.5 195.67 moveto
+160 185.67 lineto
+156.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.5 195.67 moveto
+160 185.67 lineto
+156.5 195.67 lineto
+closepath stroke
+grestore
+% CAAGA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+273 93 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+247 89.4 moveto 52 (CAAGA) alignedtext
+grestore
+% AAGAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+273 19 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+247 15.4 moveto 52 (AAGAA) alignedtext
+grestore
+% CAAGA->AAGAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 273 74.33 moveto
+273 66.26 273 56.65 273 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 276.5 47.67 moveto
+273 37.67 lineto
+269.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 276.5 47.67 moveto
+273 37.67 lineto
+269.5 47.67 lineto
+closepath stroke
+grestore
+% ACAGC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 167 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 163.4 moveto 51 (ACAGC) alignedtext
+grestore
+% ACAGC->CAGCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 148.33 moveto
+48 140.26 48 130.65 48 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath stroke
+grestore
+% GCGGC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+273 389 49 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+246 385.4 moveto 54 (GCGGC) alignedtext
+grestore
+% GCGGC->CGGCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 273 370.33 moveto
+273 362.26 273 352.65 273 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 276.5 343.67 moveto
+273 333.67 lineto
+269.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 276.5 343.67 moveto
+273 333.67 lineto
+269.5 343.67 lineto
+closepath stroke
+grestore
+% ATATC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 389 43.13 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+137.5 385.4 moveto 45 (ATATC) alignedtext
+grestore
+% ATATC->TATCG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 160 370.33 moveto
+160 362.26 160 352.65 160 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.5 343.67 moveto
+160 333.67 lineto
+156.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.5 343.67 moveto
+160 333.67 lineto
+156.5 343.67 lineto
+closepath stroke
+grestore
+% GCAAG->CAAGA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 273 148.33 moveto
+273 140.26 273 130.65 273 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 276.5 121.67 moveto
+273 111.67 lineto
+269.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 276.5 121.67 moveto
+273 111.67 lineto
+269.5 121.67 lineto
+closepath stroke
+grestore
+% GACAG->ACAGC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 222.33 moveto
+48 214.26 48 204.65 48 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath stroke
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 366 452
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/graph/SinglePath b/genomix/genomix-pregelix/graph/SinglePath
new file mode 100644
index 0000000..02f42ba
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/SinglePath
@@ -0,0 +1,6 @@
+ACAAC G|A 1
+CAACA A|G 1
+ACAGT A| 1
+AACAG C|T 1
+GACAA A|C 1
+AGACA |A 1
diff --git a/genomix/genomix-pregelix/graph/SinglePath_out.ps b/genomix/genomix-pregelix/graph/SinglePath_out.ps
new file mode 100644
index 0000000..8371636
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/SinglePath_out.ps
@@ -0,0 +1,351 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 140 452
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 104 416 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% ACAAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 241 46.17 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23 237.4 moveto 50 (ACAAC) alignedtext
+grestore
+% CAACA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 167 46.17 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23 163.4 moveto 50 (CAACA) alignedtext
+grestore
+% ACAAC->CAACA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 222.33 moveto
+48 214.26 48 204.65 48 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath stroke
+grestore
+% AACAG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 93 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 89.4 moveto 52 (AACAG) alignedtext
+grestore
+% CAACA->AACAG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 148.33 moveto
+48 140.26 48 130.65 48 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath stroke
+grestore
+% ACAGT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 19 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 15.4 moveto 49 (ACAGT) alignedtext
+grestore
+% AACAG->ACAGT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 74.33 moveto
+48 66.26 48 56.65 48 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath stroke
+grestore
+% GACAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 315 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 311.4 moveto 52 (GACAA) alignedtext
+grestore
+% GACAA->ACAAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 296.33 moveto
+48 288.26 48 278.65 48 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath stroke
+grestore
+% AGACA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 389 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 385.4 moveto 51 (AGACA) alignedtext
+grestore
+% AGACA->GACAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 370.33 moveto
+48 362.26 48 352.65 48 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath stroke
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 140 452
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/graph/ThreeKmer b/genomix/genomix-pregelix/graph/ThreeKmer
new file mode 100644
index 0000000..7ce9890
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/ThreeKmer
@@ -0,0 +1,3 @@
+CTCGG A|T 1
+ACTCG |G 1
+TCGGT C| 1
diff --git a/genomix/genomix-pregelix/graph/TreePath b/genomix/genomix-pregelix/graph/TreePath
new file mode 100644
index 0000000..0a3d5c6
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/TreePath
@@ -0,0 +1,29 @@
+CTAAA A|C 1
+GTAAC A|T 1
+CTCAG C|T 2
+GCTAT G|C 1
+AGTAC C|G 1
+GGCCT |CG 3
+ATCCC T| 1
+ACGCC T|C 1
+CCTCA G|G 2
+CCCGG G| 1
+CCTGG G|C 1
+GCCTC G|A 2
+CAGTA T|AC 2
+TAAAC C| 1
+ACTAA A|A 1
+AGTAA C|C 1
+TAACT G|A 1
+GTACG A|C 1
+GCCCG C|G 1
+CGCCC A|G 1
+TGGCT C|A 1
+TATCC C|C 1
+TCAGT C|A 2
+TACGC G|C 1
+CTGGC C|T 1
+CTATC G|C 1
+AACTA T|A 1
+GCCTG G|G 1
+GGCTA T|T 1
diff --git a/genomix/genomix-pregelix/graph/TreePath_out.ps b/genomix/genomix-pregelix/graph/TreePath_out.ps
new file mode 100644
index 0000000..a1e7da1
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/TreePath_out.ps
@@ -0,0 +1,1018 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 359 970
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 323 934 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% CTAAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 93 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+135.5 89.4 moveto 49 (CTAAA) alignedtext
+grestore
+% TAAAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+160 19 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+135.5 15.4 moveto 49 (TAAAC) alignedtext
+grestore
+% CTAAA->TAAAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 160 74.33 moveto
+160 66.26 160 56.65 160 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.5 47.67 moveto
+160 37.67 lineto
+156.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.5 47.67 moveto
+160 37.67 lineto
+156.5 47.67 lineto
+closepath stroke
+grestore
+% GTAAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+158 389 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+133.5 385.4 moveto 49 (GTAAC) alignedtext
+grestore
+% TAACT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+158 315 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+134 311.4 moveto 48 (TAACT) alignedtext
+grestore
+% GTAAC->TAACT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 158 370.33 moveto
+158 362.26 158 352.65 158 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 161.5 343.67 moveto
+158 333.67 lineto
+154.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 161.5 343.67 moveto
+158 333.67 lineto
+154.5 343.67 lineto
+closepath stroke
+grestore
+% AACTA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+159 241 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+134.5 237.4 moveto 49 (AACTA) alignedtext
+grestore
+% TAACT->AACTA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 158.25 296.33 moveto
+158.36 288.26 158.49 278.65 158.61 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 162.11 269.71 moveto
+158.75 259.67 lineto
+155.11 269.62 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 162.11 269.71 moveto
+158.75 259.67 lineto
+155.11 269.62 lineto
+closepath stroke
+grestore
+% CTCAG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+155 685 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+130.5 681.4 moveto 49 (CTCAG) alignedtext
+grestore
+% TCAGT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+155 611 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+131 607.4 moveto 48 (TCAGT) alignedtext
+grestore
+% CTCAG->TCAGT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 155 666.33 moveto
+155 658.26 155 648.65 155 639.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 158.5 639.67 moveto
+155 629.67 lineto
+151.5 639.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 158.5 639.67 moveto
+155 629.67 lineto
+151.5 639.67 lineto
+closepath stroke
+grestore
+% CAGTA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+155 537 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+130.5 533.4 moveto 49 (CAGTA) alignedtext
+grestore
+% TCAGT->CAGTA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 155 592.33 moveto
+155 584.26 155 574.65 155 565.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 158.5 565.67 moveto
+155 555.67 lineto
+151.5 565.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 158.5 565.67 moveto
+155 555.67 lineto
+151.5 565.67 lineto
+closepath stroke
+grestore
+% GCTAT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 463 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+243 459.4 moveto 48 (GCTAT) alignedtext
+grestore
+% CTATC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 389 43.84 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+244 385.4 moveto 46 (CTATC) alignedtext
+grestore
+% GCTAT->CTATC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 444.33 moveto
+267 436.26 267 426.65 267 417.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 417.67 moveto
+267 407.67 lineto
+263.5 417.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 417.67 moveto
+267 407.67 lineto
+263.5 417.67 lineto
+closepath stroke
+grestore
+% TATCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 315 43.84 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+244 311.4 moveto 46 (TATCC) alignedtext
+grestore
+% CTATC->TATCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 370.33 moveto
+267 362.26 267 352.65 267 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 343.67 moveto
+267 333.67 lineto
+263.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 343.67 moveto
+267 333.67 lineto
+263.5 343.67 lineto
+closepath stroke
+grestore
+% AGTAC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 463 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 459.4 moveto 49 (AGTAC) alignedtext
+grestore
+% GTACG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 389 46.17 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23 385.4 moveto 50 (GTACG) alignedtext
+grestore
+% AGTAC->GTACG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 444.33 moveto
+48 436.26 48 426.65 48 417.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 417.67 moveto
+48 407.67 lineto
+44.5 417.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 417.67 moveto
+48 407.67 lineto
+44.5 417.67 lineto
+closepath stroke
+grestore
+% TACGC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 315 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+23.5 311.4 moveto 49 (TACGC) alignedtext
+grestore
+% GTACG->TACGC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 370.33 moveto
+48 362.26 48 352.65 48 343.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 343.67 moveto
+48 333.67 lineto
+44.5 343.67 lineto
+closepath stroke
+grestore
+% GGCCT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+211 907 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+185 903.4 moveto 52 (GGCCT) alignedtext
+grestore
+% GCCTC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+155 833 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+130.5 829.4 moveto 49 (GCCTC) alignedtext
+grestore
+% GGCCT->GCCTC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 197.44 889.09 moveto
+190.57 880 182.09 868.79 174.52 858.79 curveto
+stroke
+0 0 0 edgecolor
+newpath 177.26 856.61 moveto
+168.43 850.75 lineto
+171.67 860.83 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 177.26 856.61 moveto
+168.43 850.75 lineto
+171.67 860.83 lineto
+closepath stroke
+grestore
+% GCCTG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 833 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+241 829.4 moveto 52 (GCCTG) alignedtext
+grestore
+% GGCCT->GCCTG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 224.56 889.09 moveto
+231.38 880.06 239.79 868.96 247.32 859.01 curveto
+stroke
+0 0 0 edgecolor
+newpath 250.13 861.09 moveto
+253.37 851 lineto
+244.55 856.87 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 250.13 861.09 moveto
+253.37 851 lineto
+244.55 856.87 lineto
+closepath stroke
+grestore
+% CCTCA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+155 759 44.76 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+131 755.4 moveto 48 (CCTCA) alignedtext
+grestore
+% GCCTC->CCTCA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 155 814.33 moveto
+155 806.26 155 796.65 155 787.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 158.5 787.67 moveto
+155 777.67 lineto
+151.5 787.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 158.5 787.67 moveto
+155 777.67 lineto
+151.5 787.67 lineto
+closepath stroke
+grestore
+% CCTGG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 759 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+241 755.4 moveto 52 (CCTGG) alignedtext
+grestore
+% GCCTG->CCTGG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 814.33 moveto
+267 806.26 267 796.65 267 787.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 787.67 moveto
+267 777.67 lineto
+263.5 787.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 787.67 moveto
+267 777.67 lineto
+263.5 787.67 lineto
+closepath stroke
+grestore
+% ACGCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 241 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 237.4 moveto 51 (ACGCC) alignedtext
+grestore
+% CGCCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 167 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22.5 163.4 moveto 51 (CGCCC) alignedtext
+grestore
+% ACGCC->CGCCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 222.33 moveto
+48 214.26 48 204.65 48 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 195.67 moveto
+48 185.67 lineto
+44.5 195.67 lineto
+closepath stroke
+grestore
+% GCCCG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 93 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 89.4 moveto 52 (GCCCG) alignedtext
+grestore
+% CGCCC->GCCCG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 148.33 moveto
+48 140.26 48 130.65 48 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 121.67 moveto
+48 111.67 lineto
+44.5 121.67 lineto
+closepath stroke
+grestore
+% CCTCA->CTCAG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 155 740.33 moveto
+155 732.26 155 722.65 155 713.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 158.5 713.67 moveto
+155 703.67 lineto
+151.5 713.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 158.5 713.67 moveto
+155 703.67 lineto
+151.5 713.67 lineto
+closepath stroke
+grestore
+% CTGGC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 685 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+241 681.4 moveto 52 (CTGGC) alignedtext
+grestore
+% CCTGG->CTGGC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 740.33 moveto
+267 732.26 267 722.65 267 713.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 713.67 moveto
+267 703.67 lineto
+263.5 713.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 713.67 moveto
+267 703.67 lineto
+263.5 713.67 lineto
+closepath stroke
+grestore
+% TGGCT
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 611 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+241.5 607.4 moveto 51 (TGGCT) alignedtext
+grestore
+% CTGGC->TGGCT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 666.33 moveto
+267 658.26 267 648.65 267 639.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 639.67 moveto
+267 629.67 lineto
+263.5 639.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 639.67 moveto
+267 629.67 lineto
+263.5 639.67 lineto
+closepath stroke
+grestore
+% CAGTA->AGTAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 131.78 520.94 moveto
+116.5 510.37 96.3 496.4 79.56 484.83 curveto
+stroke
+0 0 0 edgecolor
+newpath 81.39 481.84 moveto
+71.17 479.03 lineto
+77.41 487.59 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 81.39 481.84 moveto
+71.17 479.03 lineto
+77.41 487.59 lineto
+closepath stroke
+grestore
+% AGTAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+158 463 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+133.5 459.4 moveto 49 (AGTAA) alignedtext
+grestore
+% CAGTA->AGTAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 155.76 518.33 moveto
+156.08 510.26 156.47 500.65 156.84 491.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 160.34 491.8 moveto
+157.24 481.67 lineto
+153.34 491.52 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 160.34 491.8 moveto
+157.24 481.67 lineto
+153.34 491.52 lineto
+closepath stroke
+grestore
+% AGTAA->GTAAC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 158 444.33 moveto
+158 436.26 158 426.65 158 417.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 161.5 417.67 moveto
+158 407.67 lineto
+154.5 417.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 161.5 417.67 moveto
+158 407.67 lineto
+154.5 417.67 lineto
+closepath stroke
+grestore
+% ACTAA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+159 167 45.96 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+134.5 163.4 moveto 49 (ACTAA) alignedtext
+grestore
+% ACTAA->CTAAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 159.25 148.33 moveto
+159.36 140.26 159.49 130.65 159.61 121.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 163.11 121.71 moveto
+159.75 111.67 lineto
+156.11 121.62 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 163.11 121.71 moveto
+159.75 111.67 lineto
+156.11 121.62 lineto
+closepath stroke
+grestore
+% AACTA->ACTAA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 159 222.33 moveto
+159 214.26 159 204.65 159 195.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 162.5 195.67 moveto
+159 185.67 lineto
+155.5 195.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 162.5 195.67 moveto
+159 185.67 lineto
+155.5 195.67 lineto
+closepath stroke
+grestore
+% TACGC->ACGCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 296.33 moveto
+48 288.26 48 278.65 48 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 269.67 moveto
+48 259.67 lineto
+44.5 269.67 lineto
+closepath stroke
+grestore
+% CCCGG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+48 19 48.08 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+22 15.4 moveto 52 (CCCGG) alignedtext
+grestore
+% GCCCG->CCCGG
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 48 74.33 moveto
+48 66.26 48 56.65 48 47.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 51.5 47.67 moveto
+48 37.67 lineto
+44.5 47.67 lineto
+closepath stroke
+grestore
+% GGCTA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 537 46.88 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+241.5 533.4 moveto 51 (GGCTA) alignedtext
+grestore
+% TGGCT->GGCTA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 592.33 moveto
+267 584.26 267 574.65 267 565.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 565.67 moveto
+267 555.67 lineto
+263.5 565.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 565.67 moveto
+267 555.67 lineto
+263.5 565.67 lineto
+closepath stroke
+grestore
+% GGCTA->GCTAT
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 518.33 moveto
+267 510.26 267 500.65 267 491.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 491.67 moveto
+267 481.67 lineto
+263.5 491.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 491.67 moveto
+267 481.67 lineto
+263.5 491.67 lineto
+closepath stroke
+grestore
+% ATCCC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+267 241 44.05 18.38 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+243.5 237.4 moveto 47 (ATCCC) alignedtext
+grestore
+% TATCC->ATCCC
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 267 296.33 moveto
+267 288.26 267 278.65 267 269.71 curveto
+stroke
+0 0 0 edgecolor
+newpath 270.5 269.67 moveto
+267 259.67 lineto
+263.5 269.67 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 270.5 269.67 moveto
+267 259.67 lineto
+263.5 269.67 lineto
+closepath stroke
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 359 970
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/graph/mergeTest/BridgePath b/genomix/genomix-pregelix/graph/mergeTest/BridgePath
new file mode 100644
index 0000000..0717611
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/BridgePath
@@ -0,0 +1,2 @@
+TTTCCACTCCGTG
+TTTCCACCCCGTG
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/graph/mergeTest/CyclePath b/genomix/genomix-pregelix/graph/mergeTest/CyclePath
new file mode 100644
index 0000000..04080f4
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/CyclePath
@@ -0,0 +1 @@
+GCAACTTCATCAACT
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/graph/mergeTest/LongPath b/genomix/genomix-pregelix/graph/mergeTest/LongPath
new file mode 100644
index 0000000..acd3c1a
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/LongPath
@@ -0,0 +1 @@
+GGCCTCAGTACGCCCGG
diff --git a/genomix/genomix-pregelix/graph/mergeTest/Path b/genomix/genomix-pregelix/graph/mergeTest/Path
new file mode 100644
index 0000000..f63bbcf
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/Path
@@ -0,0 +1 @@
+GGCCTCAGTACG
diff --git a/genomix/genomix-pregelix/graph/mergeTest/SimplePath b/genomix/genomix-pregelix/graph/mergeTest/SimplePath
new file mode 100644
index 0000000..80c03af
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/SimplePath
@@ -0,0 +1,3 @@
+ATATCGCATC
+AAGACAGCAC
+GCGGCAAGAA
diff --git a/genomix/genomix-pregelix/graph/mergeTest/SinglePath b/genomix/genomix-pregelix/graph/mergeTest/SinglePath
new file mode 100644
index 0000000..56ef5f8
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/SinglePath
@@ -0,0 +1 @@
+AGACAACAGT
diff --git a/genomix/genomix-pregelix/graph/mergeTest/ThreeKmer b/genomix/genomix-pregelix/graph/mergeTest/ThreeKmer
new file mode 100644
index 0000000..ec004fa
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/ThreeKmer
@@ -0,0 +1 @@
+ACTCGGT
diff --git a/genomix/genomix-pregelix/graph/mergeTest/TreePath b/genomix/genomix-pregelix/graph/mergeTest/TreePath
new file mode 100644
index 0000000..f3c13ce
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/TreePath
@@ -0,0 +1,3 @@
+GGCCTGGCTATCCC
+GGCCTCAGTAACTAAAC
+GGCCTCAGTACGCCCGG
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/graph/mergeTest/TwoKmer b/genomix/genomix-pregelix/graph/mergeTest/TwoKmer
new file mode 100644
index 0000000..8c2a74b
--- /dev/null
+++ b/genomix/genomix-pregelix/graph/mergeTest/TwoKmer
@@ -0,0 +1 @@
+ACACTG
diff --git a/genomix/genomix-pregelix/pom.xml b/genomix/genomix-pregelix/pom.xml
new file mode 100644
index 0000000..95227c5
--- /dev/null
+++ b/genomix/genomix-pregelix/pom.xml
@@ -0,0 +1,285 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>edu.uci.ics.pregelix</groupId>
+ <artifactId>genomix-pregelix</artifactId>
+ <packaging>jar</packaging>
+ <version>0.2.6-SNAPSHOT</version>
+ <name>genomix-pregelix</name>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
+
+ <profiles>
+ <profile>
+ <id>macosx</id>
+ <activation>
+ <os>
+ <name>mac os x</name>
+ </os>
+ <jdk>1.7</jdk>
+ </activation>
+ <properties>
+ <jvm.extraargs>-Djava.nio.channels.spi.SelectorProvider=sun.nio.ch.KQueueSelectorProvider</jvm.extraargs>
+ </properties>
+ </profile>
+ </profiles>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <configuration>
+ <descriptorRefs>
+ <descriptorRef>jar-with-dependencies</descriptorRef>
+ </descriptorRefs>
+ </configuration>
+ <executions>
+ <execution>
+ <id>make-my-jar-with-dependencies</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>appassembler-maven-plugin</artifactId>
+ <version>1.3</version>
+ <executions>
+ <execution>
+ <configuration>
+ <programs>
+ <program>
+ <mainClass>edu.uci.ics.genomix.pregelix.example.Client</mainClass>
+ <name>pregelix</name>
+ </program>
+ </programs>
+ <repositoryLayout>flat</repositoryLayout>
+ <repositoryName>lib</repositoryName>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>assemble</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>2.7.2</version>
+ <configuration>
+ <forkMode>pertest</forkMode>
+ <argLine>-enableassertions -Xmx2047m -Dfile.encoding=UTF-8
+ -Djava.util.logging.config.file=src/test/resources/logging.properties</argLine>
+ <includes>
+ <include>**/*TestSuite.java</include>
+ <include>**/*Test.java</include>
+ </includes>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-clean-plugin</artifactId>
+ <configuration>
+ <filesets>
+ <fileset>
+ <directory>.</directory>
+ <includes>
+ <include>teststore*</include>
+ <include>edu*</include>
+ <include>actual*</include>
+ <include>build*</include>
+ <include>expect*</include>
+ <include>ClusterController*</include>
+ <include>edu.uci.*</include>
+ </includes>
+ </fileset>
+ </filesets>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.8.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>pregelix-core</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>genomix-data</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-dataflow-std</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-api</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-dataflow-common</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-data-std</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-control-cc</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-control-nc</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.kenai.nbpwr</groupId>
+ <artifactId>org-apache-commons-io</artifactId>
+ <version>1.3.1-201002241208</version>
+ <type>nbm</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks.examples</groupId>
+ <artifactId>hyracks-integration-tests</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-ipc</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>0.20.2</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>0.20.2</version>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-core</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-core</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>genomix-hyracks</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>genomix-hadoop</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+
+ <scm>
+ <connection>scm:svn:https://hyracks.googlecode.com/svn/trunk/fullstack/pregelix</connection>
+ <developerConnection>scm:svn:https://hyracks.googlecode.com/svn/trunk/fullstack/pregelix</developerConnection>
+ <url>http://code.google.com/p/hyracks/source/browse/#svn/trunk/fullstack/pregelix</url>
+ </scm>
+
+ <distributionManagement>
+ <repository>
+ <id>hyracks-releases</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-releases/</url>
+ </repository>
+ <snapshotRepository>
+ <id>hyracks-snapshots</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-snapshots/</url>
+ </snapshotRepository>
+ </distributionManagement>
+
+ <reporting>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-changelog-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </reporting>
+
+ <repositories>
+ <repository>
+ <id>hyracks-public</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/groups/hyracks-public/</url>
+ </repository>
+ <repository>
+ <id>jboss-public</id>
+ <url>https://repository.jboss.org/nexus/content/groups/public/</url>
+ </repository>
+ </repositories>
+
+ <pluginRepositories>
+ <pluginRepository>
+ <id>hyracks-public</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/groups/hyracks-public/</url>
+ <releases>
+ <updatePolicy>always</updatePolicy>
+ </releases>
+ </pluginRepository>
+ </pluginRepositories>
+</project>
+
+
diff --git a/genomix/genomix-pregelix/src/main/assembly/binary-assembly.xml b/genomix/genomix-pregelix/src/main/assembly/binary-assembly.xml
new file mode 100755
index 0000000..0500499
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/assembly/binary-assembly.xml
@@ -0,0 +1,19 @@
+<assembly>
+ <id>binary-assembly</id>
+ <formats>
+ <format>zip</format>
+ <format>dir</format>
+ </formats>
+ <includeBaseDirectory>false</includeBaseDirectory>
+ <fileSets>
+ <fileSet>
+ <directory>target/appassembler/bin</directory>
+ <outputDirectory>bin</outputDirectory>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>target/appassembler/lib</directory>
+ <outputDirectory>lib</outputDirectory>
+ </fileSet>
+ </fileSets>
+</assembly>
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryDataCleanVertexInputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryDataCleanVertexInputFormat.java
new file mode 100644
index 0000000..e4f0cde
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryDataCleanVertexInputFormat.java
@@ -0,0 +1,104 @@
+package edu.uci.ics.genomix.pregelix.api.io.binary;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+
+import edu.uci.ics.pregelix.api.io.VertexInputFormat;
+import edu.uci.ics.pregelix.api.io.VertexReader;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+public class BinaryDataCleanVertexInputFormat<I extends WritableComparable<?>, V extends Writable, E extends Writable, M extends Writable>
+        extends VertexInputFormat<I, V, E, M> {
+
+    /** Sequence-file input format that split computation is delegated to. */
+    @SuppressWarnings("rawtypes")
+    protected SequenceFileInputFormat binaryInputFormat = new SequenceFileInputFormat();
+
+    /** Delegates to the wrapped format; the numWorkers hint is deliberately not used. */
+    @SuppressWarnings("unchecked")
+    @Override
+    public List<InputSplit> getSplits(JobContext context, int numWorkers) throws IOException, InterruptedException {
+        return binaryInputFormat.getSplits(context);
+    }
+
+    /**
+     * Base implementation that supplies no reader and returns {@code null};
+     * subclasses such as {@code GraphCleanInputFormat} override it.
+     */
+    @Override
+    public VertexReader<I, V, E, M> createVertexReader(InputSplit split, TaskAttemptContext context) throws IOException {
+        return null;
+    }
+
+    /**
+     * Skeleton vertex reader backed by a Hadoop {@link RecordReader} whose
+     * keys are k-mers and whose values are vertex values. Subclasses supply
+     * the vertex-producing methods of {@link VertexReader} not defined here.
+     *
+     * @param <I> vertex index type
+     * @param <V> vertex value type
+     * @param <E> edge value type
+     * @param <M> message type
+     */
+    public static abstract class BinaryDataCleanVertexReader<I extends WritableComparable<?>, V extends Writable, E extends Writable, M extends Writable>
+            implements VertexReader<I, V, E, M> {
+        /** Underlying reader that every call is forwarded to. */
+        private final RecordReader<VKmerBytesWritable, VertexValueWritable> delegate;
+        /** Task context remembered by initialize. */
+        private TaskAttemptContext taskContext;
+
+        /**
+         * Wraps the given record reader.
+         *
+         * @param recordReader
+         *            reader obtained from the sequence-file input format
+         */
+        public BinaryDataCleanVertexReader(RecordReader<VKmerBytesWritable, VertexValueWritable> recordReader) {
+            delegate = recordReader;
+        }
+
+        @Override
+        public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException,
+                InterruptedException {
+            delegate.initialize(inputSplit, context);
+            taskContext = context;
+        }
+
+        @Override
+        public float getProgress() throws IOException, InterruptedException {
+            return delegate.getProgress();
+        }
+
+        @Override
+        public void close() throws IOException {
+            delegate.close();
+        }
+
+        /**
+         * Exposes the wrapped reader to subclasses.
+         *
+         * @return the record reader given to the constructor
+         */
+        protected RecordReader<VKmerBytesWritable, VertexValueWritable> getRecordReader() {
+            return delegate;
+        }
+
+        /**
+         * Exposes the task context to subclasses.
+         *
+         * @return the context passed to initialize, or null before that call
+         */
+        protected TaskAttemptContext getContext() {
+            return taskContext;
+        }
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryDataCleanVertexOutputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryDataCleanVertexOutputFormat.java
new file mode 100644
index 0000000..30510a3
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryDataCleanVertexOutputFormat.java
@@ -0,0 +1,102 @@
+package edu.uci.ics.genomix.pregelix.api.io.binary;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.pregelix.api.io.VertexOutputFormat;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+
+/**
+ * Base class for vertex output formats that persist vertices through a
+ * Hadoop {@link SequenceFileOutputFormat}. Subclasses provide the vertex
+ * writer; output-spec checking and committing are delegated.
+ *
+ * @param <I> vertex index type
+ * @param <V> vertex value type
+ * @param <E> edge value type
+ */
+@SuppressWarnings("rawtypes")
+public abstract class BinaryDataCleanVertexOutputFormat<I extends WritableComparable, V extends Writable, E extends Writable>
+        extends VertexOutputFormat<I, V, E> {
+
+    /**
+     * Sequence-file output format that spec checking and committing are delegated to.
+     */
+    protected SequenceFileOutputFormat binaryOutputFormat = new SequenceFileOutputFormat();
+
+    @Override
+    public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
+        return binaryOutputFormat.getOutputCommitter(context);
+    }
+
+    @Override
+    public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
+        binaryOutputFormat.checkOutputSpecs(context);
+    }
+
+    /**
+     * Skeleton vertex writer backed by a Hadoop {@link RecordWriter} whose
+     * keys are k-mers and whose values are vertex values. Subclasses
+     * implement the per-vertex write.
+     *
+     * @param <I> vertex index type
+     * @param <V> vertex value type
+     * @param <E> edge value type
+     */
+    public static abstract class BinaryVertexWriter<I extends WritableComparable, V extends Writable, E extends Writable>
+            implements VertexWriter<I, V, E> {
+        /** Underlying writer that receives the records. */
+        private final RecordWriter<VKmerBytesWritable, VertexValueWritable> delegate;
+        /** Task context remembered by initialize. */
+        private TaskAttemptContext taskContext;
+
+        /**
+         * Wraps the given record writer.
+         *
+         * @param lineRecordWriter
+         *            writer obtained from the sequence-file output format
+         */
+        public BinaryVertexWriter(RecordWriter<VKmerBytesWritable, VertexValueWritable> lineRecordWriter) {
+            delegate = lineRecordWriter;
+        }
+
+        /** Only records the context; the wrapped writer is not touched here. */
+        @Override
+        public void initialize(TaskAttemptContext context) throws IOException {
+            taskContext = context;
+        }
+
+        /** Closes the wrapped writer with the context supplied to this call. */
+        @Override
+        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+            delegate.close(context);
+        }
+
+        /**
+         * Exposes the wrapped writer.
+         *
+         * @return the record writer given to the constructor
+         */
+        public RecordWriter<VKmerBytesWritable, VertexValueWritable> getRecordWriter() {
+            return delegate;
+        }
+
+        /**
+         * Exposes the task context.
+         *
+         * @return the context passed to initialize, or null before that call
+         */
+        public TaskAttemptContext getContext() {
+            return taskContext;
+        }
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/InitialGraphCleanVertexInputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/InitialGraphCleanVertexInputFormat.java
new file mode 100644
index 0000000..4221865
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/InitialGraphCleanVertexInputFormat.java
@@ -0,0 +1,104 @@
+package edu.uci.ics.genomix.pregelix.api.io.binary;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+
+import edu.uci.ics.pregelix.api.io.VertexInputFormat;
+import edu.uci.ics.pregelix.api.io.VertexReader;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+public class InitialGraphCleanVertexInputFormat<I extends WritableComparable<?>, V extends Writable, E extends Writable, M extends Writable>
+        extends VertexInputFormat<I, V, E, M> {
+
+    /** Sequence-file input format that split computation is delegated to. */
+    @SuppressWarnings("rawtypes")
+    protected SequenceFileInputFormat binaryInputFormat = new SequenceFileInputFormat();
+
+    /** Delegates to the wrapped format; the numWorkers hint is deliberately not used. */
+    @SuppressWarnings("unchecked")
+    @Override
+    public List<InputSplit> getSplits(JobContext context, int numWorkers) throws IOException, InterruptedException {
+        return binaryInputFormat.getSplits(context);
+    }
+
+    /**
+     * Base implementation that supplies no reader and returns {@code null};
+     * subclasses such as {@code InitialGraphCleanInputFormat} override it.
+     */
+    @Override
+    public VertexReader<I, V, E, M> createVertexReader(InputSplit split, TaskAttemptContext context) throws IOException {
+        return null;
+    }
+
+    /**
+     * Skeleton vertex reader backed by a Hadoop {@link RecordReader} whose
+     * keys are k-mers and whose values are nodes. Subclasses supply the
+     * vertex-producing methods of {@link VertexReader} not defined here.
+     *
+     * @param <I> vertex index type
+     * @param <V> vertex value type
+     * @param <E> edge value type
+     * @param <M> message type
+     */
+    public static abstract class BinaryVertexReader<I extends WritableComparable<?>, V extends Writable, E extends Writable, M extends Writable>
+            implements VertexReader<I, V, E, M> {
+        /** Underlying reader that every call is forwarded to. */
+        private final RecordReader<VKmerBytesWritable, NodeWritable> delegate;
+        /** Task context remembered by initialize. */
+        private TaskAttemptContext taskContext;
+
+        /**
+         * Wraps the given record reader.
+         *
+         * @param recordReader
+         *            reader obtained from the sequence-file input format
+         */
+        public BinaryVertexReader(RecordReader<VKmerBytesWritable, NodeWritable> recordReader) {
+            delegate = recordReader;
+        }
+
+        @Override
+        public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException,
+                InterruptedException {
+            delegate.initialize(inputSplit, context);
+            taskContext = context;
+        }
+
+        @Override
+        public float getProgress() throws IOException, InterruptedException {
+            return delegate.getProgress();
+        }
+
+        @Override
+        public void close() throws IOException {
+            delegate.close();
+        }
+
+        /**
+         * Exposes the wrapped reader to subclasses.
+         *
+         * @return the record reader given to the constructor
+         */
+        protected RecordReader<VKmerBytesWritable, NodeWritable> getRecordReader() {
+            return delegate;
+        }
+
+        /**
+         * Exposes the task context to subclasses.
+         *
+         * @return the context passed to initialize, or null before that call
+         */
+        protected TaskAttemptContext getContext() {
+            return taskContext;
+        }
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java
new file mode 100644
index 0000000..73a8648
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java
@@ -0,0 +1,79 @@
+package edu.uci.ics.genomix.pregelix.client;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.kohsuke.args4j.CmdLineException;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.pregelix.operator.pathmerge.BasicGraphCleanVertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.core.base.IDriver.Plan;
+import edu.uci.ics.pregelix.core.driver.Driver;
+
+/** Parses the command-line options of a genomix Pregelix job and submits the job to a cluster controller. */
+public class Client {
+    /** Command-line options; the fields are filled in by args4j from the {@link Option} annotations. */
+    private static class Options {
+        @Option(name = "-inputpaths", usage = "comma seprated input paths", required = true)
+        public String inputPaths;
+
+        @Option(name = "-outputpath", usage = "output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-ip", usage = "ip address of cluster controller", required = true)
+        public String ipAddress;
+
+        @Option(name = "-port", usage = "port of cluster controller", required = false)
+        public int port;
+
+        @Option(name = "-plan", usage = "query plan choice", required = false)
+        public Plan planChoice = Plan.OUTER_JOIN;
+
+        @Option(name = "-tmpKmer-kmerByteSize", usage = "the kmerByteSize of tmpKmer", required = false)
+        public int sizeKmer;
+
+        @Option(name = "-num-iteration", usage = "max number of iterations, for pagerank job only", required = false)
+        public int numIteration = -1;
+
+        @Option(name = "-runtime-profiling", usage = "whether to do runtime profifling", required = false)
+        public String profiling = "false";
+    }
+
+    /** Parses {@code args}, configures {@code job} from them and runs it through a {@link Driver}. */
+    public static void run(String[] args, PregelixJob job) throws Exception {
+        Options options = prepareJob(args, job);
+        Driver driver = new Driver(Client.class);
+        driver.runJob(job, options.planChoice, options.ipAddress, options.port, Boolean.parseBoolean(options.profiling));
+    }
+
+    /**
+     * Parses the arguments and copies input/output paths, k-mer size and iteration limit into the job.
+     *
+     * @return the parsed options
+     */
+    private static Options prepareJob(String[] args, PregelixJob job) throws CmdLineException, IOException {
+        Options options = new Options();
+        CmdLineParser parser = new CmdLineParser(options);
+        parser.parseArgument(args);
+
+        // ';' separates path groups; every group is handed to Hadoop unchanged, so a group may
+        // itself be a comma-separated list.
+        String[] inputs = options.inputPaths.split(";");
+        FileInputFormat.setInputPaths(job, inputs[0]);
+        for (int i = 1; i < inputs.length; i++) {
+            // Add the i-th group; adding inputs[0] here would repeat the first group and drop the rest.
+            FileInputFormat.addInputPaths(job, inputs[i]);
+        }
+        FileOutputFormat.setOutputPath(job, new Path(options.outputPath));
+        job.getConfiguration().setInt(BasicGraphCleanVertex.KMER_SIZE, options.sizeKmer);
+        // Only a positive limit is forwarded; the default of -1 leaves ITERATIONS unset.
+        if (options.numIteration > 0) {
+            job.getConfiguration().setInt(BasicGraphCleanVertex.ITERATIONS, options.numIteration);
+        }
+        return options;
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/GraphCleanInputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/GraphCleanInputFormat.java
new file mode 100644
index 0000000..e0c8fa5
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/GraphCleanInputFormat.java
@@ -0,0 +1,74 @@
+package edu.uci.ics.genomix.pregelix.format;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexReader;
+import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryDataCleanVertexInputFormat;
+import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryDataCleanVertexInputFormat.BinaryDataCleanVertexReader;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+public class GraphCleanInputFormat extends
+ BinaryDataCleanVertexInputFormat<VKmerBytesWritable, VertexValueWritable, NullWritable, MessageWritable> {
+ /**
+ * Format INPUT
+ */
+ @SuppressWarnings("unchecked")
+ @Override
+ public VertexReader<VKmerBytesWritable, VertexValueWritable, NullWritable, MessageWritable> createVertexReader(
+ InputSplit split, TaskAttemptContext context) throws IOException {
+ return new BinaryDataCleanLoadGraphReader(binaryInputFormat.createRecordReader(split, context));
+ }
+}
+
+@SuppressWarnings("rawtypes")
+class BinaryDataCleanLoadGraphReader extends
+        BinaryDataCleanVertexReader<VKmerBytesWritable, VertexValueWritable, NullWritable, MessageWritable> {
+    /** Vertex instance created on first use and reused for every record. */
+    private Vertex vertex;
+    /** Reusable holders for the current key and value. */
+    private VKmerBytesWritable vertexId = new VKmerBytesWritable();
+    private VertexValueWritable vertexValue = new VertexValueWritable();
+
+    public BinaryDataCleanLoadGraphReader(RecordReader<VKmerBytesWritable, VertexValueWritable> recordReader) {
+        super(recordReader);
+    }
+
+    @Override
+    public boolean nextVertex() throws IOException, InterruptedException {
+        return getRecordReader().nextKeyValue();
+    }
+
+    /**
+     * Loads the current key/value pair into the shared vertex object. The
+     * same vertex instance is reused across calls.
+     */
+    @SuppressWarnings("unchecked")
+    @Override
+    public Vertex<VKmerBytesWritable, VertexValueWritable, NullWritable, MessageWritable> getCurrentVertex()
+            throws IOException, InterruptedException {
+        if (vertex == null) {
+            vertex = (Vertex) BspUtils.createVertex(getContext().getConfiguration());
+        }
+        vertex.getMsgList().clear();
+        vertex.getEdges().clear();
+        vertex.reset();
+        RecordReader<VKmerBytesWritable, VertexValueWritable> source = getRecordReader();
+        if (source == null) {
+            return vertex;
+        }
+        vertexId.setAsCopy(source.getCurrentKey());
+        vertex.setVertexId(vertexId);
+        vertexValue.set(source.getCurrentValue());
+        vertex.setVertexValue(vertexValue);
+        return vertex;
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/GraphCleanOutputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/GraphCleanOutputFormat.java
new file mode 100644
index 0000000..77960ba
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/GraphCleanOutputFormat.java
@@ -0,0 +1,41 @@
+package edu.uci.ics.genomix.pregelix.format;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryDataCleanVertexOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+
+public class GraphCleanOutputFormat extends
+        BinaryDataCleanVertexOutputFormat<VKmerBytesWritable, VertexValueWritable, NullWritable> {
+
+    /** Vertex writer that emits each vertex as one (vertex id, vertex value) record. */
+    public static class BinaryLoadGraphVertexWriter extends
+            BinaryVertexWriter<VKmerBytesWritable, VertexValueWritable, NullWritable> {
+        public BinaryLoadGraphVertexWriter(RecordWriter<VKmerBytesWritable, VertexValueWritable> lineRecordWriter) {
+            super(lineRecordWriter);
+        }
+
+        @Override
+        public void writeVertex(Vertex<VKmerBytesWritable, VertexValueWritable, NullWritable, ?> vertex)
+                throws IOException, InterruptedException {
+            RecordWriter<VKmerBytesWritable, VertexValueWritable> sink = getRecordWriter();
+            sink.write(vertex.getVertexId(), vertex.getVertexValue());
+        }
+    }
+
+    /** Wraps the record writer of the underlying sequence-file format in a vertex writer. */
+    @Override
+    public VertexWriter<VKmerBytesWritable, VertexValueWritable, NullWritable> createVertexWriter(
+            TaskAttemptContext context) throws IOException, InterruptedException {
+        @SuppressWarnings("unchecked")
+        RecordWriter<VKmerBytesWritable, VertexValueWritable> sequenceWriter = binaryOutputFormat.getRecordWriter(context);
+        return new BinaryLoadGraphVertexWriter(sequenceWriter);
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/InitialGraphCleanInputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/InitialGraphCleanInputFormat.java
new file mode 100644
index 0000000..edb305f
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/InitialGraphCleanInputFormat.java
@@ -0,0 +1,86 @@
+package edu.uci.ics.genomix.pregelix.format;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexReader;
+import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable.State;
+import edu.uci.ics.genomix.pregelix.api.io.binary.InitialGraphCleanVertexInputFormat;
+import edu.uci.ics.genomix.pregelix.api.io.binary.InitialGraphCleanVertexInputFormat.BinaryVertexReader;
+
+public class InitialGraphCleanInputFormat extends
+ InitialGraphCleanVertexInputFormat<VKmerBytesWritable, VertexValueWritable, NullWritable, MessageWritable> {
+ /**
+ * Format INPUT
+ */
+ @SuppressWarnings("unchecked")
+ @Override
+ public VertexReader<VKmerBytesWritable, VertexValueWritable, NullWritable, MessageWritable> createVertexReader(
+ InputSplit split, TaskAttemptContext context) throws IOException {
+ return new BinaryLoadGraphReader(binaryInputFormat.createRecordReader(split, context));
+ }
+}
+
+@SuppressWarnings("rawtypes")
+class BinaryLoadGraphReader extends
+        BinaryVertexReader<VKmerBytesWritable, VertexValueWritable, NullWritable, MessageWritable> {
+    /** Vertex instance created on first use and reused for every record. */
+    private Vertex vertex;
+    /** Reusable holders for the current key, the raw node and the derived vertex value. */
+    private VKmerBytesWritable vertexId = new VKmerBytesWritable();
+    private NodeWritable node = new NodeWritable();
+    private VertexValueWritable vertexValue = new VertexValueWritable();
+
+    public BinaryLoadGraphReader(RecordReader<VKmerBytesWritable, NodeWritable> recordReader) {
+        super(recordReader);
+    }
+
+    @Override
+    public boolean nextVertex() throws IOException, InterruptedException {
+        return getRecordReader().nextKeyValue();
+    }
+
+    /**
+     * Converts the current (k-mer, node) record into the shared vertex object:
+     * the key becomes the vertex id, and the node's id list and FF/FR/RF/RR
+     * lists are set on the vertex value, whose state is set to IS_NON.
+     */
+    @SuppressWarnings("unchecked")
+    @Override
+    public Vertex<VKmerBytesWritable, VertexValueWritable, NullWritable, MessageWritable> getCurrentVertex()
+            throws IOException, InterruptedException {
+        if (vertex == null) {
+            vertex = (Vertex) BspUtils.createVertex(getContext().getConfiguration());
+        }
+        vertex.getMsgList().clear();
+        vertex.getEdges().clear();
+        vertex.reset();
+        RecordReader<VKmerBytesWritable, NodeWritable> source = getRecordReader();
+        if (source == null) {
+            return vertex;
+        }
+        vertexId.setAsCopy(source.getCurrentKey());
+        vertex.setVertexId(vertexId);
+        node.set(source.getCurrentValue());
+        vertexValue.setNodeIdList(node.getNodeIdList());
+        vertexValue.setFFList(node.getFFList());
+        vertexValue.setFRList(node.getFRList());
+        vertexValue.setRFList(node.getRFList());
+        vertexValue.setRRList(node.getRRList());
+        // TODO make this more efficient (don't use toString)
+        vertexValue.setActualKmer(new VKmerBytesWritable(vertexId));
+        vertexValue.setState(State.IS_NON);
+        vertex.setVertexValue(vertexValue);
+        return vertex;
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/P2PathMergeOutputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/P2PathMergeOutputFormat.java
new file mode 100644
index 0000000..b4c0aee
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/P2PathMergeOutputFormat.java
@@ -0,0 +1,46 @@
+package edu.uci.ics.genomix.pregelix.format;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryDataCleanVertexOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable.State;
+import edu.uci.ics.genomix.pregelix.operator.pathmerge.P2ForPathMergeVertex;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+
+public class P2PathMergeOutputFormat extends
+        BinaryDataCleanVertexOutputFormat<VKmerBytesWritable, VertexValueWritable, NullWritable> {
+
+    @Override
+    public VertexWriter<VKmerBytesWritable, VertexValueWritable, NullWritable> createVertexWriter(
+            TaskAttemptContext context) throws IOException, InterruptedException {
+        @SuppressWarnings("unchecked")
+        RecordWriter<VKmerBytesWritable, VertexValueWritable> binaryWriter = binaryOutputFormat.getRecordWriter(context);
+        return new BinaryLoadGraphVertexWriter(binaryWriter);
+    }
+
+    /** VertexWriter that emits a vertex only when its state, masked with State.VERTEX_MASK, equals State.IS_FINAL. */
+    public static class BinaryLoadGraphVertexWriter extends
+            BinaryVertexWriter<VKmerBytesWritable, VertexValueWritable, NullWritable> {
+        public BinaryLoadGraphVertexWriter(RecordWriter<VKmerBytesWritable, VertexValueWritable> lineRecordWriter) {
+            super(lineRecordWriter);
+        }
+
+        @Override
+        public void writeVertex(Vertex<VKmerBytesWritable, VertexValueWritable, NullWritable, ?> vertex)
+                throws IOException, InterruptedException {
+            VertexValueWritable value = vertex.getVertexValue();
+            if ((byte) (value.getState() & State.VERTEX_MASK) == State.IS_FINAL) {
+                getRecordWriter().write(vertex.getVertexId(), value);
+            }
+            // the static flag is cleared after every vertex, whether or not it was written
+            P2ForPathMergeVertex.fakeVertexExist = false;
+        }
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/GenerateGraphViz.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/GenerateGraphViz.java
new file mode 100644
index 0000000..89abdc2
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/GenerateGraphViz.java
@@ -0,0 +1,188 @@
+package edu.uci.ics.genomix.pregelix.graph;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.util.Iterator;
+
+import org.apache.commons.io.filefilter.WildcardFileFilter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+public class GenerateGraphViz {
+
+ /**
+  * Reads every "part*" sequence file of (kmer, NodeWritable) records under srcDir, builds a DOT
+  * graph in memory, prints it, and renders it with dot to destDir/result.ps.
+  */
+ public static void convertGraphBuildingOutputToGraphViz(String srcDir, String destDir) throws Exception {
+     GraphViz gv = new GraphViz();
+     gv.addln(gv.start_graph());
+
+     Configuration conf = new Configuration();
+     FileSystem fileSys = FileSystem.getLocal(conf);
+     // listFiles() returns null when srcDir does not exist or is not a directory
+     File[] parts = new File(srcDir).listFiles((FilenameFilter) (new WildcardFileFilter("part*")));
+     if (parts == null) {
+         throw new IllegalArgumentException("Not a readable directory: " + srcDir);
+     }
+     for (File f : parts) {
+         SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, new Path(f.getAbsolutePath()), conf);
+         try {
+             VKmerBytesWritable key = new VKmerBytesWritable();
+             NodeWritable value = new NodeWritable();
+             gv.addln("rankdir=LR\n");
+             while (reader.next(key, value)) {
+                 String nodeName = key.toString();
+                 /** convert edge to graph **/
+                 gv.addln(convertEdgeToGraph(nodeName, value));
+                 /** add readIdSet **/
+                 gv.addln(nodeName + " [shape=record, label = \"<f0> " + nodeName
+                         + "|<f1> " + value.getNodeIdList().printReadIdSet()
+                         + "\"]\n");
+             }
+         } finally {
+             reader.close();
+         }
+     }
+
+     gv.addln(gv.end_graph());
+     System.out.println(gv.getDotSource());
+
+     String type = "ps";
+     File folder = new File(destDir);
+     folder.mkdirs();
+     File out = new File(destDir + "/result." + type); // Linux
+     // getGraph() returns null when the DOT source could not be written or dot could not be run
+     byte[] image = gv.getGraph(gv.getDotSource(), type);
+     if (image == null) {
+         throw new IllegalStateException("dot produced no output for " + out.getPath());
+     }
+     gv.writeGraphToFile(image, out);
+ }
+
+ /** Same as the graph-building variant, but the "part*" files hold (kmer, VertexValueWritable) records. */
+ public static void convertGraphCleanOutputToGraphViz(String srcDir, String destDir) throws Exception {
+     GraphViz gv = new GraphViz();
+     gv.addln(gv.start_graph());
+     Configuration conf = new Configuration();
+     FileSystem fileSys = FileSystem.getLocal(conf);
+     // listFiles() returns null when srcDir does not exist or is not a directory
+     File[] parts = new File(srcDir).listFiles((FilenameFilter) (new WildcardFileFilter("part*")));
+     if (parts == null) {
+         throw new IllegalArgumentException("Not a readable directory: " + srcDir);
+     }
+     for (File f : parts) {
+         SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, new Path(f.getAbsolutePath()), conf);
+         try {
+             VKmerBytesWritable key = new VKmerBytesWritable();
+             VertexValueWritable value = new VertexValueWritable();
+             gv.addln("rankdir=LR\n");
+             while (reader.next(key, value)) {
+                 String nodeName = key.toString();
+                 /** convert edge to graph **/
+                 gv.addln(convertEdgeToGraph(nodeName, value));
+                 /** add readIdSet **/
+                 gv.addln(nodeName + " [shape=record, label = \"<f0> " + nodeName
+                         + "|<f1> " + value.getNodeIdList().printReadIdSet()
+                         + "\"]\n");
+             }
+         } finally {
+             reader.close();
+         }
+     }
+
+     gv.addln(gv.end_graph());
+     System.out.println(gv.getDotSource());
+
+     String type = "ps";
+     File folder = new File(destDir);
+     folder.mkdirs();
+     File out = new File(destDir + "/result." + type); // Linux
+     // getGraph() returns null when the DOT source could not be written or dot could not be run
+     byte[] image = gv.getGraph(gv.getDotSource(), type);
+     if (image == null) {
+         throw new IllegalStateException("dot produced no output for " + out.getPath());
+     }
+     gv.writeGraphToFile(image, out);
+ }
+
+ /**
+  * For graph building: renders the FF/FR/RF/RR lists of a node as DOT edge statements.
+  * @param outputNode DOT name of the source node, used as the left-hand side of every edge
+  * @param value node whose four edge lists are rendered
+  * @return one "src -> dst[...]" line per edge, or just outputNode when no edge exists (checked by length, not ==)
+  */
+ public static String convertEdgeToGraph(String outputNode, NodeWritable value){
+     StringBuilder edges = new StringBuilder();
+     Iterator<VKmerBytesWritable> kmerIterator;
+     kmerIterator = value.getFFList().iterator();
+     while (kmerIterator.hasNext()) {
+         VKmerBytesWritable edge = kmerIterator.next();
+         edges.append(outputNode).append(" -> ").append(edge.toString()).append("[color = \"black\" label =\"FF\"]\n");
+     }
+     kmerIterator = value.getFRList().iterator();
+     while (kmerIterator.hasNext()) {
+         VKmerBytesWritable edge = kmerIterator.next();
+         edges.append(outputNode).append(" -> ").append(edge.toString()).append("[color = \"blue\" label =\"FR\"]\n");
+     }
+     kmerIterator = value.getRFList().iterator();
+     while (kmerIterator.hasNext()) {
+         VKmerBytesWritable edge = kmerIterator.next();
+         edges.append(outputNode).append(" -> ").append(edge.toString()).append("[color = \"green\" label =\"RF\"]\n");
+     }
+     kmerIterator = value.getRRList().iterator();
+     while (kmerIterator.hasNext()) {
+         VKmerBytesWritable edge = kmerIterator.next();
+         edges.append(outputNode).append(" -> ").append(edge.toString()).append("[color = \"red\" label =\"RR\"]\n");
+     }
+     //TODO should output actualKmer instead of kmer
+     if (edges.length() == 0)
+         edges.append(outputNode);
+     return edges.toString();
+ }
+
+ /**
+  * For graph clean: renders the FF/FR/RF/RR lists of a vertex value as DOT edge statements.
+  * @param outputNode DOT name of the source node, used as the left-hand side of every edge
+  * @param value vertex value whose four edge lists are rendered
+  * @return one "src -> dst[...]" line per edge, or just outputNode when no edge exists (checked by length, not ==)
+  */
+ public static String convertEdgeToGraph(String outputNode, VertexValueWritable value){
+     StringBuilder edges = new StringBuilder();
+     Iterator<VKmerBytesWritable> kmerIterator;
+     kmerIterator = value.getFFList().iterator();
+     while (kmerIterator.hasNext()) {
+         VKmerBytesWritable edge = kmerIterator.next();
+         edges.append(outputNode).append(" -> ").append(edge.toString()).append("[color = \"black\" label =\"FF\"]\n");
+     }
+     kmerIterator = value.getFRList().iterator();
+     while (kmerIterator.hasNext()) {
+         VKmerBytesWritable edge = kmerIterator.next();
+         edges.append(outputNode).append(" -> ").append(edge.toString()).append("[color = \"blue\" label =\"FR\"]\n");
+     }
+     kmerIterator = value.getRFList().iterator();
+     while (kmerIterator.hasNext()) {
+         VKmerBytesWritable edge = kmerIterator.next();
+         edges.append(outputNode).append(" -> ").append(edge.toString()).append("[color = \"green\" label =\"RF\"]\n");
+     }
+     kmerIterator = value.getRRList().iterator();
+     while (kmerIterator.hasNext()) {
+         VKmerBytesWritable edge = kmerIterator.next();
+         edges.append(outputNode).append(" -> ").append(edge.toString()).append("[color = \"red\" label =\"RR\"]\n");
+     }
+     //TODO should output actualKmer instead of kmer
+     if (edges.length() == 0)
+         edges.append(outputNode);
+     return edges.toString();
+ }
+ /** Ad-hoc entry point: renders one hard-coded graph-clean output directory into "graphtest". */
+ public static void main(String[] args) throws Exception {
+     convertGraphCleanOutputToGraphViz("data/actual/bubbleadd/BubbleAddGraph/bin/5", "graphtest");
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/GraphViz.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/GraphViz.java
new file mode 100644
index 0000000..4175595
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/GraphViz.java
@@ -0,0 +1,297 @@
+package edu.uci.ics.genomix.pregelix.graph;
+
+// GraphViz.java - a simple API to call dot from Java programs
+
+/*$Id$*/
+/*
+ ******************************************************************************
+ * *
+ * (c) Copyright 2003 Laszlo Szathmary *
+ * *
+ * This program is free software; you can redistribute it and/or modify it *
+ * under the terms of the GNU Lesser General Public License as published by *
+ * the Free Software Foundation; either version 2.1 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, but *
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public *
+ * License for more details. *
+ * *
+ * You should have received a copy of the GNU Lesser General Public License *
+ * along with this program; if not, write to the Free Software Foundation, *
+ * Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *
+ * *
+ ******************************************************************************
+ */
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.InputStreamReader;
+
+/**
+ * <dl>
+ * <dt>Purpose: GraphViz Java API
+ * <dd>
+ * <dt>Description:
+ * <dd>With this Java class you can simply call dot from your Java programs
+ * <dt>Example usage:
+ * <dd>
+ *
+ * <pre>
+ * GraphViz gv = new GraphViz();
+ * gv.addln(gv.start_graph());
+ * gv.addln("A -> B;");
+ * gv.addln("A -> C;");
+ * gv.addln(gv.end_graph());
+ * System.out.println(gv.getDotSource());
+ *
+ * String type = "gif";
+ * File out = new File("out." + type); // out.gif in this example
+ * gv.writeGraphToFile(gv.getGraph(gv.getDotSource(), type), out);
+ * </pre>
+ *
+ * </dd>
+ * </dl>
+ *
+ * @version v0.4, 2011/02/05 (February) -- Patch of Keheliya Gallaba is added. Now you
+ * can specify the type of the output file: gif, dot, fig, pdf, ps, svg, png, etc.
+ * @version v0.3, 2010/11/29 (November) -- Windows support + ability
+ * to read the graph from a text file
+ * @version v0.2, 2010/07/22 (July) -- bug fix
+ * @version v0.1, 2003/12/04 (December) -- first release
+ * @author Laszlo Szathmary (<a href="jabba.laci@gmail.com">jabba.laci@gmail.com</a>)
+ */
+public class GraphViz {
+ /**
+ * The dir. where temporary files will be created.
+ */
+ private static String TEMP_DIR = "/tmp"; // Linux
+ // private static String TEMP_DIR = "c:/temp"; // Windows
+
+ /**
+ * Where is your dot program located? It will be called externally.
+ */
+ private static String DOT = "/usr/bin/dot"; // Linux
+ // private static String DOT = "c:/Program Files/Graphviz2.26.3/bin/dot.exe"; // Windows
+
+ /**
+ * The source of the graph written in dot language.
+ */
+ private StringBuilder graph = new StringBuilder();
+
+ /**
+ * Constructor: creates a new GraphViz object that will contain
+ * a graph.
+ */
+ public GraphViz() {
+ }
+
+ /**
+ * Returns the graph's source description in dot language.
+ *
+ * @return Source of the graph in dot language.
+ */
+ public String getDotSource() {
+ return graph.toString();
+ }
+
+ /**
+ * Adds a string to the graph's source (without newline).
+ */
+ public void add(String line) {
+ graph.append(line);
+ }
+
+ /**
+ * Adds a string to the graph's source (with newline).
+ */
+ public void addln(String line) {
+     graph.append(line).append("\n");
+ }
+
+ /**
+ * Adds a newline to the graph's source.
+ */
+ public void addln() {
+ graph.append('\n');
+ }
+
+ /**
+ * Returns the graph as an image in binary format.
+ *
+ * @param dot_source
+ * Source of the graph to be drawn.
+ * @param type
+ * Type of the output image to be produced, e.g.: gif, dot, fig, pdf, ps, svg, png.
+ * @return A byte array containing the image of the graph.
+ */
+ public byte[] getGraph(String dot_source, String type) {
+     try {
+         File dotFile = writeDotSourceToFile(dot_source);
+         // writeDotSourceToFile reports failure with null rather than an exception
+         if (dotFile == null) {
+             return null;
+         }
+         byte[] image = get_img_stream(dotFile, type);
+         // the temporary .dot file is no longer needed once dot has run
+         if (!dotFile.delete()) {
+             System.err.println("Warning: " + dotFile.getAbsolutePath() + " could not be deleted!");
+         }
+         return image;
+     } catch (java.io.IOException ioe) {
+         return null;
+     }
+ }
+
+ /**
+ * Writes the graph's image in a file.
+ *
+ * @param img
+ * A byte array containing the image of the graph.
+ * @param file
+ * Name of the file to where we want to write.
+ * @return Success: 1, Failure: -1
+ */
+ public int writeGraphToFile(byte[] img, String file) {
+     // wrap the path and let the File-based overload do the actual write
+     return writeGraphToFile(img, new File(file));
+ }
+
+ /**
+  * Writes the graph's image in a file; the output stream is closed even when the write fails.
+  * @param img A byte array containing the image of the graph.
+  * @param to A File object to where we want to write.
+  * @return Success: 1, Failure: -1
+  */
+ public int writeGraphToFile(byte[] img, File to) {
+     try {
+         FileOutputStream fos = new FileOutputStream(to);
+         try {
+             fos.write(img);
+         } finally {
+             fos.close();
+         }
+     } catch (java.io.IOException ioe) {
+         return -1;
+     }
+     return 1;
+ }
+
+ /**
+  * It will call the external dot program, and return the image in
+  * binary format.
+  *
+  * @param dot
+  *            File containing the source of the graph (in dot language).
+  * @param type
+  *            Type of the output image to be produced, e.g.: gif, dot, fig, pdf, ps, svg, png.
+  * @return The image of the graph in the requested format, or null if dot could not be run or its output not read.
+  */
+ private byte[] get_img_stream(File dot, String type) {
+     File img;
+     byte[] img_stream = null;
+     try {
+         img = File.createTempFile("graph_", "." + type, new File(GraphViz.TEMP_DIR));
+         Runtime rt = Runtime.getRuntime();
+         // patch by Mike Chenault
+         String[] args = { DOT, "-T" + type, dot.getAbsolutePath(), "-o", img.getAbsolutePath() };
+         Process p = rt.exec(args);
+         p.waitFor();
+
+         // available() is only an estimate and a single read() may stop short, so size the
+         // buffer from the file length and read until it is full
+         DataInputStream in = new DataInputStream(new FileInputStream(img));
+         try {
+             byte[] buf = new byte[(int) img.length()];
+             in.readFully(buf);
+             img_stream = buf;
+         } finally {
+             in.close();
+         }
+         if (img.delete() == false)
+             System.err.println("Warning: " + img.getAbsolutePath() + " could not be deleted!");
+     } catch (java.io.IOException ioe) {
+         System.err.println("Error: in I/O processing of tempfile in dir " + GraphViz.TEMP_DIR + "\n");
+         System.err.println(" or in calling external command");
+         ioe.printStackTrace();
+     } catch (java.lang.InterruptedException ie) {
+         Thread.currentThread().interrupt(); // restore the flag so callers can still see the interrupt
+         System.err.println("Error: the execution of the external program was interrupted");
+         ie.printStackTrace();
+     }
+     return img_stream;
+ }
+
+ /**
+  * Writes the source of the graph to a temporary file under TEMP_DIR and returns that file.
+  * @param str Source of the graph (in dot language).
+  * @return The file that contains the source of the graph, or null if it could not be written.
+  */
+ private File writeDotSourceToFile(String str) throws java.io.IOException {
+     File temp;
+     try {
+         temp = File.createTempFile("graph_", ".dot.tmp", new File(GraphViz.TEMP_DIR));
+         FileWriter fout = new FileWriter(temp);
+         try {
+             fout.write(str);
+         } finally {
+             fout.close();
+         }
+     } catch (Exception e) {
+         System.err.println("Error: I/O error while writing the dot source to temp file!");
+         return null;
+     }
+     return temp;
+ }
+
+ /**
+ * Returns a string that is used to start a graph.
+ *
+ * @return A string to open a graph.
+ */
+ public String start_graph() {
+ return "digraph G {";
+ }
+
+ /**
+ * Returns a string that is used to end a graph.
+ *
+ * @return A string to close a graph.
+ */
+ public String end_graph() {
+ return "}";
+ }
+
+ /**
+ * Read a DOT graph from a text file.
+ *
+ * @param input
+ * Input text file containing the DOT graph
+ * source.
+ */
+ public void readSource(String input) {
+     StringBuilder sb = new StringBuilder();
+     try {
+         BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(input)));
+         try {
+             String line;
+             while ((line = br.readLine()) != null) {
+                 // readLine() strips the terminator; restore it so statements and // comments stay on their own lines
+                 sb.append(line).append('\n');
+             }
+         } finally {
+             br.close();
+         }
+     } catch (Exception e) {
+         System.err.println("Error: " + e.getMessage());
+     }
+     this.graph = sb;
+ }
+
+} // end of class GraphViz
+
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/AdjacencyListWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/AdjacencyListWritable.java
new file mode 100644
index 0000000..44fa444
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/AdjacencyListWritable.java
@@ -0,0 +1,76 @@
+package edu.uci.ics.genomix.pregelix.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.type.VKmerListWritable;
+
+public class AdjacencyListWritable implements WritableComparable<AdjacencyListWritable>{
+    private VKmerListWritable forwardList;
+    private VKmerListWritable reverseList;
+
+    public AdjacencyListWritable(){
+        this.forwardList = new VKmerListWritable();
+        this.reverseList = new VKmerListWritable();
+    }
+
+    public AdjacencyListWritable(int kmerSize){
+        // kmerSize is ignored: both lists are created exactly as in the no-arg constructor
+        this();
+    }
+    /** Copies the forward and reverse lists of the given adjacency list into this one. */
+    public void set(AdjacencyListWritable adjacencyList){
+        this.forwardList.setCopy(adjacencyList.getForwardList());
+        this.reverseList.setCopy(adjacencyList.getReverseList());
+    }
+    /** Resets the forward list and the reverse list. */
+    public void reset(){
+        this.forwardList.reset();
+        this.reverseList.reset();
+    }
+    /** Resets both lists; the kmerSize argument is not used. */
+    public void reset(int kmerSize){
+        this.forwardList.reset();
+        this.reverseList.reset();
+    }
+    /** Returns the sum of getCountOfPosition() over both lists. */
+    public int getCountOfPosition(){
+        return this.forwardList.getCountOfPosition() + this.reverseList.getCountOfPosition();
+    }
+
+    public VKmerListWritable getForwardList() {
+        return this.forwardList;
+    }
+
+    public void setForwardList(VKmerListWritable forwardList) {
+        this.forwardList = forwardList;
+    }
+
+    public VKmerListWritable getReverseList() {
+        return this.reverseList;
+    }
+
+    public void setReverseList(VKmerListWritable reverseList) {
+        this.reverseList = reverseList;
+    }
+    /** Reads the forward list first, then the reverse list (mirror of write). */
+    @Override
+    public void readFields(DataInput in) throws IOException {
+        this.forwardList.readFields(in);
+        this.reverseList.readFields(in);
+    }
+    /** Writes the forward list first, then the reverse list. */
+    @Override
+    public void write(DataOutput out) throws IOException {
+        this.forwardList.write(out);
+        this.reverseList.write(out);
+    }
+    /** Always returns 0: every instance compares as equal to every other. */
+    @Override
+    public int compareTo(AdjacencyListWritable o) {
+        return 0;
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/HashMapWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/HashMapWritable.java
new file mode 100644
index 0000000..c4556c7
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/HashMapWritable.java
@@ -0,0 +1,102 @@
+package edu.uci.ics.genomix.pregelix.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+import java.util.HashMap;
+
+import org.apache.hadoop.io.Writable;
+@SuppressWarnings("unchecked")
+public class HashMapWritable<K extends Writable, V extends Writable> extends HashMap<K, V> implements
+ Writable {
+
+ /**
+ *
+ */
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Creates a HashMapWritable object.
+ */
+ public HashMapWritable() {
+ super();
+ }
+
+ /**
+ * Creates a HashMapWritable object from a regular HashMap.
+ */
+ public HashMapWritable(HashMap<K, V> map) {
+ super(map);
+ }
+
+ /**
+  * Deserializes the map: entry count, then (if non-empty) key and value class names, then the entries.
+  *
+  * @param in
+  *            source for raw byte representation
+  * @throws IOException
+  *             on read failure, or when the recorded key/value class cannot be loaded or instantiated
+  */
+ @SuppressWarnings("rawtypes")
+ public void readFields(DataInput in) throws IOException {
+     this.clear();
+
+     int numEntries = in.readInt();
+     if(numEntries==0) return;
+
+     String keyClassName = in.readUTF();
+     String valueClassName = in.readUTF();
+
+     K objK;
+     V objV;
+     try {
+         Class keyClass = Class.forName(keyClassName);
+         Class valueClass = Class.forName(valueClassName);
+         for (int i = 0; i < numEntries; i++) {
+             objK = (K) keyClass.newInstance();
+             objK.readFields(in);
+             objV = (V) valueClass.newInstance();
+             objV.readFields(in);
+             put(objK, objV);
+         }
+     } catch (ClassNotFoundException e) {
+         // swallowing these would leave `in` positioned mid-record and the map silently truncated
+         throw new IOException("Cannot load entry classes " + keyClassName + " / " + valueClassName, e);
+     } catch (IllegalAccessException e) {
+         throw new IOException("Cannot access constructor of " + keyClassName + " / " + valueClassName, e);
+     } catch (InstantiationException e) {
+         throw new IOException("Cannot instantiate " + keyClassName + " / " + valueClassName, e);
+     }
+
+ }
+
+ /**
+  * Serializes this map: entry count, then (if non-empty) key and value class names, then the entries.
+  *
+  * @param out
+  *            where to write the raw byte representation
+  */
+ public void write(DataOutput out) throws IOException {
+     // Write out the number of entries in the map
+     out.writeInt(size());
+     if(size()==0) return;
+
+     // Write out the class names for keys and values
+     // assuming that data is homogeneous (i.e., all entries have same types)
+     Set<Map.Entry<K, V>> entries = entrySet();
+     Map.Entry<K, V> first = entries.iterator().next();
+     K objK = first.getKey();
+     V objV = first.getValue();
+     // getName(), not getCanonicalName(): readFields uses Class.forName, which needs the binary name
+     out.writeUTF(objK.getClass().getName());
+     out.writeUTF(objV.getClass().getName());
+     // Then write out each key/value pair
+     for (Map.Entry<K, V> e: entrySet()) {
+         e.getKey().write(out);
+         e.getValue().write(out);
+     }
+ }
+
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/IncomingListWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/IncomingListWritable.java
new file mode 100644
index 0000000..8dec857
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/IncomingListWritable.java
@@ -0,0 +1,56 @@
+package edu.uci.ics.genomix.pregelix.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.type.PositionListWritable;
+
+public class IncomingListWritable implements WritableComparable<IncomingListWritable>{
+    private PositionListWritable reverseForwardList = new PositionListWritable();
+    private PositionListWritable reverseReverseList = new PositionListWritable();
+
+    /** Both lists are freshly constructed by the field initializers above. */
+    public IncomingListWritable(){
+    }
+
+    /** Returns the reverse-forward list held by this object (no copy is made). */
+    public PositionListWritable getReverseForwardList() {
+        return this.reverseForwardList;
+    }
+
+    public void setReverseForwardList(PositionListWritable reverseForwardList) {
+        this.reverseForwardList = reverseForwardList;
+    }
+    /** Returns the reverse-reverse list held by this object (no copy is made). */
+    public PositionListWritable getReverseReverseList() {
+        return this.reverseReverseList;
+    }
+
+    public void setReverseReverseList(PositionListWritable reverseReverseList) {
+        this.reverseReverseList = reverseReverseList;
+    }
+    /** Reads the reverse-forward list first, then the reverse-reverse list (mirror of write). */
+    @Override
+    public void readFields(DataInput in) throws IOException {
+        this.reverseForwardList.readFields(in);
+        this.reverseReverseList.readFields(in);
+    }
+    /** Writes the reverse-forward list first, then the reverse-reverse list. */
+    @Override
+    public void write(DataOutput out) throws IOException {
+        this.reverseForwardList.write(out);
+        this.reverseReverseList.write(out);
+    }
+    /** Always returns 0: every instance compares as equal to every other. */
+    @Override
+    public int compareTo(IncomingListWritable o) {
+        return 0;
+    }
+    /** Returns the sum of getCountOfPosition() over both lists. */
+    public int inDegree(){
+        return this.reverseReverseList.getCountOfPosition() + this.reverseForwardList.getCountOfPosition();
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MergeBubbleMessageWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MergeBubbleMessageWritable.java
new file mode 100644
index 0000000..c42bf32
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MergeBubbleMessageWritable.java
@@ -0,0 +1,196 @@
+package edu.uci.ics.genomix.pregelix.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.type.PositionWritable;
+import edu.uci.ics.genomix.pregelix.type.CheckMessage;
+import edu.uci.ics.genomix.pregelix.type.Message;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+
+public class MergeBubbleMessageWritable implements WritableComparable<MergeBubbleMessageWritable> {
+ /**
+ * sourceVertexId stores source vertexId when headVertex sends the message
+ * stores neighbor vertexValue when pathVertex sends the message
+ * file stores the pointer to the file that stores the chains of connected DNA
+ */
+ private PositionWritable sourceVertexId;
+ private KmerBytesWritable chainVertexId;
+ private AdjacencyListWritable neighberNode; //incoming or outgoing
+ private byte message;
+ private PositionWritable startVertexId;
+ // bitmask of CheckMessage flags recording which optional fields write() emits
+ private byte checkMessage;
+ /** Allocates every field; message starts as Message.NON and no CheckMessage flag is set. */
+ public MergeBubbleMessageWritable() {
+ sourceVertexId = new PositionWritable();
+ chainVertexId = new KmerBytesWritable();
+ neighberNode = new AdjacencyListWritable();
+ startVertexId = new PositionWritable();
+ message = Message.NON;
+ checkMessage = (byte) 0;
+ }
+ /** Copies all fields of msg. NOTE(review): the null checks test this object's own fields, which the constructor always allocates, so all four flags end up set - confirm this is intended. */
+ public void set(MergeBubbleMessageWritable msg) {
+ checkMessage = 0;
+ if (sourceVertexId != null) {
+ checkMessage |= CheckMessage.SOURCE;
+ this.sourceVertexId.set(msg.getSourceVertexId());
+ }
+ if (chainVertexId != null) {
+ checkMessage |= CheckMessage.ACUTUALKMER;
+ this.chainVertexId.setAsCopy(msg.getChainVertexId());
+ }
+ if (neighberNode != null) {
+ checkMessage |= CheckMessage.NEIGHBER;
+ this.neighberNode.set(msg.getNeighberNode());
+ }
+ if (startVertexId != null) {
+ checkMessage |= CheckMessage.START;
+ this.startVertexId.set(msg.getStartVertexId());
+ }
+ this.message = msg.getMessage();
+ }
+ /** Clears the flags, then copies each non-null argument and sets its flag; a null argument leaves the matching field untouched and unflagged. */
+ public void set(PositionWritable sourceVertexId, KmerBytesWritable chainVertexId, AdjacencyListWritable neighberNode, PositionWritable startVertexId, byte message) {
+ checkMessage = 0;
+ if (sourceVertexId != null) {
+ checkMessage |= CheckMessage.SOURCE;
+
+ this.sourceVertexId.set((byte)0, sourceVertexId.getReadId(),sourceVertexId.getPosId());
+ }
+ if (chainVertexId != null) {
+ checkMessage |= CheckMessage.ACUTUALKMER;
+ this.chainVertexId.setAsCopy(chainVertexId);
+
+ }
+ if (neighberNode != null) {
+ checkMessage |= CheckMessage.NEIGHBER;
+ this.neighberNode.set(neighberNode);
+ }
+ if (startVertexId != null) {
+ checkMessage |= CheckMessage.START;
+ this.startVertexId.set(startVertexId);
+ }
+ this.message = message;
+ }
+ /** Clears the flags, the neighbor lists and the message byte; sourceVertexId, chainVertexId and startVertexId keep their contents. */
+ public void reset() {
+ checkMessage = 0;
+// chainVertexId.reset();
+ neighberNode.reset();
+ message = Message.NON;
+ }
+
+ public PositionWritable getSourceVertexId() {
+ return sourceVertexId;
+ }
+ /** Copies readId/posId of the argument (first field forced to 0) and sets the SOURCE flag; null is ignored. */
+ public void setSourceVertexId(PositionWritable sourceVertexId) {
+ if (sourceVertexId != null) {
+ checkMessage |= CheckMessage.SOURCE;
+ this.sourceVertexId.set((byte)0, sourceVertexId.getReadId(),sourceVertexId.getPosId());
+ }
+ }
+
+ public KmerBytesWritable getChainVertexId() {
+ return chainVertexId;
+ }
+ /** Copies the argument into chainVertexId and sets the ACUTUALKMER flag; null is ignored. */
+ public void setChainVertexId(KmerBytesWritable chainVertexId) {
+ if (chainVertexId != null) {
+ checkMessage |= CheckMessage.ACUTUALKMER;
+ this.chainVertexId.setAsCopy(chainVertexId);
+
+ }
+ }
+
+ public AdjacencyListWritable getNeighberNode() {
+ return neighberNode;
+ }
+ /** Copies the argument into neighberNode and sets the NEIGHBER flag; null is ignored. */
+ public void setNeighberNode(AdjacencyListWritable neighberNode) {
+ if(neighberNode != null){
+ checkMessage |= CheckMessage.NEIGHBER;
+ this.neighberNode.set(neighberNode);
+ }
+ }
+ /** Returns the static KmerBytesWritable.getKmerLength(); chainVertexId itself is not inspected. */
+ public int getLengthOfChain() {
+ return KmerBytesWritable.getKmerLength();
+ }
+
+ public PositionWritable getStartVertexId() {
+ return startVertexId;
+ }
+ /** Copies the argument into startVertexId and sets the START flag; null is ignored. */
+ public void setStartVertexId(PositionWritable startVertexId) {
+ if(startVertexId != null){
+ checkMessage |= CheckMessage.START;
+ this.startVertexId.set(startVertexId);
+ }
+ }
+
+ public byte getMessage() {
+ return message;
+ }
+
+ public void setMessage(byte message) {
+ this.message = message;
+ }
+ /** Writes the flag byte, then only the flagged fields (SOURCE, ACUTUALKMER, NEIGHBER, START order), then the message byte. */
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeByte(checkMessage);
+ if ((checkMessage & CheckMessage.SOURCE) != 0)
+ sourceVertexId.write(out);
+ if ((checkMessage & CheckMessage.ACUTUALKMER) != 0)
+ chainVertexId.write(out);
+ if ((checkMessage & CheckMessage.NEIGHBER) != 0)
+ neighberNode.write(out);
+ if ((checkMessage & CheckMessage.START) != 0)
+ startVertexId.write(out);
+ out.write(message);
+ }
+ /** Mirror of write(): calls reset(), reads the flag byte, then the flagged fields in the same order, then the message byte. */
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ this.reset();
+ checkMessage = in.readByte();
+ if ((checkMessage & CheckMessage.SOURCE) != 0)
+ sourceVertexId.readFields(in);
+ if ((checkMessage & CheckMessage.ACUTUALKMER) != 0)
+ chainVertexId.readFields(in);
+ if ((checkMessage & CheckMessage.NEIGHBER) != 0)
+ neighberNode.readFields(in);
+ if ((checkMessage & CheckMessage.START) != 0)
+ startVertexId.readFields(in);
+ message = in.readByte();
+ }
+ /** hashCode, equals, toString and compareTo below all delegate to sourceVertexId alone. */
+ @Override
+ public int hashCode() {
+ return sourceVertexId.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o instanceof MergeBubbleMessageWritable) {
+ MergeBubbleMessageWritable tp = (MergeBubbleMessageWritable) o;
+ return sourceVertexId.equals(tp.sourceVertexId);
+ }
+ return false;
+ }
+
+ @Override
+ public String toString() {
+ return sourceVertexId.toString();
+ }
+
+ public int compareTo(MergeBubbleMessageWritable tp) {
+ return sourceVertexId.compareTo(tp.sourceVertexId);
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MessageWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MessageWritable.java
new file mode 100644
index 0000000..822d136
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MessageWritable.java
@@ -0,0 +1,318 @@
+package edu.uci.ics.genomix.pregelix.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.pregelix.type.CheckMessage;
+import edu.uci.ics.genomix.pregelix.type.Message;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+public class MessageWritable implements WritableComparable<MessageWritable> {
+ /**
+ * sourceVertexId stores source vertexId when headVertex sends the message
+ * stores neighber vertexValue when pathVertex sends the message
+ * file stores the point to the file that stores the chains of connected DNA
+ */
+ private VKmerBytesWritable sourceVertexId;
+ private VKmerBytesWritable actualKmer;
+ private AdjacencyListWritable neighberNode; //incoming or outgoing
+ private PositionListWritable nodeIdList = new PositionListWritable();
+ private float averageCoverage;
+ private byte flag;
+ private boolean isFlip;
+ private int kmerlength = 0;
+ private boolean updateMsg = false;
+ private VKmerBytesWritable startVertexId;
+
+ private byte checkMessage;
+
+ public MessageWritable() {
+ sourceVertexId = new VKmerBytesWritable();
+ actualKmer = new VKmerBytesWritable();
+ neighberNode = new AdjacencyListWritable();
+ startVertexId = new VKmerBytesWritable();
+ averageCoverage = 0;
+ flag = Message.NON;
+ isFlip = false;
+ checkMessage = (byte) 0;
+ }
+
+ public MessageWritable(int kmerSize) {
+ kmerlength = kmerSize;
+ sourceVertexId = new VKmerBytesWritable(kmerSize);
+ actualKmer = new VKmerBytesWritable(0);
+
+ neighberNode = new AdjacencyListWritable(kmerSize);
+ startVertexId = new VKmerBytesWritable(kmerSize);
+ averageCoverage = 0;
+ flag = Message.NON;
+ isFlip = false;
+ checkMessage = (byte) 0;
+ }
+
+ public void set(MessageWritable msg) { // copies ids, neighbour list, flag and updateMsg from msg; marks all four id/list bits plus ADJMSG
+ this.kmerlength = msg.kmerlength;
+ checkMessage = 0;
+ if (sourceVertexId != null) {
+ checkMessage |= CheckMessage.SOURCE;
+ this.sourceVertexId.setAsCopy(msg.getSourceVertexId());
+ }
+ if (actualKmer != null) {
+ checkMessage |= CheckMessage.ACUTUALKMER;
+ this.actualKmer.setAsCopy(msg.getActualKmer());
+ // (the four guards in this method test this object's own fields, which both constructors allocate)
+ }
+ if (neighberNode != null) {
+ checkMessage |= CheckMessage.NEIGHBER;
+ this.neighberNode.set(msg.getNeighberNode());
+ }
+ if (startVertexId != null) {
+ checkMessage |= CheckMessage.START;
+ this.startVertexId.setAsCopy(msg.getStartVertexId());
+ }
+ checkMessage |= CheckMessage.ADJMSG;
+ this.flag = msg.getFlag();
+ updateMsg = msg.isUpdateMsg(); // NOTE(review): nodeIdList, averageCoverage and isFlip are not copied here - confirm that is intended
+ }
+
+ public void set(int kmerlength, VKmerBytesWritable sourceVertexId, VKmerBytesWritable chainVertexId, AdjacencyListWritable neighberNode, byte message) {
+ this.kmerlength = kmerlength;
+ checkMessage = 0;
+ if (sourceVertexId != null) {
+ checkMessage |= CheckMessage.SOURCE;
+ this.sourceVertexId.setAsCopy(sourceVertexId);
+ }
+ if (chainVertexId != null) {
+ checkMessage |= CheckMessage.ACUTUALKMER;
+ this.actualKmer.setAsCopy(chainVertexId);
+
+ }
+ if (neighberNode != null) {
+ checkMessage |= CheckMessage.NEIGHBER;
+ this.neighberNode.set(neighberNode);
+ }
+ this.flag = message;
+ }
+
+ public void reset() {
+ reset(0);
+ }
+
+ public void reset(int kmerSize) { // clears the presence bits and the optional payloads; sourceVertexId and actualKmer stay untouched, as before
+     checkMessage = (byte) 0;
+     kmerlength = kmerSize;
+     nodeIdList.reset(); // otherwise a reused instance keeps the previous list whenever readFields() finds no NODEIDLIST bit
+     neighberNode.reset(kmerSize);
+     startVertexId.reset(kmerSize);
+     averageCoverage = 0;
+     flag = Message.NON;
+     isFlip = false;
+ }
+
+ public VKmerBytesWritable getSourceVertexId() {
+ return sourceVertexId;
+ }
+
+ public void setSourceVertexId(VKmerBytesWritable sourceVertexId) {
+ if (sourceVertexId != null) {
+ checkMessage |= CheckMessage.SOURCE;
+ this.sourceVertexId.setAsCopy(sourceVertexId);
+ }
+ }
+
+ public VKmerBytesWritable getActualKmer() {
+ return actualKmer;
+ }
+
+ public void setActualKmer(VKmerBytesWritable actualKmer) {
+ if (actualKmer != null) {
+ checkMessage |= CheckMessage.ACUTUALKMER;
+ this.actualKmer.setAsCopy(actualKmer);
+
+ }
+ }
+
+ public VKmerBytesWritable getCreatedVertexId() {
+ return actualKmer;
+ }
+
+ public void setCreatedVertexId(VKmerBytesWritable actualKmer) {
+ if (actualKmer != null) {
+ checkMessage |= CheckMessage.ACUTUALKMER;
+ this.actualKmer.setAsCopy(actualKmer);
+
+ }
+ }
+
+ public AdjacencyListWritable getNeighberNode() {
+ return neighberNode;
+ }
+
+ public void setNeighberNode(AdjacencyListWritable neighberNode) {
+ if(neighberNode != null){
+ checkMessage |= CheckMessage.NEIGHBER;
+ this.neighberNode.set(neighberNode);
+ }
+ }
+
+ public VKmerBytesWritable getStartVertexId() {
+ return startVertexId;
+ }
+
+ public void setStartVertexId(VKmerBytesWritable startVertexId) {
+ if(startVertexId != null){
+ checkMessage |= CheckMessage.START;
+ this.startVertexId.setAsCopy(startVertexId);
+ }
+ }
+
+ /** for Scaffolding, startVertexId is used as seekedVertexId **/
+ public VKmerBytesWritable getSeekedVertexId() {
+ return startVertexId;
+ }
+
+ public void setSeekedVertexId(VKmerBytesWritable startVertexId) {
+ if(startVertexId != null){
+ checkMessage |= CheckMessage.START;
+ this.startVertexId.setAsCopy(startVertexId);
+ }
+ }
+
+ public float getAverageCoverage() {
+ return averageCoverage;
+ }
+
+ public void setAverageCoverage(float averageCoverage) {
+ this.averageCoverage = averageCoverage;
+ }
+
+ public int getLengthOfChain() {
+ return actualKmer.getKmerLetterLength();
+ }
+
+ public byte getFlag() {
+ return flag;
+ }
+
+ public void setFlag(byte message) {
+ this.flag = message;
+ }
+
+ public boolean isFlip() {
+ return isFlip;
+ }
+
+ public void setFlip(boolean isFlip) {
+ this.isFlip = isFlip;
+ }
+
+
+ public boolean isUpdateMsg() {
+ return updateMsg;
+ }
+
+ public void setUpdateMsg(boolean updateMsg) {
+ this.updateMsg = updateMsg;
+ }
+
+ public PositionListWritable getNodeIdList() {
+ return nodeIdList;
+ }
+
+ public void setNodeIdList(PositionListWritable nodeIdList) {
+ if(nodeIdList != null){
+ checkMessage |= CheckMessage.NODEIDLIST;
+ this.nodeIdList.set(nodeIdList);
+ }
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(kmerlength);
+ out.writeByte(checkMessage);
+ if ((checkMessage & CheckMessage.SOURCE) != 0)
+ sourceVertexId.write(out);
+ if ((checkMessage & CheckMessage.ACUTUALKMER) != 0)
+ actualKmer.write(out);
+ if ((checkMessage & CheckMessage.NEIGHBER) != 0)
+ neighberNode.write(out);
+ if ((checkMessage & CheckMessage.NODEIDLIST) != 0)
+ nodeIdList.write(out);
+ if ((checkMessage & CheckMessage.START) != 0)
+ startVertexId.write(out);
+ out.writeFloat(averageCoverage);
+ out.writeBoolean(isFlip);
+ out.writeByte(flag);
+ out.writeBoolean(updateMsg);
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ kmerlength = in.readInt();
+ this.reset(kmerlength);
+ checkMessage = in.readByte();
+ if ((checkMessage & CheckMessage.SOURCE) != 0)
+ sourceVertexId.readFields(in);
+ if ((checkMessage & CheckMessage.ACUTUALKMER) != 0)
+ actualKmer.readFields(in);
+ if ((checkMessage & CheckMessage.NEIGHBER) != 0)
+ neighberNode.readFields(in);
+ if ((checkMessage & CheckMessage.NODEIDLIST) != 0)
+ nodeIdList.readFields(in);
+ if ((checkMessage & CheckMessage.START) != 0)
+ startVertexId.readFields(in);
+ averageCoverage = in.readFloat();
+ isFlip = in.readBoolean();
+ flag = in.readByte();
+ updateMsg = in.readBoolean();
+ }
+
+ @Override
+ public int hashCode() {
+ return sourceVertexId.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o instanceof MessageWritable) {
+ MessageWritable tp = (MessageWritable) o;
+ return sourceVertexId.equals(tp.sourceVertexId);
+ }
+ return false;
+ }
+
+ @Override
+ public String toString() {
+ return sourceVertexId.toString();
+ }
+
+ @Override
+ public int compareTo(MessageWritable tp) {
+ return sourceVertexId.compareTo(tp.sourceVertexId);
+ }
+
+ public static final class SortByCoverage implements Comparator<MessageWritable> {
+ @Override
+ public int compare(MessageWritable left, MessageWritable right) {
+ return Float.compare(left.averageCoverage, right.averageCoverage);
+ }
+ }
+
+ /**
+  * Update my coverage to be the length-weighted average of this and other. Used when merging paths.
+  */
+ public void mergeCoverage(MessageWritable other) {
+     // the K - 1 letters that overlap with a neighbouring kmer are excluded from each side's weight
+     int overlap = KmerBytesWritable.getKmerLength() - 1;
+     float myLength = actualKmer.getKmerLetterLength() - overlap;
+     float otherLength = other.actualKmer.getKmerLetterLength() - overlap;
+     // the weights now sum to the divisor (the old "- K - 1" weights fell short of it by 4),
+     // so merging two equal coverages yields that same coverage
+     averageCoverage = (myLength * averageCoverage + otherLength * other.averageCoverage) / (myLength + otherLength);
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/OutgoingListWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/OutgoingListWritable.java
new file mode 100644
index 0000000..275954d
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/OutgoingListWritable.java
@@ -0,0 +1,56 @@
+package edu.uci.ics.genomix.pregelix.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.type.PositionListWritable;
+
+public class OutgoingListWritable implements WritableComparable<OutgoingListWritable>{ // two position lists, serialized back to back
+ private PositionListWritable forwardForwardList;
+ private PositionListWritable forwardReverseList;
+ // Starts with two freshly allocated lists.
+ public OutgoingListWritable(){
+ forwardForwardList = new PositionListWritable();
+ forwardReverseList = new PositionListWritable();
+ }
+ // Returns the internal list itself, not a copy.
+ public PositionListWritable getForwardForwardList() {
+ return forwardForwardList;
+ }
+ // Stores the given reference; the argument is not copied.
+ public void setForwardForwardList(PositionListWritable forwardForwardList) {
+ this.forwardForwardList = forwardForwardList;
+ }
+ // Returns the internal list itself, not a copy.
+ public PositionListWritable getForwardReverseList() {
+ return forwardReverseList;
+ }
+ // Stores the given reference; the argument is not copied.
+ public void setForwardReverseList(PositionListWritable forwardReverseList) {
+ this.forwardReverseList = forwardReverseList;
+ }
+ // Reads the forward-forward list first, then the forward-reverse list (the order write() emits them in).
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ forwardForwardList.readFields(in);
+ forwardReverseList.readFields(in);
+ }
+ // Writes the forward-forward list first, then the forward-reverse list.
+ @Override
+ public void write(DataOutput out) throws IOException {
+ forwardForwardList.write(out);
+ forwardReverseList.write(out);
+ }
+ // Always returns 0, so every instance compares as equal to every other.
+ @Override
+ public int compareTo(OutgoingListWritable o) {
+ return 0;
+ }
+ // Sum of the position counts of both lists.
+ public int outDegree(){
+ return forwardForwardList.getCountOfPosition() + forwardReverseList.getCountOfPosition();
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java
new file mode 100644
index 0000000..237d9ce
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java
@@ -0,0 +1,379 @@
+package edu.uci.ics.genomix.pregelix.io;
+
+import java.io.*;
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.pregelix.type.MessageFlag;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerListWritable;
+
+public class VertexValueWritable implements WritableComparable<VertexValueWritable>, Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ public static class State extends VertexStateFlag{
+ public static final byte NO_MERGE = 0b00 << 3;
+ public static final byte SHOULD_MERGEWITHNEXT = 0b01 << 3;
+ public static final byte SHOULD_MERGEWITHPREV = 0b10 << 3;
+ public static final byte SHOULD_MERGE_MASK = 0b11 << 3;
+ public static final byte SHOULD_MERGE_CLEAR = 0b1100111;
+
+ public static final byte UNCHANGE = 0b0 << 3;
+ public static final byte KILL = 0b1 << 3;
+ public static final byte KILL_MASK = 0b1 << 3;
+
+ public static final byte DIR_FROM_DEADVERTEX = 0b10 << 3;
+ public static final byte DEAD_MASK = 0b10 << 3;
+ }
+
+ public static class VertexStateFlag extends FakeFlag { // vertex-kind codes, all encoded in bits 5-6
+     public static final byte IS_NON = 0b00 << 5;
+     public static final byte IS_RANDOMTAIL = 0b00 << 5; // same encoding as IS_NON
+     public static final byte IS_HEAD = 0b01 << 5;
+     public static final byte IS_FINAL = 0b10 << 5;
+     public static final byte IS_RANDOMHEAD = 0b11 << 5;
+     public static final byte IS_OLDHEAD = 0b11 << 5; // same encoding as IS_RANDOMHEAD
+     // VERTEX_MASK selects the vertex-kind bits; VERTEX_CLEAR is its complement (AND with it to zero those bits).
+     public static final byte VERTEX_MASK = 0b11 << 5;
+     public static final byte VERTEX_CLEAR = (byte) ~VERTEX_MASK; // was the decimal literal 11001111, which narrows to (byte) 23
+ }
+
+ public static class FakeFlag { // fake / non-fake marker values, carried in bit 0
+     public static final byte IS_NONFAKE = 0b0;
+     public static final byte IS_FAKE = 0b1;
+     // single-bit mask selecting the fake marker
+     public static final byte FAKEFLAG_MASK = 0b1;
+ }
+
+ private PositionListWritable nodeIdList;
+ private AdjacencyListWritable incomingList;
+ private AdjacencyListWritable outgoingList;
+ private VKmerBytesWritable actualKmer;
+ private float averageCoverage;
+ private byte state;
+ private boolean isFakeVertex = false;
+ private HashMapWritable<VKmerBytesWritable, VKmerListWritable> traverseMap = new HashMapWritable<VKmerBytesWritable, VKmerListWritable>();
+
+ public VertexValueWritable() {
+ this(0);
+ }
+
+ public VertexValueWritable(int kmerSize){
+ nodeIdList = new PositionListWritable();
+ incomingList = new AdjacencyListWritable();
+ outgoingList = new AdjacencyListWritable();
+ actualKmer = new VKmerBytesWritable();
+ state = State.IS_NON;
+ averageCoverage = 0;
+ }
+
+ public VertexValueWritable(PositionListWritable nodeIdList, VKmerListWritable forwardForwardList, VKmerListWritable forwardReverseList,
+         VKmerListWritable reverseForwardList, VKmerListWritable reverseReverseList, VKmerBytesWritable actualKmer,
+         float averageCoverage, byte state) {
+     this(); // allocate the member containers first: set() writes into them and would otherwise dereference null fields
+     set(nodeIdList, forwardForwardList, forwardReverseList, reverseForwardList, reverseReverseList, actualKmer,
+             averageCoverage, state);
+ }
+
+ public void set(PositionListWritable nodeIdList, VKmerListWritable forwardForwardList, VKmerListWritable forwardReverseList,
+         VKmerListWritable reverseForwardList, VKmerListWritable reverseReverseList, VKmerBytesWritable actualKmer, float averageCoverage, byte state) {
+     if (nodeIdList != null) { this.nodeIdList.set(nodeIdList); } // this argument used to be dropped; null still leaves the current list untouched
+     this.incomingList.setForwardList(reverseForwardList); // RF and RR lists live in incomingList
+     this.incomingList.setReverseList(reverseReverseList);
+     this.outgoingList.setForwardList(forwardForwardList); // FF and FR lists live in outgoingList
+     this.outgoingList.setReverseList(forwardReverseList);
+     this.actualKmer.setAsCopy(actualKmer);
+     this.averageCoverage = averageCoverage;
+     this.state = state;
+ }
+
+ public void set(VertexValueWritable value) {
+ set(value.getNodeIdList(), value.getFFList(),value.getFRList(),value.getRFList(),value.getRRList(),
+ value.getActualKmer(), value.getAverageCoverage(), value.getState());
+ }
+
+
+ public PositionListWritable getNodeIdList() {
+ return nodeIdList;
+ }
+
+ //for testing
+ public long getHeadReadId(){
+ return 1;
+ }
+
+ public void setNodeIdList(PositionListWritable nodeIdList) {
+ this.nodeIdList.set(nodeIdList);
+ }
+
+ public VKmerListWritable getFFList() {
+ return outgoingList.getForwardList();
+ }
+
+ public VKmerListWritable getFRList() {
+ return outgoingList.getReverseList();
+ }
+
+ public VKmerListWritable getRFList() {
+ return incomingList.getForwardList();
+ }
+
+ public VKmerListWritable getRRList() {
+ return incomingList.getReverseList();
+ }
+
+ public void setFFList(VKmerListWritable forwardForwardList){
+ outgoingList.setForwardList(forwardForwardList);
+ }
+
+ public void setFRList(VKmerListWritable forwardReverseList){
+ outgoingList.setReverseList(forwardReverseList);
+ }
+
+ public void setRFList(VKmerListWritable reverseForwardList){
+ incomingList.setForwardList(reverseForwardList);
+ }
+
+ public void setRRList(VKmerListWritable reverseReverseList){
+ incomingList.setReverseList(reverseReverseList);
+ }
+
+ public AdjacencyListWritable getIncomingList() {
+ return incomingList;
+ }
+
+ public void setIncomingList(AdjacencyListWritable incomingList) {
+ this.incomingList.set(incomingList);
+ }
+
+ public AdjacencyListWritable getOutgoingList() {
+ return outgoingList;
+ }
+
+ public void setOutgoingList(AdjacencyListWritable outgoingList) {
+ this.outgoingList.set(outgoingList);
+ }
+
+ public byte getState() {
+ return state;
+ }
+
+ public VKmerBytesWritable getActualKmer() {
+ return actualKmer;
+ }
+
+ public void setActualKmer(VKmerBytesWritable kmer) {
+ this.actualKmer.setAsCopy(kmer);
+ }
+
+ public float getAverageCoverage() {
+ return averageCoverage;
+ }
+
+ public void setAverageCoverage(float averageCoverage) {
+ this.averageCoverage = averageCoverage;
+ }
+
+ public boolean isFakeVertex() {
+ return isFakeVertex;
+ }
+
+ public void setFakeVertex(boolean isFakeVertex) {
+ this.isFakeVertex = isFakeVertex;
+ }
+
+ public void setState(byte state) {
+ this.state = state;
+ }
+
+ public int getLengthOfKmer() {
+ return actualKmer.getKmerLetterLength();
+ }
+
+
+ public HashMapWritable<VKmerBytesWritable, VKmerListWritable> getTraverseMap() {
+ return traverseMap;
+ }
+
+ public void setTraverseMap(HashMapWritable<VKmerBytesWritable, VKmerListWritable> traverseMap) {
+ this.traverseMap = traverseMap;
+ }
+
+ public void reset() {
+ this.reset(0);
+ }
+
+ public void reset(int kmerSize) {
+ this.nodeIdList.reset();
+ this.incomingList.getForwardList().reset();
+ this.incomingList.getReverseList().reset();
+ this.outgoingList.getForwardList().reset();
+ this.outgoingList.getReverseList().reset();
+ this.actualKmer.reset(0);
+ averageCoverage = 0;
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ reset();
+ this.nodeIdList.readFields(in);
+ this.outgoingList.getForwardList().readFields(in);
+ this.outgoingList.getReverseList().readFields(in);
+ this.incomingList.getForwardList().readFields(in);
+ this.incomingList.getReverseList().readFields(in);
+ this.actualKmer.readFields(in);
+ averageCoverage = in.readFloat();
+ this.state = in.readByte();
+ this.isFakeVertex = in.readBoolean();
+ this.traverseMap.readFields(in);
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ this.nodeIdList.write(out);
+ this.outgoingList.getForwardList().write(out);
+ this.outgoingList.getReverseList().write(out);
+ this.incomingList.getForwardList().write(out);
+ this.incomingList.getReverseList().write(out);
+ this.actualKmer.write(out);
+ out.writeFloat(averageCoverage);
+ out.writeByte(this.state);
+ out.writeBoolean(this.isFakeVertex);
+ this.traverseMap.write(out);
+ }
+
+ @Override
+ public int compareTo(VertexValueWritable o) {
+ return 0;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sbuilder = new StringBuilder();
+ sbuilder.append('{');
+ sbuilder.append(nodeIdList.toString()).append('\t');
+ sbuilder.append(outgoingList.getForwardList().toString()).append('\t');
+ sbuilder.append(outgoingList.getReverseList().toString()).append('\t');
+ sbuilder.append(incomingList.getForwardList().toString()).append('\t');
+ sbuilder.append(incomingList.getReverseList().toString()).append('\t');
+ sbuilder.append(actualKmer.toString()).append('}');
+ return sbuilder.toString();
+ }
+
+ public int inDegree(){
+ return incomingList.getForwardList().getCountOfPosition() + incomingList.getReverseList().getCountOfPosition();
+ }
+
+ public int outDegree(){
+ return outgoingList.getForwardList().getCountOfPosition() + outgoingList.getReverseList().getCountOfPosition();
+ }
+
+ public int getDegree(){
+ return inDegree() + outDegree();
+ }
+
+ /*
+ * Delete the corresponding edge
+ */
+ public void processDelete(byte neighborToDeleteDir, VKmerBytesWritable nodeToDelete){
+ switch (neighborToDeleteDir & MessageFlag.DIR_MASK) {
+ case MessageFlag.DIR_FF:
+ this.getFFList().remove(nodeToDelete);
+ break;
+ case MessageFlag.DIR_FR:
+ this.getFRList().remove(nodeToDelete);
+ break;
+ case MessageFlag.DIR_RF:
+ this.getRFList().remove(nodeToDelete);
+ break;
+ case MessageFlag.DIR_RR:
+ this.getRRList().remove(nodeToDelete);
+ break;
+ }
+ }
+
+ /*
+ * Process any changes to value. This is for edge updates
+ */
+ public void processUpdates(byte neighborToDeleteDir, VKmerBytesWritable nodeToDelete,
+ byte neighborToMergeDir, VKmerBytesWritable nodeToAdd){
+// TODO
+// this.getListFromDir(neighborToDeleteDir).remove(nodeToDelete);
+// this.getListFromDir(neighborToMergeDir).append(nodeToDelete);
+
+ switch (neighborToDeleteDir & MessageFlag.DIR_MASK) {
+ case MessageFlag.DIR_FF:
+ this.getFFList().remove(nodeToDelete);
+ break;
+ case MessageFlag.DIR_FR:
+ this.getFRList().remove(nodeToDelete);
+ break;
+ case MessageFlag.DIR_RF:
+ this.getRFList().remove(nodeToDelete);
+ break;
+ case MessageFlag.DIR_RR:
+ this.getRRList().remove(nodeToDelete);
+ break;
+ }
+ switch (neighborToMergeDir & MessageFlag.DIR_MASK) {
+ case MessageFlag.DIR_FF:
+ this.getFFList().append(nodeToAdd);
+ break;
+ case MessageFlag.DIR_FR:
+ this.getFRList().append(nodeToAdd);
+ break;
+ case MessageFlag.DIR_RF:
+ this.getRFList().append(nodeToAdd);
+ break;
+ case MessageFlag.DIR_RR:
+ this.getRRList().append(nodeToAdd);
+ break;
+ }
+ }
+
+ /*
+ * Process any changes to value. This is for merging
+ */
+ public void processMerges(byte neighborToDeleteDir, VKmerBytesWritable nodeToDelete,
+ byte neighborToMergeDir, VKmerBytesWritable nodeToAdd,
+ int kmerSize, VKmerBytesWritable kmer){
+ switch (neighborToDeleteDir & MessageFlag.DIR_MASK) {
+ case MessageFlag.DIR_FF:
+ this.getFFList().remove(nodeToDelete); //set(null);
+ this.getActualKmer().mergeWithFFKmer(kmerSize, kmer);
+ break;
+ case MessageFlag.DIR_FR:
+ this.getFRList().remove(nodeToDelete);
+ this.getActualKmer().mergeWithFRKmer(kmerSize, kmer);
+ break;
+ case MessageFlag.DIR_RF:
+ this.getRFList().remove(nodeToDelete);
+ this.getActualKmer().mergeWithRFKmer(kmerSize, kmer);
+ break;
+ case MessageFlag.DIR_RR:
+ this.getRRList().remove(nodeToDelete);
+ this.getActualKmer().mergeWithRRKmer(kmerSize, kmer);
+ break;
+ }
+ // TODO: remove switch below and replace with general direction merge
+// this.getKmer().mergeWithDirKmer(neighborToMergeDir);
+ if(nodeToAdd != null){ //if null, nodeToAdd is empty and so another node may be head or tail
+ switch (neighborToMergeDir & MessageFlag.DIR_MASK) {
+ case MessageFlag.DIR_FF:
+ this.getFFList().append(nodeToAdd);
+ break;
+ case MessageFlag.DIR_FR:
+ this.getFRList().append(nodeToAdd);
+ break;
+ case MessageFlag.DIR_RF:
+ this.getRFList().append(nodeToAdd);
+ break;
+ case MessageFlag.DIR_RR:
+ this.getRRList().append(nodeToAdd);
+ break;
+ }
+ }
+ }
+
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/DataLoadLogFormatter.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/DataLoadLogFormatter.java
new file mode 100644
index 0000000..95e070f
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/DataLoadLogFormatter.java
@@ -0,0 +1,33 @@
+package edu.uci.ics.genomix.pregelix.log;
+
+import java.util.logging.Formatter;
+import java.util.logging.Handler;
+import java.util.logging.LogRecord;
+
+import edu.uci.ics.genomix.type.NodeWritable;
+
+public class DataLoadLogFormatter extends Formatter {
+    private NodeWritable key = new NodeWritable(); // allocated up front: set() copies into it and format() reads it
+    // NOTE(review): relies on NodeWritable having a public no-arg constructor - confirm.
+    public void set(NodeWritable key) {
+        this.key.set(key);
+    }
+
+    public String format(LogRecord record) {
+        StringBuilder builder = new StringBuilder(1000);
+        // the node comes first, then the record's own message when it has one
+        builder.append(key.toString() + "\r\n");
+        String message = formatMessage(record); // formatted once instead of twice
+        if (message != null && !message.equals(""))
+            builder.append(message + "\r\n");
+        return builder.toString();
+    }
+
+    public String getHead(Handler h) {
+        return super.getHead(h);
+    }
+
+    public String getTail(Handler h) {
+        return super.getTail(h);
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/LogAlgorithmLogFormatter.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/LogAlgorithmLogFormatter.java
new file mode 100644
index 0000000..90eefa1
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/LogAlgorithmLogFormatter.java
@@ -0,0 +1,116 @@
+package edu.uci.ics.genomix.pregelix.log;
+
+import java.util.logging.*;
+
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.type.Message;
+import edu.uci.ics.genomix.pregelix.type.State2;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+
+public class LogAlgorithmLogFormatter extends Formatter {
+ //
+ // State captured by the set*() methods and rendered by format().
+ //
+ private long step;
+ private KmerBytesWritable sourceVertexId = new KmerBytesWritable();
+ private KmerBytesWritable destVertexId = new KmerBytesWritable();
+ private MessageWritable msg = new MessageWritable();
+ private byte state;
+ private KmerBytesWritable mergeChain = new KmerBytesWritable();
+ //private boolean testDelete = false;
+ /**
+ * 0: general operation
+ * 1: testDelete
+ * 2: testMergeChain
+ * 3: testVoteToHalt
+ */
+ private int operation;
+
+ public LogAlgorithmLogFormatter() {
+ }
+
+ public void set(long step, KmerBytesWritable sourceVertexId, KmerBytesWritable destVertexId,
+ MessageWritable msg, byte state) {
+ this.step = step;
+ this.sourceVertexId.setAsCopy(sourceVertexId);
+ this.destVertexId.setAsCopy(destVertexId);
+ this.msg = msg;
+ this.state = state;
+ this.operation = 0;
+ }
+
+ public void setMergeChain(long step, KmerBytesWritable sourceVertexId, KmerBytesWritable mergeChain) {
+ this.reset();
+ this.step = step;
+ this.sourceVertexId.setAsCopy(sourceVertexId);
+ this.mergeChain.setAsCopy(mergeChain);
+ this.operation = 2;
+ }
+
+ public void setVotoToHalt(long step, KmerBytesWritable sourceVertexId) {
+ this.reset();
+ this.step = step;
+ this.sourceVertexId.setAsCopy(sourceVertexId);
+ this.operation = 3;
+ }
+
+ public void reset() {
+ this.sourceVertexId = new KmerBytesWritable();
+ this.destVertexId = new KmerBytesWritable();
+ this.msg = new MessageWritable();
+ this.state = 0;
+ this.mergeChain = new KmerBytesWritable();
+ }
+
+ public String format(LogRecord record) {
+ StringBuilder builder = new StringBuilder(1000);
+ String source = sourceVertexId.toString();
+ String chain = "";
+
+ builder.append("Step: " + step + "\r\n");
+ builder.append("Source Code: " + source + "\r\n");
+ if (operation == 0) {
+ if (KmerBytesWritable.getKmerLength() != -1) {
+ String dest = destVertexId.toString();
+ builder.append("Send message to " + "\r\n");
+ builder.append("Destination Code: " + dest + "\r\n");
+ }
+ builder.append("Message is: " + Message.MESSAGE_CONTENT.getContentFromCode(msg.getFlag()) + "\r\n");
+
+ if (msg.getLengthOfChain() != -1) {
+ chain = msg.getActualKmer().toString();
+ builder.append("Chain Message: " + chain + "\r\n");
+ builder.append("Chain Length: " + msg.getLengthOfChain() + "\r\n");
+ }
+
+ builder.append("State is: " + State2.STATE_CONTENT.getContentFromCode(state) + "\r\n");
+ }
+ if (operation == 2) {
+ chain = mergeChain.toString();
+ builder.append("Merge Chain: " + chain + "\r\n");
+ builder.append("Merge Chain Length: " + KmerBytesWritable.getKmerLength() + "\r\n");
+ }
+ if (operation == 3)
+ builder.append("Vote to halt!");
+ if (!formatMessage(record).equals(""))
+ builder.append(formatMessage(record) + "\r\n");
+ builder.append("\n");
+ return builder.toString();
+ }
+
+ public String getHead(Handler h) {
+ return super.getHead(h);
+ }
+
+ public String getTail(Handler h) {
+ return super.getTail(h);
+ }
+
+ public int getOperation() {
+ return operation;
+ }
+
+ public void setOperation(int operation) {
+ this.operation = operation;
+ }
+}
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/NaiveAlgorithmLogFormatter.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/NaiveAlgorithmLogFormatter.java
new file mode 100644
index 0000000..6b23074
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/NaiveAlgorithmLogFormatter.java
@@ -0,0 +1,47 @@
+package edu.uci.ics.genomix.pregelix.log;
+
+import java.util.logging.*;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+
+public class NaiveAlgorithmLogFormatter extends Formatter {
+    // State captured by set() and rendered by format().
+    private long step;
+    private KmerBytesWritable sourceVertexId = new KmerBytesWritable(); // allocated up front: set() copies into it
+    private KmerBytesWritable destVertexId; // null means "no destination"; format() then omits that section
+
+    public void set(long step, KmerBytesWritable sourceVertexId, KmerBytesWritable destVertexId) {
+        this.step = step;
+        this.sourceVertexId.setAsCopy(sourceVertexId);
+        if (destVertexId == null) {
+            this.destVertexId = null;
+        } else {
+            if (this.destVertexId == null)
+                this.destVertexId = new KmerBytesWritable(); // created lazily so the copy below has a target
+            this.destVertexId.setAsCopy(destVertexId);
+        }
+    }
+
+    public String format(LogRecord record) {
+        StringBuilder builder = new StringBuilder(1000);
+        builder.append("Step: " + step + "\r\n");
+        builder.append("Source Code: " + sourceVertexId.toString() + "\r\n");
+        if (destVertexId != null) {
+            builder.append("Send message to " + "\r\n");
+            builder.append("Destination Code: " + destVertexId.toString() + "\r\n");
+        }
+        String message = formatMessage(record); // formatted once instead of twice
+        if (message != null && !message.equals(""))
+            builder.append(message + "\r\n");
+        builder.append("\n");
+        return builder.toString();
+    }
+
+    public String getHead(Handler h) {
+        return super.getHead(h);
+    }
+
+    public String getTail(Handler h) {
+        return super.getTail(h);
+    }
+}
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/bridgeremove/BridgeAddVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/bridgeremove/BridgeAddVertex.java
new file mode 100644
index 0000000..13a6223
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/bridgeremove/BridgeAddVertex.java
@@ -0,0 +1,128 @@
+package edu.uci.ics.genomix.pregelix.operator.bridgeremove;
+
+import java.util.Iterator;
+import org.apache.hadoop.io.NullWritable;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerListWritable;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+
+/*
+ * vertexId: VKmerBytesWritable
+ * vertexValue: VertexValueWritable
+ * edgeValue: NullWritable
+ * message: MessageWritable
+ *
+ * DNA:
+ * A: 00
+ * C: 01
+ * G: 10
+ * T: 11
+ *
+ * succeed node
+ * A 00000001 1
+ * G 00000010 2
+ * C 00000100 4
+ * T 00001000 8
+ * precursor node
+ * A 00010000 16
+ * G 00100000 32
+ * C 01000000 64
+ * T 10000000 128
+ *
+ * For example, ONE LINE in input file: 00,01,10 0001,0010,
+ * That means that vertexId is ACG, its succeed node is A and its precursor node is C.
+ * The succeed node and precursor node will be stored in vertexValue and we don't use edgeValue.
+ * The details about message are in edu.uci.ics.genomix.pregelix.io.MessageWritable.
+ */
+/**
+ * Naive Algorithm for path merge graph
+ */
+public class BridgeAddVertex extends
+ Vertex<VKmerBytesWritable, VertexValueWritable, NullWritable, MessageWritable> {
+ public static final String KMER_SIZE = "BridgeRemoveVertex.kmerSize"; // TODO consolidate config options
+ public static final String LENGTH = "BridgeRemoveVertex.length";
+ public static int kmerSize = -1;
+ private int length = -1;
+
+ /**
+ * initiate kmerSize, maxIteration
+ */
+ public void initVertex() {
+ if (kmerSize == -1) {
+ kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+ KmerBytesWritable.setGlobalKmerLength(kmerSize);
+ }
+ if (length == -1)
+ length = getContext().getConfiguration().getInt(LENGTH, kmerSize + 5); // TODO fail on parse
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void compute(Iterator<MessageWritable> msgIterator) {
+ initVertex();
+ if(getSuperstep() == 1){
+ if(getVertexId().toString().equals("ATA")){
+ VKmerBytesWritable vertexId = new VKmerBytesWritable(kmerSize);
+ vertexId.setByRead(kmerSize, "GTA".getBytes(), 0);
+ getVertexValue().getFRList().append(vertexId);
+
+ //add bridge vertex
+ @SuppressWarnings("rawtypes")
+ Vertex vertex = (Vertex) BspUtils.createVertex(getContext().getConfiguration());
+ vertex.getMsgList().clear();
+ vertex.getEdges().clear();
+ VertexValueWritable vertexValue = new VertexValueWritable(kmerSize);
+ /**
+ * set the src vertex id
+ */
+ vertex.setVertexId(vertexId);
+ /**
+ * set the vertex value
+ */
+ VKmerListWritable kmerFRList = new VKmerListWritable();
+
+ kmerFRList.append(getVertexId());
+ vertexValue.setFRList(kmerFRList);
+ VKmerBytesWritable otherVertexId = new VKmerBytesWritable(kmerSize);
+ otherVertexId.setByRead(kmerSize, "ACG".getBytes(), 0);
+ VKmerListWritable kmerRFList = new VKmerListWritable();
+ kmerRFList.append(otherVertexId);
+ vertexValue.setRFList(kmerRFList);
+ vertexValue.setActualKmer(vertexId);
+ vertex.setVertexValue(vertexValue);
+
+ addVertex(vertexId, vertex);
+ }
+ else if(getVertexId().toString().equals("ACG")){
+ VKmerBytesWritable brdgeVertexId = new VKmerBytesWritable(kmerSize);
+ brdgeVertexId.setByRead(kmerSize, "GTA".getBytes(), 0);
+ getVertexValue().getRFList().append(brdgeVertexId);
+ }
+ }
+ voteToHalt();
+ }
+
+ public static void main(String[] args) throws Exception {
+ PregelixJob job = new PregelixJob(BridgeAddVertex.class.getSimpleName());
+ job.setVertexClass(BridgeAddVertex.class);
+ /**
+ * BinaryInput and BinaryOutput
+ */
+ job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(KmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ Client.run(args, job);
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/bridgeremove/BridgeRemoveVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/bridgeremove/BridgeRemoveVertex.java
new file mode 100644
index 0000000..50b1518
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/bridgeremove/BridgeRemoveVertex.java
@@ -0,0 +1,122 @@
+package edu.uci.ics.genomix.pregelix.operator.bridgeremove;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.genomix.type.PositionWritable;
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.operator.pathmerge.BasicGraphCleanVertex;
+import edu.uci.ics.genomix.pregelix.util.VertexUtil;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+/*
+ * vertexId: VKmerBytesWritable
+ * vertexValue: VertexValueWritable
+ * edgeValue: NullWritable
+ * message: MessageWritable
+ *
+ * DNA:
+ * A: 00
+ * C: 01
+ * G: 10
+ * T: 11
+ *
+ * succeed node
+ * A 00000001 1
+ * G 00000010 2
+ * C 00000100 4
+ * T 00001000 8
+ * precursor node
+ * A 00010000 16
+ * G 00100000 32
+ * C 01000000 64
+ * T 10000000 128
+ *
+ * For example, ONE LINE in input file: 00,01,10 0001,0010,
+ * That means that vertexId is ACG, its succeed node is A and its precursor node is C.
+ * The succeed node and precursor node will be stored in vertexValue and we don't use edgeValue.
+ * The details about message are in edu.uci.ics.genomix.pregelix.io.MessageWritable.
+ */
+/**
+ * Vertex program that removes short bridge vertices in three supersteps:
+ * <ol>
+ * <li>up-bridge vertices message all next nodes, down-bridge vertices message
+ * all previous nodes;</li>
+ * <li>a vertex that received exactly two messages, has degree 2 and a kmer
+ * length of at most {@code length} calls {@code broadcaseKillself()};</li>
+ * <li>every received message is answered through {@code responseToDeadVertex()}.</li>
+ * </ol>
+ */
+public class BridgeRemoveVertex extends
+        BasicGraphCleanVertex {
+    public static final String LENGTH = "BridgeRemoveVertex.length";
+    private int length = -1;
+
+    private ArrayList<MessageWritable> receivedMsgList = new ArrayList<MessageWritable>();
+
+    /**
+     * Lazily reads kmerSize (default 5) and length (default kmerSize) from the
+     * job configuration, allocates or resets the reusable message objects and
+     * clears the per-call message buffer.
+     */
+    public void initVertex() {
+        if (kmerSize == -1) {
+            kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+        }
+        if (length == -1) {
+            length = getContext().getConfiguration().getInt(LENGTH, kmerSize);
+        }
+        if (incomingMsg == null) {
+            incomingMsg = new MessageWritable(kmerSize);
+        }
+        if (outgoingMsg == null) {
+            outgoingMsg = new MessageWritable(kmerSize);
+        } else {
+            outgoingMsg.reset(kmerSize);
+        }
+        if (destVertexId == null) {
+            destVertexId = new VKmerBytesWritable(kmerSize);
+        }
+        receivedMsgList.clear();
+    }
+
+    @Override
+    public void compute(Iterator<MessageWritable> msgIterator) {
+        initVertex();
+        if (getSuperstep() == 1) {
+            if (VertexUtil.isUpBridgeVertex(getVertexValue())) {
+                sendSettledMsgToAllNextNodes(getVertexValue());
+            } else if (VertexUtil.isDownBridgeVertex(getVertexValue())) {
+                sendSettledMsgToAllPreviousNodes(getVertexValue());
+            }
+        } else if (getSuperstep() == 2) {
+            // Buffer at most three messages: enough to tell "exactly two" from "more than two".
+            while (msgIterator.hasNext() && receivedMsgList.size() < 3) {
+                receivedMsgList.add(msgIterator.next());
+            }
+            if (receivedMsgList.size() == 2
+                    && getVertexValue().getLengthOfKmer() <= length
+                    && getVertexValue().getDegree() == 2) {
+                broadcaseKillself();
+            }
+        } else if (getSuperstep() == 3) {
+            while (msgIterator.hasNext()) {
+                incomingMsg = msgIterator.next();
+                responseToDeadVertex();
+            }
+        }
+        voteToHalt();
+    }
+
+    /**
+     * Configures a Pregelix job running this vertex with the graph-clean
+     * input/output formats and hands it to {@link Client#run}.
+     */
+    public static void main(String[] args) throws Exception {
+        PregelixJob job = new PregelixJob(BridgeRemoveVertex.class.getSimpleName());
+        job.setVertexClass(BridgeRemoveVertex.class);
+        // binary input and binary output
+        job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+        job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+        job.setDynamicVertexValueSize(true);
+        // The vertex id type declared by BasicGraphCleanVertex is VKmerBytesWritable
+        // (previously PositionWritable was registered here).
+        job.setOutputKeyClass(VKmerBytesWritable.class);
+        job.setOutputValueClass(VertexValueWritable.class);
+        Client.run(args, job);
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/bubblemerge/BubbleAddVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/bubblemerge/BubbleAddVertex.java
new file mode 100644
index 0000000..aca9d95
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/bubblemerge/BubbleAddVertex.java
@@ -0,0 +1,122 @@
+package edu.uci.ics.genomix.pregelix.operator.bubblemerge;
+
+import java.util.Iterator;
+import org.apache.hadoop.io.NullWritable;
+
+import edu.uci.ics.genomix.type.VKmerListWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+
+/*
+ * vertexId: VKmerBytesWritable
+ * vertexValue: VertexValueWritable
+ * edgeValue: NullWritable
+ * message: MessageWritable
+ *
+ * DNA:
+ * A: 00
+ * C: 01
+ * G: 10
+ * T: 11
+ *
+ * succeed node
+ * A 00000001 1
+ * G 00000010 2
+ * C 00000100 4
+ * T 00001000 8
+ * precursor node
+ * A 00010000 16
+ * G 00100000 32
+ * C 01000000 64
+ * T 10000000 128
+ *
+ * For example, ONE LINE in input file: 00,01,10 0001,0010,
+ * That means that vertexId is ACG, its succeed node is A and its precursor node is C.
+ * The succeed node and precursor node will be stored in vertexValue and we don't use edgeValue.
+ * The details about message are in edu.uci.ics.genomix.pregelix.io.MessageWritable.
+ */
+/**
+ * Remove tip or single node when l > constant
+ */
+public class BubbleAddVertex extends
+ Vertex<VKmerBytesWritable, VertexValueWritable, NullWritable, MessageWritable> {
+ public static final String KMER_SIZE = "BubbleAddVertex.kmerSize";
+ public static int kmerSize = -1;
+
+ /**
+ * initiate kmerSize, length
+ */
+ public void initVertex() {
+ if (kmerSize == -1)
+ kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void compute(Iterator<MessageWritable> msgIterator) {
+ initVertex();
+ if(getSuperstep() == 1){
+ if(getVertexId().toString().equals("ATA")){
+ VKmerBytesWritable vertexId = new VKmerBytesWritable();
+ vertexId.setByRead(kmerSize, "GTA".getBytes(), 0);
+ getVertexValue().getFRList().append(vertexId);
+
+ //add bridge vertex
+ @SuppressWarnings("rawtypes")
+ Vertex vertex = (Vertex) BspUtils.createVertex(getContext().getConfiguration());
+ vertex.getMsgList().clear();
+ vertex.getEdges().clear();
+ VertexValueWritable vertexValue = new VertexValueWritable(kmerSize);
+ /**
+ * set the src vertex id
+ */
+ vertex.setVertexId(vertexId);
+ /**
+ * set the vertex value
+ */
+ VKmerListWritable kmerFRList = new VKmerListWritable();
+
+ kmerFRList.append(getVertexId());
+ vertexValue.setFRList(kmerFRList);
+ VKmerBytesWritable otherVertexId = new VKmerBytesWritable();
+ otherVertexId.setByRead(kmerSize, "AGA".getBytes(), 0);
+ VKmerListWritable kmerRFList = new VKmerListWritable();
+ kmerRFList.append(otherVertexId);
+ vertexValue.setRFList(kmerRFList);
+ vertexValue.setActualKmer(vertexId);
+
+ vertex.setVertexValue(vertexValue);
+
+ addVertex(vertexId, vertex);
+ }
+ else if(getVertexId().toString().equals("AGA")){
+ VKmerBytesWritable brdgeVertexId = new VKmerBytesWritable();
+ brdgeVertexId.setByRead(kmerSize, "GTA".getBytes(), 0);
+ getVertexValue().getRFList().append(brdgeVertexId);
+ }
+ }
+ voteToHalt();
+ }
+
+ public static void main(String[] args) throws Exception {
+ PregelixJob job = new PregelixJob(BubbleAddVertex.class.getSimpleName());
+ job.setVertexClass(BubbleAddVertex.class);
+ /**
+ * BinaryInput and BinaryOutput
+ */
+ job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(VKmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ Client.run(args, job);
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/bubblemerge/BubbleMergeVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/bubblemerge/BubbleMergeVertex.java
new file mode 100644
index 0000000..c97244a
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/bubblemerge/BubbleMergeVertex.java
@@ -0,0 +1,212 @@
+package edu.uci.ics.genomix.pregelix.operator.bubblemerge;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.operator.pathmerge.BasicGraphCleanVertex;
+import edu.uci.ics.genomix.pregelix.type.MessageFlag;
+import edu.uci.ics.genomix.pregelix.util.VertexUtil;
+
+/**
+ * Naive Algorithm for path merge graph
+ */
+public class BubbleMergeVertex extends
+ BasicGraphCleanVertex {
+ public static final String DISSIMILARITY_THRESHOLD = "BubbleMergeVertex.dissimilarThreshold";
+ private float dissimilarThreshold = -1;
+
+ private Map<VKmerBytesWritable, ArrayList<MessageWritable>> receivedMsgMap = new HashMap<VKmerBytesWritable, ArrayList<MessageWritable>>();
+ private ArrayList<MessageWritable> receivedMsgList = new ArrayList<MessageWritable>();
+ private Set<MessageWritable> unchangedSet = new HashSet<MessageWritable>();
+ private Set<MessageWritable> deletedSet = new HashSet<MessageWritable>();
+
+ /**
+ * initiate kmerSize, maxIteration
+ */
+ public void initVertex() {
+ if (kmerSize == -1)
+ kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+ if (maxIteration < 0)
+ maxIteration = getContext().getConfiguration().getInt(ITERATIONS, 1000000);
+ if(dissimilarThreshold == -1)
+ dissimilarThreshold = getContext().getConfiguration().getFloat(DISSIMILARITY_THRESHOLD, (float) 0.05);
+ if(incomingMsg == null)
+ incomingMsg = new MessageWritable();
+ if(outgoingMsg == null)
+ outgoingMsg = new MessageWritable();
+ else
+ outgoingMsg.reset(kmerSize);
+ if(destVertexId == null)
+ destVertexId = new VKmerBytesWritable();
+ outFlag = 0;
+ }
+
+ public void sendBubbleAndMajorVertexMsgToMinorVertex(){
+ byte meToNeighborDir = (byte) (incomingMsg.getFlag() & MessageFlag.DIR_MASK);
+ byte neighborToMeDir = mirrorDirection(meToNeighborDir);
+ switch(neighborToMeDir){
+ case MessageFlag.DIR_RF:
+ case MessageFlag.DIR_RR:
+ if(hasNextDest(getVertexValue())){
+ outgoingMsg.setStartVertexId(incomingMsg.getSourceVertexId());
+ outgoingMsg.setSourceVertexId(getVertexId());
+ outgoingMsg.setActualKmer(getVertexValue().getActualKmer());
+ destVertexId.setAsCopy(getNextDestVertexId(getVertexValue()));
+ sendMsg(destVertexId, outgoingMsg);
+ }
+ break;
+ case MessageFlag.DIR_FF:
+ case MessageFlag.DIR_FR:
+ if(hasPrevDest(getVertexValue())){
+ outgoingMsg.setStartVertexId(incomingMsg.getSourceVertexId());
+ outgoingMsg.setSourceVertexId(getVertexId());
+ outgoingMsg.setActualKmer(getVertexValue().getActualKmer());
+ destVertexId.setAsCopy(getPrevDestVertexId(getVertexValue()));
+ sendMsg(destVertexId, outgoingMsg);
+ }
+ break;
+ }
+ }
+
+ @SuppressWarnings({ "unchecked" })
+ public void aggregateBubbleNodesByMajorNode(Iterator<MessageWritable> msgIterator){
+ while (msgIterator.hasNext()) {
+ incomingMsg = msgIterator.next();
+ if(!receivedMsgMap.containsKey(incomingMsg.getStartVertexId())){
+ receivedMsgList.clear();
+ receivedMsgList.add(incomingMsg);
+ receivedMsgMap.put(incomingMsg.getStartVertexId(), (ArrayList<MessageWritable>)receivedMsgList.clone());
+ }
+ else{
+ receivedMsgList.clear();
+ receivedMsgList.addAll(receivedMsgMap.get(incomingMsg.getStartVertexId()));
+ receivedMsgList.add(incomingMsg);
+ receivedMsgMap.put(incomingMsg.getStartVertexId(), (ArrayList<MessageWritable>)receivedMsgList.clone());
+ }
+ }
+ }
+
+ public void processSimilarSetToUnchangeSetAndDeletedSet(){
+ unchangedSet.clear();
+ deletedSet.clear();
+ MessageWritable topCoverageMessage = new MessageWritable();
+ MessageWritable tmpMessage = new MessageWritable();
+ Iterator<MessageWritable> it;
+ while(!receivedMsgList.isEmpty()){
+ it = receivedMsgList.iterator();
+ topCoverageMessage.set(it.next());
+ it.remove(); //delete topCoverage node
+ while(it.hasNext()){
+ tmpMessage.set(it.next());
+ //compute the similarity
+ float fracDissimilar = topCoverageMessage.getSourceVertexId().fracDissimilar(tmpMessage.getSourceVertexId());
+ if(fracDissimilar < dissimilarThreshold){ //If similar with top node, delete this node and put it in deletedSet
+ //add coverage to top node
+ topCoverageMessage.mergeCoverage(tmpMessage);
+ deletedSet.add(tmpMessage);
+ it.remove();
+ }
+ }
+ unchangedSet.add(topCoverageMessage);
+ }
+ }
+
+ public void processUnchangedSet(){
+ for(MessageWritable msg : unchangedSet){
+ outFlag = MessageFlag.UNCHANGE;
+ outgoingMsg.setFlag(outFlag);
+ outgoingMsg.setAverageCoverage(msg.getAverageCoverage());
+ sendMsg(msg.getSourceVertexId(), outgoingMsg);
+ }
+ }
+
+ public void processDeletedSet(){
+ for(MessageWritable msg : deletedSet){
+ outFlag = MessageFlag.KILL;
+ outgoingMsg.setFlag(outFlag);
+ sendMsg(msg.getSourceVertexId(), outgoingMsg);
+ }
+ }
+
+ @Override
+ public void compute(Iterator<MessageWritable> msgIterator) {
+ initVertex();
+ if (getSuperstep() == 1) {
+ if(VertexUtil.isHeadVertexWithIndegree(getVertexValue())
+ || VertexUtil.isHeadWithoutIndegree(getVertexValue())){
+ sendSettledMsgToAllNextNodes(getVertexValue());
+ }
+ } else if (getSuperstep() == 2){
+ while (msgIterator.hasNext()) {
+ incomingMsg = msgIterator.next();
+ if(VertexUtil.isPathVertex(getVertexValue())){
+ /** send bubble and major vertex msg to minor vertex **/
+ sendBubbleAndMajorVertexMsgToMinorVertex();
+ }
+ }
+ } else if (getSuperstep() == 3){
+ /** aggregate bubble nodes and grouped by major vertex **/
+ aggregateBubbleNodesByMajorNode(msgIterator);
+
+
+ for(VKmerBytesWritable prevId : receivedMsgMap.keySet()){
+ if(receivedMsgList.size() > 1){ // filter bubble
+ /** for each startVertex, sort the node by decreasing order of coverage **/
+ receivedMsgList = receivedMsgMap.get(prevId);
+ Collections.sort(receivedMsgList, new MessageWritable.SortByCoverage());
+
+ /** process similarSet, keep the unchanged set and deleted set & add coverage to unchange node **/
+ processSimilarSetToUnchangeSetAndDeletedSet();
+
+ /** send message to the unchanged set for updating coverage & send kill message to the deleted set **/
+ processUnchangedSet();
+ processDeletedSet();
+ }
+ }
+ } else if (getSuperstep() == 4){
+ if(msgIterator.hasNext()) {
+ incomingMsg = msgIterator.next();
+ if(incomingMsg.getFlag() == MessageFlag.KILL){
+ broadcaseKillself();
+ } else if(incomingMsg.getFlag() == MessageFlag.UNCHANGE){
+ /** update average coverage **/
+ getVertexValue().setAverageCoverage(incomingMsg.getAverageCoverage());
+ }
+ }
+ } else if(getSuperstep() == 5){
+ if(msgIterator.hasNext()) {
+ incomingMsg = msgIterator.next();
+ if(isResponseKillMsg()){
+ responseToDeadVertex();
+ }
+ }
+ }
+ voteToHalt();
+ }
+
+ public static void main(String[] args) throws Exception {
+ PregelixJob job = new PregelixJob(BubbleMergeVertex.class.getSimpleName());
+ job.setVertexClass(BubbleMergeVertex.class);
+ /**
+ * BinaryInput and BinaryOutput
+ */
+ job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(VKmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ Client.run(args, job);
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicGraphCleanVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicGraphCleanVertex.java
new file mode 100644
index 0000000..4936d17
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicGraphCleanVertex.java
@@ -0,0 +1,832 @@
+package edu.uci.ics.genomix.pregelix.operator.pathmerge;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.NullWritable;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable.State;
+import edu.uci.ics.genomix.pregelix.type.MessageFlag;
+import edu.uci.ics.genomix.pregelix.util.VertexUtil;
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
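+/**
+ * Base vertex for the graph-cleaning operators (e.g. BridgeRemoveVertex and
+ * BubbleMergeVertex): shared message buffers, direction flags and helpers for
+ * sending messages to neighbouring vertices.
+ */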
+public class BasicGraphCleanVertex extends
+ Vertex<VKmerBytesWritable, VertexValueWritable, NullWritable, MessageWritable> {
+ public static final String KMER_SIZE = "BasicGraphCleanVertex.kmerSize";
+ public static final String ITERATIONS = "BasicGraphCleanVertex.iteration";
+ public static int kmerSize = -1;
+ public static int maxIteration = -1;
+
+ protected MessageWritable incomingMsg = null;
+ protected MessageWritable outgoingMsg = null;
+ protected VKmerBytesWritable destVertexId = null;
+ protected Iterator<VKmerBytesWritable> kmerIterator;
+ protected VKmerBytesWritable tmpKmer = null;
+ protected byte headFlag;
+ protected byte outFlag;
+ protected byte inFlag;
+ protected byte selfFlag;
+
+ /**
+  * Per-compute initialisation hook. This base implementation does nothing;
+  * subclasses define their own initVertex() to read configuration values and
+  * to allocate or reset the shared message objects.
+  */
+ public void initVertex() {
+ }
+
+ /**
+  * Refreshes {@code headFlag} with the IS_HEAD bit of the vertex state.
+  */
+ public void resetHeadFlag(){
+     final byte state = getVertexValue().getState();
+     headFlag = (byte) (state & State.IS_HEAD);
+ }
+
+ /**
+  * @return the vertex state masked with IS_HEAD (0 when the bit is not set)
+  */
+ public byte getHeadFlag(){
+     final byte state = getVertexValue().getState();
+     return (byte) (state & State.IS_HEAD);
+ }
+
+ /**
+  * @return true when the FF list or the FR list of {@code value} is non-empty
+  */
+ public boolean hasNextDest(VertexValueWritable value){
+     if (value.getFFList().getCountOfPosition() > 0) {
+         return true;
+     }
+     return value.getFRList().getCountOfPosition() > 0;
+ }
+
+ /**
+  * @return true when the RF list or the RR list of {@code value} is non-empty
+  */
+ public boolean hasPrevDest(VertexValueWritable value){
+     if (value.getRFList().getCountOfPosition() > 0) {
+         return true;
+     }
+     return value.getRRList().getCountOfPosition() > 0;
+ }
+
+ /**
+  * Returns the first entry of the FF list, or of the FR list when the FF
+  * list is empty. {@code kmerIterator} is left positioned on the list used.
+  *
+  * @return the first forward neighbour, or null when both lists are empty
+  */
+ public VKmerBytesWritable getNextDestVertexId(VertexValueWritable value) {
+     if (value.getFFList().getCountOfPosition() > 0) {
+         kmerIterator = value.getFFList().iterator();
+         return kmerIterator.next();
+     }
+     if (value.getFRList().getCountOfPosition() > 0) {
+         kmerIterator = value.getFRList().iterator();
+         return kmerIterator.next();
+     }
+     return null;
+ }
+
+ /**
+  * Returns the first entry of the RF list, or of the RR list when the RF
+  * list is empty. {@code kmerIterator} is left positioned on the list used.
+  *
+  * @return the first reverse neighbour, or null when both lists are empty
+  */
+ public VKmerBytesWritable getPrevDestVertexId(VertexValueWritable value) {
+     if (value.getRFList().getCountOfPosition() > 0) {
+         kmerIterator = value.getRFList().iterator();
+         return kmerIterator.next();
+     }
+     if (value.getRRList().getCountOfPosition() > 0) {
+         kmerIterator = value.getRRList().iterator();
+         return kmerIterator.next();
+     }
+     return null;
+ }
+
+ /**
+  * Like {@link #getNextDestVertexId(VertexValueWritable)}, but also replaces
+  * the direction bits of {@code outFlag} with DIR_FF or DIR_FR according to
+  * the list the neighbour was taken from. {@code outFlag} is untouched when
+  * both lists are empty.
+  *
+  * @return the first forward neighbour, or null when both lists are empty
+  */
+ public VKmerBytesWritable getNextDestVertexIdAndSetFlag(VertexValueWritable value) {
+     if (value.getFFList().getCountOfPosition() > 0) {
+         kmerIterator = value.getFFList().iterator();
+         outFlag &= MessageFlag.DIR_CLEAR;
+         outFlag |= MessageFlag.DIR_FF;
+         return kmerIterator.next();
+     }
+     if (value.getFRList().getCountOfPosition() > 0) {
+         kmerIterator = value.getFRList().iterator();
+         outFlag &= MessageFlag.DIR_CLEAR;
+         outFlag |= MessageFlag.DIR_FR;
+         return kmerIterator.next();
+     }
+     return null;
+ }
+
+ /**
+  * Like {@link #getPrevDestVertexId(VertexValueWritable)}, but also replaces
+  * the direction bits of {@code outFlag} with DIR_RF or DIR_RR according to
+  * the list the neighbour was taken from. {@code outFlag} is untouched when
+  * both lists are empty.
+  *
+  * @return the first reverse neighbour, or null when both lists are empty
+  */
+ public VKmerBytesWritable getPrevDestVertexIdAndSetFlag(VertexValueWritable value) {
+     if (value.getRFList().getCountOfPosition() > 0) {
+         kmerIterator = value.getRFList().iterator();
+         outFlag &= MessageFlag.DIR_CLEAR;
+         outFlag |= MessageFlag.DIR_RF;
+         return kmerIterator.next();
+     }
+     if (value.getRRList().getCountOfPosition() > 0) {
+         kmerIterator = value.getRRList().iterator();
+         outFlag &= MessageFlag.DIR_CLEAR;
+         outFlag |= MessageFlag.DIR_RR;
+         return kmerIterator.next();
+     }
+     return null;
+ }
+
+ /**
+  * Sends {@code outgoingMsg} as it currently stands to every vertex in the
+  * RF list and then to every vertex in the RR list of {@code value}.
+  */
+ public void sendMsgToAllPreviousNodes(VertexValueWritable value) {
+     for (kmerIterator = value.getRFList().iterator(); kmerIterator.hasNext();) {
+         destVertexId.setAsCopy(kmerIterator.next());
+         sendMsg(destVertexId, outgoingMsg);
+     }
+     for (kmerIterator = value.getRRList().iterator(); kmerIterator.hasNext();) {
+         destVertexId.setAsCopy(kmerIterator.next());
+         sendMsg(destVertexId, outgoingMsg);
+     }
+ }
+
+ /**
+  * Sends {@code outgoingMsg} as it currently stands to every vertex in the
+  * FF list and then to every vertex in the FR list of {@code value}.
+  */
+ public void sendMsgToAllNextNodes(VertexValueWritable value) {
+     for (kmerIterator = value.getFFList().iterator(); kmerIterator.hasNext();) {
+         destVertexId.setAsCopy(kmerIterator.next());
+         sendMsg(destVertexId, outgoingMsg);
+     }
+     for (kmerIterator = value.getFRList().iterator(); kmerIterator.hasNext();) {
+         destVertexId.setAsCopy(kmerIterator.next());
+         sendMsg(destVertexId, outgoingMsg);
+     }
+ }
+
+ /**
+  * Sends {@code outgoingMsg} to every neighbour of {@code value}: first the
+  * next nodes (FF and FR lists), then the previous nodes (RF and RR lists).
+  */
+ public void sendMsgToAllNeighborNodes(VertexValueWritable value){
+ sendMsgToAllNextNodes(value);
+ sendMsgToAllPreviousNodes(value);
+ }
+
+ /**
+ * tip send message with sourceId and dir to previous node
+ * tip only has one incoming
+ */
+ public void sendSettledMsgToPreviousNode(){
+ if(getVertexValue().getFFList().getCountOfPosition() > 0)
+ outgoingMsg.setFlag(MessageFlag.DIR_FF);
+ else if(getVertexValue().getFRList().getCountOfPosition() > 0)
+ outgoingMsg.setFlag(MessageFlag.DIR_FR);
+ outgoingMsg.setSourceVertexId(getVertexId());
+ destVertexId.setAsCopy(getNextDestVertexId(getVertexValue()));
+ sendMsg(destVertexId, outgoingMsg);
+ }
+
+ /**
+ * tip send message with sourceId and dir to next node
+ * tip only has one outgoing
+ */
+ public void sendSettledMsgToNextNode(){
+ if(getVertexValue().getRFList().getCountOfPosition() > 0)
+ outgoingMsg.setFlag(MessageFlag.DIR_RF);
+ else if(getVertexValue().getRRList().getCountOfPosition() > 0)
+ outgoingMsg.setFlag(MessageFlag.DIR_RR);
+ outgoingMsg.setSourceVertexId(getVertexId());
+ destVertexId.setAsCopy(getPrevDestVertexId(getVertexValue()));
+ sendMsg(destVertexId, outgoingMsg);
+ }
+
+ /**
+  * Sends a message stamped with this vertex's id to every vertex in the RF
+  * list (flag DIR_RF) and then to every vertex in the RR list (flag DIR_RR)
+  * of {@code value}.
+  */
+ public void sendSettledMsgToAllPreviousNodes(VertexValueWritable value) {
+     for (kmerIterator = value.getRFList().iterator(); kmerIterator.hasNext();) {
+         outgoingMsg.setFlag(MessageFlag.DIR_RF);
+         outgoingMsg.setSourceVertexId(getVertexId());
+         destVertexId.setAsCopy(kmerIterator.next());
+         sendMsg(destVertexId, outgoingMsg);
+     }
+     for (kmerIterator = value.getRRList().iterator(); kmerIterator.hasNext();) {
+         outgoingMsg.setFlag(MessageFlag.DIR_RR);
+         outgoingMsg.setSourceVertexId(getVertexId());
+         destVertexId.setAsCopy(kmerIterator.next());
+         sendMsg(destVertexId, outgoingMsg);
+     }
+ }
+
+ /**
+  * Sends a message stamped with this vertex's id to every vertex in the FF
+  * list (flag DIR_FF) and then to every vertex in the FR list (flag DIR_FR)
+  * of {@code value}.
+  */
+ public void sendSettledMsgToAllNextNodes(VertexValueWritable value) {
+     for (kmerIterator = value.getFFList().iterator(); kmerIterator.hasNext();) {
+         outgoingMsg.setFlag(MessageFlag.DIR_FF);
+         outgoingMsg.setSourceVertexId(getVertexId());
+         destVertexId.setAsCopy(kmerIterator.next());
+         sendMsg(destVertexId, outgoingMsg);
+     }
+     for (kmerIterator = value.getFRList().iterator(); kmerIterator.hasNext();) {
+         outgoingMsg.setFlag(MessageFlag.DIR_FR);
+         outgoingMsg.setSourceVertexId(getVertexId());
+         destVertexId.setAsCopy(kmerIterator.next());
+         sendMsg(destVertexId, outgoingMsg);
+     }
+ }
+
+ /**
+  * Emits the initial IS_HEAD messages and votes to halt. The four checks
+  * are independent, so more than one may fire for the same vertex:
+  * a head with indegree notifies all next nodes, a rear with outdegree
+  * notifies all previous nodes, and a head without indegree or a rear
+  * without outdegree sends the message to itself.
+  */
+ public void startSendMsg() {
+     final byte isHeadOnly = (byte) (MessageFlag.IS_HEAD);
+     if (VertexUtil.isHeadVertexWithIndegree(getVertexValue())) {
+         outgoingMsg.setFlag(isHeadOnly);
+         sendMsgToAllNextNodes(getVertexValue());
+         voteToHalt();
+     }
+     if (VertexUtil.isRearVertexWithOutdegree(getVertexValue())) {
+         outgoingMsg.setFlag(isHeadOnly);
+         sendMsgToAllPreviousNodes(getVertexValue());
+         voteToHalt();
+     }
+     if (VertexUtil.isHeadWithoutIndegree(getVertexValue())) {
+         outgoingMsg.setFlag(isHeadOnly);
+         // no neighbour on that side: the vertex notifies itself
+         sendMsg(getVertexId(), outgoingMsg);
+         voteToHalt();
+     }
+     if (VertexUtil.isRearWithoutOutdegree(getVertexValue())) {
+         outgoingMsg.setFlag(isHeadOnly);
+         // no neighbour on that side: the vertex notifies itself
+         sendMsg(getVertexId(), outgoingMsg);
+         voteToHalt();
+     }
+ }
+
+ /**
+ * initiate head, rear and path node
+ */
+ public void initState(Iterator<MessageWritable> msgIterator) {
+ while (msgIterator.hasNext()) {
+ if (!VertexUtil.isPathVertex(getVertexValue())
+ && !VertexUtil.isHeadWithoutIndegree(getVertexValue())
+ && !VertexUtil.isRearWithoutOutdegree(getVertexValue())) {
+ msgIterator.next();
+ voteToHalt();
+ } else {
+ incomingMsg = msgIterator.next();
+ if(getHeadFlag() > 0)
+ voteToHalt();
+ else
+ getVertexValue().setState(incomingMsg.getFlag());
+ }
+ }
+ }
+
+ /**
+ * check if A need to be flipped with successor
+ */
+ public boolean ifFilpWithSuccessor(){
+ if(getVertexValue().getFRList().getCountOfPosition() > 0)
+ return true;
+ else
+ return false;
+ }
+
+ /**
+ * check if A need to be filpped with predecessor
+ */
+ public boolean ifFlipWithPredecessor(){
+ if(getVertexValue().getRFList().getCountOfPosition() > 0)
+ return true;
+ else
+ return false;
+ }
+
+ /**
+ * set adjMessage to successor(from predecessor)
+ */
+ public void setSuccessorAdjMsg(){
+ outFlag &= MessageFlag.DIR_CLEAR;
+ if(getVertexValue().getFFList().getCountOfPosition() > 0)
+ outFlag |= MessageFlag.DIR_FF;
+ else if(getVertexValue().getFRList().getCountOfPosition() > 0)
+ outFlag |= MessageFlag.DIR_FR;
+ else
+ outFlag |= MessageFlag.DIR_NO;
+ }
+
+ /**
+ * set adjMessage to predecessor(from successor)
+ */
+ public void setPredecessorAdjMsg(){
+ outFlag &= MessageFlag.DIR_CLEAR;
+ if(getVertexValue().getRFList().getCountOfPosition() > 0)
+ outFlag |= MessageFlag.DIR_RF;
+ else if(getVertexValue().getRRList().getCountOfPosition() > 0)
+ outFlag |= MessageFlag.DIR_RR;
+ else
+ outFlag |= MessageFlag.DIR_NO;
+ }
+
+ /**
+  * Sends an update message to one neighbour, chosen by the SHOULD_MERGE bits
+  * of the vertex state. The message carries the current {@code outFlag}
+  * (with IS_HEAD added when the vertex is a head), this vertex's id and one
+  * of its adjacency lists. Nothing is sent when the state matches neither
+  * case or when the chosen side has no neighbour.
+  */
+ public void broadcastUpdateMsg(){
+ // propagate the head marker to the receiver
+ if((getVertexValue().getState() & State.IS_HEAD) > 0)
+ outFlag |= MessageFlag.IS_HEAD;
+ switch(getVertexValue().getState() & State.SHOULD_MERGE_MASK){
+ case State.SHOULD_MERGEWITHPREV:
+ // direction bits come from the FF/FR lists; the incoming list goes to the next destination
+ setSuccessorAdjMsg();
+ // flip is only ever set to true here; it is never cleared in this branch
+ if(ifFlipWithPredecessor())
+ outgoingMsg.setFlip(true);
+ outgoingMsg.setFlag(outFlag);
+ outgoingMsg.setNeighberNode(getVertexValue().getIncomingList());
+ outgoingMsg.setSourceVertexId(getVertexId());
+ if(getNextDestVertexId(getVertexValue()) != null)
+ sendMsg(getNextDestVertexId(getVertexValue()), outgoingMsg);
+ break;
+ case State.SHOULD_MERGEWITHNEXT:
+ // direction bits come from the RF/RR lists; the outgoing list goes to the previous destination
+ setPredecessorAdjMsg();
+ // flip is only ever set to true here; it is never cleared in this branch
+ if(ifFilpWithSuccessor())
+ outgoingMsg.setFlip(true);
+ outgoingMsg.setFlag(outFlag);
+ outgoingMsg.setNeighberNode(getVertexValue().getOutgoingList());
+ outgoingMsg.setSourceVertexId(getVertexId());
+ if(getPrevDestVertexId(getVertexValue()) != null)
+ sendMsg(getPrevDestVertexId(getVertexValue()), outgoingMsg);
+ break;
+ }
+ }
+
+    /**
+     * Flag outgoingMsg as an update message and dispatch on the mirrored direction carried by incomingMsg.
+     * FF and FR go through sendUpdateMsgToPredecessor; RF and RR go through sendUpdateMsgToSuccessor.
+     */
+    public void sendUpdateMsg(){
+        outgoingMsg.setUpdateMsg(true);
+        byte dirToNeighbor = (byte) (incomingMsg.getFlag() & MessageFlag.DIR_MASK);
+        byte dirToMe = mirrorDirection(dirToNeighbor);
+        // mirrorDirection throws for anything other than FF, FR, RF or RR
+        boolean forward = dirToMe == MessageFlag.DIR_FF || dirToMe == MessageFlag.DIR_FR;
+        boolean reverse = dirToMe == MessageFlag.DIR_RF || dirToMe == MessageFlag.DIR_RR;
+        if(forward){
+            // DIR_FF or DIR_FR
+            sendUpdateMsgToPredecessor();
+        } else if(reverse){
+            // DIR_RF or DIR_RR
+            sendUpdateMsgToSuccessor();
+        }
+    }
+
+ /**
+ * Reply to the sender of incomingMsg with a merge (non-update) message carrying this vertex's flags, flip bit, neighbor list, id and actual kmer.
+ * A vertex whose selfFlag is IS_HEAD is first re-marked IS_OLDHEAD (the message still carries IS_HEAD); one whose selfFlag is IS_OLDHEAD sends IS_OLDHEAD and votes to halt.
+ */
+ public void sendMergeMsg(){
+ outgoingMsg.setUpdateMsg(false);
+ if(selfFlag == State.IS_HEAD){
+ byte newState = getVertexValue().getState();
+ newState &= ~State.IS_HEAD;
+ newState |= State.IS_OLDHEAD;
+ getVertexValue().setState(newState);
+ resetSelfFlag();
+ outFlag |= MessageFlag.IS_HEAD;
+ } else if(selfFlag == State.IS_OLDHEAD){
+ outFlag |= MessageFlag.IS_OLDHEAD;
+ voteToHalt();
+ }
+ byte meToNeighborDir = (byte) (incomingMsg.getFlag() & MessageFlag.DIR_MASK);
+ byte neighborToMeDir = mirrorDirection(meToNeighborDir);
+ switch(neighborToMeDir){
+ case MessageFlag.DIR_FF:
+ case MessageFlag.DIR_FR:
+ setSuccessorAdjMsg();
+ if(ifFlipWithPredecessor())
+ outgoingMsg.setFlip(true);
+ else
+ outgoingMsg.setFlip(false);
+ outgoingMsg.setFlag(outFlag);
+ outgoingMsg.setNeighberNode(getVertexValue().getIncomingList());
+ outgoingMsg.setSourceVertexId(getVertexId());
+ outgoingMsg.setActualKmer(getVertexValue().getActualKmer());
+ sendMsg(incomingMsg.getSourceVertexId(), outgoingMsg); // replies to the sender; earlier target: getNextDestVertexId(getVertexValue())
+ break;
+ case MessageFlag.DIR_RF:
+ case MessageFlag.DIR_RR:
+ setPredecessorAdjMsg();
+ if(ifFilpWithSuccessor())
+ outgoingMsg.setFlip(true);
+ else
+ outgoingMsg.setFlip(false);
+ outgoingMsg.setFlag(outFlag);
+ outgoingMsg.setNeighberNode(getVertexValue().getOutgoingList());
+ outgoingMsg.setSourceVertexId(getVertexId());
+ outgoingMsg.setActualKmer(getVertexValue().getActualKmer());
+ sendMsg(incomingMsg.getSourceVertexId(), outgoingMsg); // replies to the sender; earlier target: getPreDestVertexId(getVertexValue())
+ break;
+ }
+ }
+
+ /**
+ * Reply to the sender of incomingMsg with a merge message that has IS_FINAL added to outFlag (used by P2).
+ * The message carries this vertex's flip bit, neighbor list (incoming list for FF/FR, outgoing list for RF/RR), id and actual kmer.
+ */
+ public void sendFinalMergeMsg(){
+ outFlag |= MessageFlag.IS_FINAL;
+ byte meToNeighborDir = (byte) (incomingMsg.getFlag() & MessageFlag.DIR_MASK);
+ byte neighborToMeDir = mirrorDirection(meToNeighborDir);
+ switch(neighborToMeDir){
+ case MessageFlag.DIR_FF:
+ case MessageFlag.DIR_FR:
+ setSuccessorAdjMsg();
+ if(ifFlipWithPredecessor())
+ outgoingMsg.setFlip(true);
+ else
+ outgoingMsg.setFlip(false);
+ outgoingMsg.setFlag(outFlag);
+ outgoingMsg.setNeighberNode(getVertexValue().getIncomingList());
+ outgoingMsg.setSourceVertexId(getVertexId());
+ outgoingMsg.setActualKmer(getVertexValue().getActualKmer());
+ sendMsg(incomingMsg.getSourceVertexId(), outgoingMsg); // replies to the sender; earlier target: getNextDestVertexId(getVertexValue())
+ break;
+ case MessageFlag.DIR_RF:
+ case MessageFlag.DIR_RR:
+ setPredecessorAdjMsg();
+ if(ifFilpWithSuccessor())
+ outgoingMsg.setFlip(true);
+ else
+ outgoingMsg.setFlip(false);
+ outgoingMsg.setFlag(outFlag);
+ outgoingMsg.setNeighberNode(getVertexValue().getOutgoingList());
+ outgoingMsg.setSourceVertexId(getVertexId());
+ outgoingMsg.setActualKmer(getVertexValue().getActualKmer());
+ sendMsg(incomingMsg.getSourceVertexId(), outgoingMsg); // replies to the sender; earlier target: getPreDestVertexId(getVertexValue())
+ break;
+ }
+ }
+
+ /**
+ * Send this vertex's kmer, neighbor list, flip bit and flags to its merge partner (used by P4), then delete this vertex.
+ * SHOULD_MERGEWITHNEXT targets getNextDestVertexId, SHOULD_MERGEWITHPREV targets getPrevDestVertexId; any other state sends nothing and deletes nothing.
+ */
+ public void broadcastMergeMsg(){
+ if(headFlag > 0)
+ outFlag |= MessageFlag.IS_HEAD;
+ switch(getVertexValue().getState() & State.SHOULD_MERGE_MASK) {
+ case State.SHOULD_MERGEWITHNEXT:
+ setSuccessorAdjMsg();
+ if(ifFlipWithPredecessor())
+ outgoingMsg.setFlip(true);
+ else
+ outgoingMsg.setFlip(false);
+ outgoingMsg.setFlag(outFlag);
+ outgoingMsg.setNeighberNode(getVertexValue().getIncomingList());
+ outgoingMsg.setSourceVertexId(getVertexId());
+ outgoingMsg.setActualKmer(getVertexValue().getActualKmer());
+ sendMsg(getNextDestVertexId(getVertexValue()), outgoingMsg);
+ deleteVertex(getVertexId());
+ break;
+ case State.SHOULD_MERGEWITHPREV:
+ setPredecessorAdjMsg();
+ if(ifFilpWithSuccessor())
+ outgoingMsg.setFlip(true);
+ else
+ outgoingMsg.setFlip(false);
+ outgoingMsg.setFlag(outFlag);
+ outgoingMsg.setNeighberNode(getVertexValue().getOutgoingList());
+ outgoingMsg.setSourceVertexId(getVertexId());
+ outgoingMsg.setActualKmer(getVertexValue().getActualKmer());
+ sendMsg(getPrevDestVertexId(getVertexValue()), outgoingMsg);
+ deleteVertex(getVertexId());
+ break;
+ }
+ }
+
+    public void setStateAsMergeWithNext(){
+        // mask the state with SHOULD_MERGE_CLEAR, then OR in SHOULD_MERGEWITHNEXT
+        byte current = getVertexValue().getState();
+        byte updated = (byte) ((current & State.SHOULD_MERGE_CLEAR) | State.SHOULD_MERGEWITHNEXT);
+        getVertexValue().setState(updated);
+    }
+
+    /**
+     * If this vertex has a next destination, mark it as merging with next and
+     * send the update message through broadcastUpdateMsg; otherwise do nothing.
+     */
+    public void sendUpdateMsgToPredecessor(){
+        if(!hasNextDest(getVertexValue()))
+            return;
+        setStateAsMergeWithNext();
+        broadcastUpdateMsg();
+    }
+
+    public void setStateAsMergeWithPrev(){
+        // mask the state with SHOULD_MERGE_CLEAR, then OR in SHOULD_MERGEWITHPREV
+        byte current = getVertexValue().getState();
+        byte updated = (byte) ((current & State.SHOULD_MERGE_CLEAR) | State.SHOULD_MERGEWITHPREV);
+        getVertexValue().setState(updated);
+    }
+
+    /**
+     * If this vertex has a previous destination, mark it as merging with prev and
+     * send the update message through broadcastUpdateMsg; otherwise do nothing.
+     */
+    public void sendUpdateMsgToSuccessor(){
+        if(!hasPrevDest(getVertexValue()))
+            return;
+        setStateAsMergeWithPrev();
+        broadcastUpdateMsg();
+    }
+
+    /**
+     * OR State.NO_MERGE into the vertex state.
+     */
+    public void setStateAsNoMerge(){
+        // no mask is applied first, so every bit that is already set
+        // (including any SHOULD_MERGE bits) is kept
+        byte withNoMerge = (byte) (getVertexValue().getState() | State.NO_MERGE);
+        getVertexValue().setState(withNoMerge);
+    }
+
+    /**
+     * Returns the edge direction for B->A when the A->B edge has direction {@code dir}; throws RuntimeException for any value other than DIR_FF, DIR_FR, DIR_RF or DIR_RR.
+     */
+    public byte mirrorDirection(byte dir) {
+        switch (dir) {
+            case MessageFlag.DIR_FF:
+                return MessageFlag.DIR_RR;
+            case MessageFlag.DIR_FR:
+                return MessageFlag.DIR_FR;
+            case MessageFlag.DIR_RF:
+                return MessageFlag.DIR_RF;
+            case MessageFlag.DIR_RR:
+                return MessageFlag.DIR_FF;
+            default:
+                throw new RuntimeException("Unrecognized direction in mirrorDirection: " + dir);
+        }
+    }
+
+    /**
+     * Return neighborDir unchanged when flip is false; otherwise swap FF with FR and RF with RR (RuntimeException for any other value).
+     */
+    public byte flipDirection(byte neighborDir, boolean flip){
+        // nothing to translate without a flip
+        if(!flip)
+            return neighborDir;
+        switch (neighborDir) {
+            case MessageFlag.DIR_FF:
+                return MessageFlag.DIR_FR;
+            case MessageFlag.DIR_FR:
+                return MessageFlag.DIR_FF;
+            case MessageFlag.DIR_RF:
+                return MessageFlag.DIR_RR;
+            case MessageFlag.DIR_RR:
+                return MessageFlag.DIR_RF;
+            default:
+                throw new RuntimeException("Unrecognized direction for neighborDir: " + neighborDir);
+        }
+    }
+
+    /**
+     * Handle an update message: record its flag in inFlag and pass the mirrored and flip-adjusted directions, the sender id and the node id from the message's neighbor list to processUpdates.
+     */
+    public void processUpdate(){
+        inFlag = incomingMsg.getFlag();
+        // direction as seen from this vertex, then adjusted for the sender's flip bit
+        byte dirToMe = mirrorDirection((byte) (inFlag & MessageFlag.DIR_MASK));
+        byte dirToMerge = flipDirection(dirToMe, incomingMsg.isFlip());
+        getVertexValue().processUpdates(
+                dirToMe, incomingMsg.getSourceVertexId(),
+                dirToMerge,
+                VertexUtil.getNodeIdFromAdjacencyList(incomingMsg.getNeighberNode()));
+    }
+
+    /**
+     * Merge with the sender of incomingMsg via processMerges; a message flagged IS_HEAD also sets IS_HEAD on this vertex.
+     */
+    public void processMerge(){
+        inFlag = incomingMsg.getFlag();
+        byte dirToMe = mirrorDirection((byte) (inFlag & MessageFlag.DIR_MASK));
+
+        // take over the head marker carried by the message
+        boolean fromHead = (inFlag & MessageFlag.IS_HEAD) > 0;
+        if(fromHead){
+            byte headState = (byte) (getVertexValue().getState() | State.IS_HEAD);
+            getVertexValue().setState(headState);
+        }
+
+        byte dirToMerge = flipDirection(dirToMe, incomingMsg.isFlip());
+        getVertexValue().processMerges(
+                dirToMe, incomingMsg.getSourceVertexId(),
+                dirToMerge, VertexUtil.getNodeIdFromAdjacencyList(incomingMsg.getNeighberNode()),
+                kmerSize, incomingMsg.getActualKmer());
+    }
+
+    /**
+     * Merge with the sender of the given message via processMerges; unlike the no-argument form it neither writes inFlag nor inspects IS_HEAD.
+     */
+    public void processMerge(MessageWritable msg){
+        // direction as seen from this vertex
+        byte dirToMe = mirrorDirection((byte) (msg.getFlag() & MessageFlag.DIR_MASK));
+        // direction after applying the sender's flip bit
+        byte dirToMerge = flipDirection(dirToMe, msg.isFlip());
+        getVertexValue().processMerges(
+                dirToMe, msg.getSourceVertexId(),
+                dirToMerge, VertexUtil.getNodeIdFromAdjacencyList(msg.getNeighberNode()),
+                kmerSize, msg.getActualKmer());
+    }
+
+ /**
+ * Final merge for P2: trim the sender's actual kmer to the part beyond its (kmerSize - 1)-character overlap with this vertex's kmer, store it in tmpKmer and hand it to processMerges. NOTE(review): indexOf / lastIndexOf return -1 when no overlap is found and the substring calls below do not guard against that - confirm callers guarantee an overlap.
+ */
+ public void processFinalMerge(MessageWritable msg){
+ byte meToNeighborDir = (byte) (msg.getFlag() & MessageFlag.DIR_MASK);
+ byte neighborToMeDir = mirrorDirection(meToNeighborDir);
+
+ byte neighborToMergeDir = flipDirection(neighborToMeDir, msg.isFlip());
+ // FF/FR keep the sender's text from the first match of this kmer's suffix onwards; RF/RR keep it up to the end of the last match of this kmer's prefix. FR/RF work on the reverse complement.
+ String selfString;
+ String match;
+ String msgString;
+ int index;
+ switch(neighborToMeDir){
+ case MessageFlag.DIR_FF:
+ selfString = getVertexValue().getActualKmer().toString();
+ match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
+ msgString = msg.getActualKmer().toString();
+ index = msgString.indexOf(match);
+ tmpKmer.setByRead(msgString.length() - index, msgString.substring(index).getBytes(), 0);
+ break;
+ case MessageFlag.DIR_FR:
+ selfString = getVertexValue().getActualKmer().toString();
+ match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
+ msgString = GeneCode.reverseComplement(msg.getActualKmer().toString());
+ index = msgString.indexOf(match);
+ tmpKmer.setByReadReverse(msgString.length() - index, msgString.substring(index).getBytes(), 0);
+ break;
+ case MessageFlag.DIR_RF:
+ selfString = getVertexValue().getActualKmer().toString();
+ match = selfString.substring(0, kmerSize - 1);
+ msgString = GeneCode.reverseComplement(msg.getActualKmer().toString());
+ index = msgString.lastIndexOf(match) + kmerSize - 2;
+ tmpKmer.setByReadReverse(index + 1, msgString.substring(0, index + 1).getBytes(), 0);
+ break;
+ case MessageFlag.DIR_RR:
+ selfString = getVertexValue().getActualKmer().toString();
+ match = selfString.substring(0, kmerSize - 1);
+ msgString = msg.getActualKmer().toString();
+ index = msgString.lastIndexOf(match) + kmerSize - 2;
+ tmpKmer.setByRead(index + 1, msgString.substring(0, index + 1).getBytes(), 0);
+ break;
+ }
+
+ getVertexValue().processMerges(neighborToMeDir, msg.getSourceVertexId(),
+ neighborToMergeDir, VertexUtil.getNodeIdFromAdjacencyList(msg.getNeighberNode()),
+ kmerSize, tmpKmer);
+ }
+
+    /**
+     * Mask the vertex state with VERTEX_CLEAR and OR in IS_HEAD.
+     */
+    public void setHeadState(){
+        byte masked = (byte) (getVertexValue().getState() & State.VERTEX_CLEAR);
+        // OR the head marker into the masked value
+        byte headState = (byte) (masked | State.IS_HEAD);
+        getVertexValue().setState(headState);
+    }
+
+    /**
+     * Mask the vertex state with VERTEX_CLEAR and OR in IS_FINAL.
+     */
+    public void setFinalState(){
+        byte masked = (byte) (getVertexValue().getState() & State.VERTEX_CLEAR);
+        // OR the final marker into the masked value
+        byte finalState = (byte) (masked | State.IS_FINAL);
+        getVertexValue().setState(finalState);
+    }
+
+    /**
+     * Mask the vertex state with VERTEX_CLEAR and OR in IS_FINAL. NOTE(review): this does exactly what setFinalState does - confirm no separate stop marker was intended.
+     */
+    public void setStopFlag(){
+        byte masked = (byte) (getVertexValue().getState() & State.VERTEX_CLEAR);
+        // OR the final marker into the masked value
+        byte stoppedState = (byte) (masked | State.IS_FINAL);
+        getVertexValue().setState(stoppedState);
+    }
+
+    /** Return the bits of incomingMsg's flag selected by MessageFlag.VERTEX_MASK. */
+    public byte getMsgFlag(){
+        // mask in int arithmetic, then narrow back to byte
+        int masked = incomingMsg.getFlag() & MessageFlag.VERTEX_MASK;
+        return (byte) masked;
+    }
+
+    /** Recompute selfFlag from the current vertex state, masked with MessageFlag.VERTEX_MASK. */
+    public void resetSelfFlag(){
+        // NOTE(review): P2ForPathMergeVertex.initVertex masks with State.VERTEX_MASK instead - confirm both masks are equal
+        int masked = getVertexValue().getState() & MessageFlag.VERTEX_MASK;
+        selfFlag = (byte) masked;
+    }
+
+    /**
+     * Send a kill notification (KILL | DIR_FROM_DEADVERTEX plus the edge direction) to the first FF neighbor, or else the first FR neighbor, and to the first RF neighbor, or else the first RR neighbor; then delete this vertex. Pre-condition: vertex is a path vertex.
+     */
+    public void broadcaseKillself(){
+        outFlag = 0;
+        outFlag |= MessageFlag.KILL;
+        outFlag |= MessageFlag.DIR_FROM_DEADVERTEX;
+        outgoingMsg.setSourceVertexId(getVertexId());
+
+        if(getVertexValue().getFFList().getCountOfPosition() > 0){//#FFList() > 0
+            outFlag |= MessageFlag.DIR_FF;
+            outgoingMsg.setFlag(outFlag);
+            sendMsg(getVertexValue().getFFList().getPosition(0), outgoingMsg);
+        }
+        else if(getVertexValue().getFRList().getCountOfPosition() > 0){//#FRList() > 0
+            outFlag |= MessageFlag.DIR_FR;
+            outgoingMsg.setFlag(outFlag);
+            sendMsg(getVertexValue().getFRList().getPosition(0), outgoingMsg);
+        }
+        // drop the forward direction set above before adding the reverse one; otherwise the two direction codes are OR-ed together
+        outFlag &= MessageFlag.DIR_CLEAR;
+        if(getVertexValue().getRFList().getCountOfPosition() > 0){//#RFList() > 0
+            outFlag |= MessageFlag.DIR_RF;
+            outgoingMsg.setFlag(outFlag);
+            sendMsg(getVertexValue().getRFList().getPosition(0), outgoingMsg);
+        }
+        else if(getVertexValue().getRRList().getCountOfPosition() > 0){//#RRList() > 0
+            outFlag |= MessageFlag.DIR_RR;
+            outgoingMsg.setFlag(outFlag);
+            sendMsg(getVertexValue().getRRList().getPosition(0), outgoingMsg);
+        }
+
+        deleteVertex(getVertexId());
+    }
+
+ /**
+ * Remove the sender of incomingMsg from one adjacency list of this vertex; the list is picked from the direction in the message (FF -> RR list, FR -> FR list, RF -> RF list, RR -> FF list). Only the first matching entry is removed, and a direction outside these four is ignored.
+ */
+ public void responseToDeadVertex(){
+ switch(incomingMsg.getFlag() & MessageFlag.DIR_MASK){
+ case MessageFlag.DIR_FF:
+ // message direction FF: drop the sender (incomingMsg.getSourceVertexId()) from the RR list
+ kmerIterator = getVertexValue().getRRList().iterator();
+ while(kmerIterator.hasNext()){
+ tmpKmer = kmerIterator.next();
+ if(tmpKmer.equals(incomingMsg.getSourceVertexId())){
+ kmerIterator.remove();
+ break;
+ }
+ }
+ break;
+ case MessageFlag.DIR_FR:
+ // message direction FR: drop the sender from the FR list
+ kmerIterator = getVertexValue().getFRList().iterator();
+ while(kmerIterator.hasNext()){
+ tmpKmer = kmerIterator.next();
+ if(tmpKmer.equals(incomingMsg.getSourceVertexId())){
+ kmerIterator.remove();
+ break;
+ }
+ }
+ break;
+ case MessageFlag.DIR_RF:
+ // message direction RF: drop the sender from the RF list
+ kmerIterator = getVertexValue().getRFList().iterator();
+ while(kmerIterator.hasNext()){
+ tmpKmer = kmerIterator.next();
+ if(tmpKmer.equals(incomingMsg.getSourceVertexId())){
+ kmerIterator.remove();
+ break;
+ }
+ }
+ break;
+ case MessageFlag.DIR_RR:
+ // message direction RR: drop the sender from the FF list
+ kmerIterator = getVertexValue().getFFList().iterator();
+ while(kmerIterator.hasNext()){
+ tmpKmer = kmerIterator.next();
+ if(tmpKmer.equals(incomingMsg.getSourceVertexId())){
+ kmerIterator.remove();
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+    public boolean isReceiveKillMsg(){ // true for a KILL message that lacks the DIR_FROM_DEADVERTEX marker
+        byte kill = (byte) (incomingMsg.getFlag() & MessageFlag.KILL_MASK);
+        byte dead = (byte) (incomingMsg.getFlag() & MessageFlag.DEAD_MASK);
+        return kill == MessageFlag.KILL && dead != MessageFlag.DIR_FROM_DEADVERTEX;
+    }
+
+    public boolean isResponseKillMsg(){ // true for a KILL message that carries the DIR_FROM_DEADVERTEX marker
+        byte kill = (byte) (incomingMsg.getFlag() & MessageFlag.KILL_MASK);
+        byte dead = (byte) (incomingMsg.getFlag() & MessageFlag.DEAD_MASK);
+        return kill == MessageFlag.KILL && dead == MessageFlag.DIR_FROM_DEADVERTEX;
+    }
+
+ @Override
+ public void compute(Iterator<MessageWritable> msgIterator) {
+ } // empty body: this class does no per-superstep work of its own
+}
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/MapReduceVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/MapReduceVertex.java
new file mode 100644
index 0000000..5baf492
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/MapReduceVertex.java
@@ -0,0 +1,193 @@
+package edu.uci.ics.genomix.pregelix.operator.pathmerge;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Random;
+
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.format.P2PathMergeOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable.State;
+import edu.uci.ics.genomix.pregelix.type.MessageFlag;
+import edu.uci.ics.genomix.type.VKmerListWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.api.util.BspUtils;
+
+public class MapReduceVertex extends
+ BasicGraphCleanVertex {
+
+ public static boolean fakeVertexExist = false;
+ protected static VKmerBytesWritable fakeVertex = null;
+
+ protected VKmerBytesWritable reverseKmer;
+ protected VKmerListWritable kmerList = null;
+ protected Map<VKmerBytesWritable, VKmerListWritable> kmerMapper = new HashMap<VKmerBytesWritable, VKmerListWritable>();
+
+ /**
+ * Prepare the vertex for a compute call: read kmerSize / maxIteration from the configuration while they are unset, allocate or reset the message and kmer-list buffers, and create the static fake vertex id if none exists yet.
+ */
+ public void initVertex() {
+ if (kmerSize == -1)
+ kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+ if (maxIteration < 0)
+ maxIteration = getContext().getConfiguration().getInt(ITERATIONS, 1000000);
+ if(incomingMsg == null)
+ incomingMsg = new MessageWritable(kmerSize);
+ if(outgoingMsg == null)
+ outgoingMsg = new MessageWritable(kmerSize);
+ else
+ outgoingMsg.reset(kmerSize);
+ if(reverseKmer == null)
+ reverseKmer = new VKmerBytesWritable();
+ if(kmerList == null)
+ kmerList = new VKmerListWritable();
+ else
+ kmerList.reset();
+ if(fakeVertex == null){
+ fakeVertex = new VKmerBytesWritable();
+ String random = generaterRandomString(kmerSize + 1); // NOTE(review): the id is random and held in a static field - presumably all workers must agree on it; confirm for multi-JVM runs
+ fakeVertex.setByRead(kmerSize + 1, random.getBytes(), 0);
+ }
+ if(destVertexId == null)
+ destVertexId = new VKmerBytesWritable(kmerSize);
+ }
+
+    /**
+     * Build a string of n characters, each picked at random from "ACGT".
+     */
+    public String generaterRandomString(int n){
+        final String alphabet = "ACGT";
+        Random rng = new Random();
+        StringBuilder out = new StringBuilder();
+        for (int remaining = n; remaining > 0; remaining--) {
+            int pick = rng.nextInt(alphabet.length());
+            out.append(alphabet.charAt(pick));
+        }
+        return out.toString();
+    }
+
+    /**
+     * Add the fake vertex (state IS_FAKE, message list and edges cleared) to the graph unless fakeVertexExist is already set.
+     */
+    @SuppressWarnings({ "rawtypes", "unchecked" })
+    public void addFakeVertex(){
+        if(fakeVertexExist)
+            return;
+
+        VertexValueWritable fakeValue = new VertexValueWritable(kmerSize + 1);
+        fakeValue.setState(State.IS_FAKE);
+        fakeValue.setFakeVertex(true);
+
+        Vertex fake = (Vertex) BspUtils.createVertex(getContext().getConfiguration());
+        fake.getMsgList().clear();
+        fake.getEdges().clear();
+        fake.setVertexId(fakeVertex);
+        fake.setVertexValue(fakeValue);
+
+        addVertex(fakeVertex, fake);
+        fakeVertexExist = true;
+    }
+
+    public void sendMsgToFakeVertex(){ // a non-fake vertex sends its id and actual kmer to the fake vertex, then votes to halt
+        if(getVertexValue().isFakeVertex())
+            return;
+        outgoingMsg.setSourceVertexId(getVertexId());
+        outgoingMsg.setActualKmer(getVertexValue().getActualKmer());
+        sendMsg(fakeVertex, outgoingMsg);
+        voteToHalt();
+    }
+
+    public void mapKeyByActualKmer(Iterator<MessageWritable> msgIterator){
+        // Mapper: group sender ids in kmerMapper under the smaller of each actual kmer and its setByReadReverse form
+        while(msgIterator.hasNext()){
+            incomingMsg = msgIterator.next();
+            String kmerString = incomingMsg.getActualKmer().toString();
+            tmpKmer.reset(kmerString.length());
+            tmpKmer.setByRead(kmerString.length(), kmerString.getBytes(), 0);
+            reverseKmer.setByReadReverse(kmerString.length(), kmerString.getBytes(), 0);
+            if(reverseKmer.compareTo(tmpKmer) < 0)
+                tmpKmer.setAsCopy(reverseKmer);
+            VKmerListWritable senders = kmerMapper.get(tmpKmer);
+            if(senders == null){
+                // tmpKmer and kmerList are reused scratch fields: store private copies so map entries do not alias each other
+                VKmerBytesWritable key = new VKmerBytesWritable();
+                key.setAsCopy(tmpKmer);
+                senders = new VKmerListWritable();
+                kmerMapper.put(key, senders);
+            }
+            senders.append(incomingMsg.getSourceVertexId());
+        }
+    }
+
+ public void reduceKeyByActualKmer(){ // Reducer: for every group in kmerMapper, send KILL to all members but the first
+ for(VKmerBytesWritable key : kmerMapper.keySet()){
+ kmerList = kmerMapper.get(key);
+ for(int i = 1; i < kmerList.getCountOfPosition(); i++){
+ // the loop starts at 1, so the entry at position 0 receives no KILL message
+ outgoingMsg.setFlag(MessageFlag.KILL);
+ destVertexId.setAsCopy(kmerList.getPosition(i));
+ sendMsg(destVertexId, outgoingMsg);
+ }
+ }
+ }
+
+    public void finalVertexResponseToFakeVertex(Iterator<MessageWritable> msgIterator){ // a flag equal to KILL triggers broadcaseKillself
+        while(msgIterator.hasNext()){
+            incomingMsg = msgIterator.next();
+            inFlag = incomingMsg.getFlag();
+            if(inFlag != MessageFlag.KILL)
+                continue;
+            broadcaseKillself();
+        }
+    }
+
+    /** Five-superstep job: add the fake vertex, report kmers to it, group and send KILL, killed vertices notify their neighbors, neighbors drop the dead edges. */
+    @Override
+    public void compute(Iterator<MessageWritable> msgIterator) {
+        initVertex();
+        if(getSuperstep() == 1){
+            addFakeVertex();
+        } else if(getSuperstep() == 2){
+            /** non-fake vertices send their id and actual kmer to the fake vertex **/
+            sendMsgToFakeVertex();
+        } else if(getSuperstep() == 3){
+            kmerMapper.clear();
+            /** Mapper **/
+            mapKeyByActualKmer(msgIterator);
+            /** Reducer **/
+            reduceKeyByActualKmer();
+        } else if(getSuperstep() == 4){
+            /** only for testing a single MapReduce job: a fake vertex without messages removes itself **/
+            if(!msgIterator.hasNext() && getVertexValue().getState() == State.IS_FAKE){
+                fakeVertexExist = false;
+                deleteVertex(fakeVertex);
+            }
+            finalVertexResponseToFakeVertex(msgIterator);
+        } else if(getSuperstep() == 5){
+            while (msgIterator.hasNext()) {
+                incomingMsg = msgIterator.next();
+                if(isResponseKillMsg()) // broadcaseKillself marks its notifications with DIR_FROM_DEADVERTEX
+                    responseToDeadVertex();
+            }
+        }
+    }
+
+ public static void main(String[] args) throws Exception { // configures a PregelixJob for MapReduceVertex and hands it to Client.run
+ PregelixJob job = new PregelixJob(MapReduceVertex.class.getSimpleName());
+ job.setVertexClass(MapReduceVertex.class);
+ /**
+ * Input is read with GraphCleanInputFormat; output is written with P2PathMergeOutputFormat
+ */
+ job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(P2PathMergeOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(VKmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ Client.run(args, job);
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/P2ForPathMergeVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/P2ForPathMergeVertex.java
new file mode 100644
index 0000000..d16e6d5
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/P2ForPathMergeVertex.java
@@ -0,0 +1,308 @@
+package edu.uci.ics.genomix.pregelix.operator.pathmerge;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.InitialGraphCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.format.P2PathMergeOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable.State;
+import edu.uci.ics.genomix.pregelix.type.MessageFlag;
+import edu.uci.ics.genomix.pregelix.type.MessageFromHead;
+import edu.uci.ics.genomix.type.VKmerListWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+/*
+ * vertexId: BytesWritable
+ * vertexValue: VertexValueWritable
+ * edgeValue: NullWritable
+ * message: MessageWritable
+ *
+ * DNA:
+ * A: 00
+ * C: 01
+ * G: 10
+ * T: 11
+ *
+ * succeed node
+ * A 00000001 1
+ * G 00000010 2
+ * C 00000100 4
+ * T 00001000 8
+ * precursor node
+ * A 00010000 16
+ * G 00100000 32
+ * C 01000000 64
+ * T 10000000 128
+ *
+ * For example, ONE LINE in input file: 00,01,10 0001,0010,
+ * That means the vertexId is ACG, its successor node is A and its precursor node is C.
+ * The successor and precursor nodes are stored in vertexValue; edgeValue is not used.
+ * The message format is defined in edu.uci.ics.genomix.pregelix.io.MessageWritable.
+ */
+public class P2ForPathMergeVertex extends
+ MapReduceVertex {
+
+ private ArrayList<MessageWritable> receivedMsgList = new ArrayList<MessageWritable>();
+
+ private boolean isFakeVertex = false;
+    /**
+     * Prepare the vertex for a compute call: read kmerSize / maxIteration from the configuration while they are unset, refresh headFlag, selfFlag and isFakeVertex from the vertex state, and allocate or reset the reusable message, list and kmer buffers.
+     */
+    public void initVertex() {
+        if (kmerSize == -1)
+            kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+        if (maxIteration < 0)
+            maxIteration = getContext().getConfiguration().getInt(ITERATIONS, 1000000);
+        // flags derived from the current vertex state
+        headFlag = (byte)(getVertexValue().getState() & State.IS_HEAD);
+        selfFlag = (byte)(getVertexValue().getState() & State.VERTEX_MASK);
+        isFakeVertex = ((byte)getVertexValue().getState() & State.FAKEFLAG_MASK) > 0;
+        if(incomingMsg == null)
+            incomingMsg = new MessageWritable();
+        if(outgoingMsg == null)
+            outgoingMsg = new MessageWritable();
+        else
+            outgoingMsg.reset();
+        receivedMsgList.clear();
+        if(reverseKmer == null)
+            reverseKmer = new VKmerBytesWritable();
+        if(kmerList == null)
+            kmerList = new VKmerListWritable();
+        else
+            kmerList.reset();
+        if(destVertexId == null)
+            destVertexId = new VKmerBytesWritable();
+        if(tmpKmer == null)
+            tmpKmer = new VKmerBytesWritable();
+        if(fakeVertex == null){
+            fakeVertex = new VKmerBytesWritable();
+            String random = generaterRandomString(kmerSize + 1);
+            fakeVertex.setByRead(kmerSize + 1, random.getBytes(), 0);
+        }
+    }
+
+ /**
+ * Send outgoingMsg (current outFlag, this vertex as source) to the next and to the previous destination; a destination that resolves to null is skipped.
+ */
+ public void sendOutMsg() {
+ // next neighbor: per its name, the helper also sets the direction flag in outFlag - TODO confirm
+ tmpKmer = getNextDestVertexIdAndSetFlag(getVertexValue());
+ if(tmpKmer != null){
+ destVertexId.setAsCopy(tmpKmer);
+ outgoingMsg.setFlag(outFlag);
+ outgoingMsg.setSourceVertexId(getVertexId());
+ sendMsg(destVertexId, outgoingMsg);
+ }
+
+ // previous neighbor, handled the same way
+ tmpKmer = getPrevDestVertexIdAndSetFlag(getVertexValue());
+ if(tmpKmer != null){
+ destVertexId.setAsCopy(tmpKmer);
+ outgoingMsg.setFlag(outFlag);
+ outgoingMsg.setSourceVertexId(getVertexId());
+ sendMsg(destVertexId, outgoingMsg);
+ }
+ }
+
+    /**
+     * Count the messages in receivedMsgList whose VERTEX_MASK bits equal IS_HEAD or IS_OLDHEAD and return the matching MessageFromHead code.
+     */
+    public byte checkNumOfMsgsFromHead(){
+        int heads = 0;
+        int oldHeads = 0;
+
+        for(MessageWritable received : receivedMsgList){
+            inFlag = received.getFlag();
+            int origin = inFlag & MessageFlag.VERTEX_MASK;
+            if(origin == MessageFlag.IS_HEAD)
+                heads++;
+            else if(origin == MessageFlag.IS_OLDHEAD)
+                oldHeads++;
+        }
+
+        // two head messages win regardless of the old-head count
+        if(heads == 2)
+            return MessageFromHead.BothMsgsFromHead;
+        if(heads == 1 && oldHeads == 1)
+            return MessageFromHead.OneMsgFromOldHeadAndOneFromHead;
+        if(heads == 1 && oldHeads == 0)
+            return MessageFromHead.OneMsgFromHeadAndOneFromNonHead;
+        if(heads == 0 && oldHeads == 0)
+            return MessageFromHead.BothMsgsFromNonHead;
+        // every remaining combination is reported as NO_MSG
+        return MessageFromHead.NO_MSG;
+    }
+
+    /**
+     * Send the want-to-merge messages (sendOutMsg) unless selfFlag is IS_HEAD or IS_OLDHEAD; msgIterator is not read.
+     */
+    public void sendMsgToPathVertex(Iterator<MessageWritable> msgIterator) {
+        boolean headOrOldHead = selfFlag == State.IS_HEAD || selfFlag == State.IS_OLDHEAD;
+        if(headOrOldHead)
+            return;
+        sendOutMsg();
+    }
+
+    /**
+     * Answer the received messages: a head vertex without messages sends IS_FINAL to its neighbors; KILL messages are handled first (dead-vertex notification or own removal); an IS_FINAL message is merged directly; anything else is answered with an update message followed by a merge message.
+     */
+    public void responseMsgToHeadVertex(Iterator<MessageWritable> msgIterator) {
+        if(!msgIterator.hasNext() && selfFlag == State.IS_HEAD){
+            outFlag |= MessageFlag.IS_FINAL;
+            sendOutMsg();
+        }
+        while (msgIterator.hasNext()) {
+            incomingMsg = msgIterator.next();
+            /** KILL handling; the dead-vertex marker is read through DEAD_MASK, as in isResponseKillMsg and processMergeInHeadVertex **/
+            if((byte)(incomingMsg.getFlag() & MessageFlag.KILL_MASK) == MessageFlag.KILL){
+                if((byte)(incomingMsg.getFlag() & MessageFlag.DEAD_MASK) == MessageFlag.DIR_FROM_DEADVERTEX){
+                    responseToDeadVertex();
+                } else{
+                    broadcaseKillself();
+                }
+            }else if(getMsgFlag() == MessageFlag.IS_FINAL){
+                processMerge(incomingMsg);
+                getVertexValue().setState(State.IS_FINAL);
+            }else{
+                sendUpdateMsg();
+                outFlag = 0;
+                sendMergeMsg();
+            }
+        }
+    }
+
+ /**
+ * Consume the messages of a merge superstep: handle KILL messages, answer an IS_FINAL message with sendFinalMergeMsg (and stop reading), apply update messages when this vertex is IS_OLDHEAD, collect merge messages in receivedMsgList, then merge according to how many of them came from head / old-head vertices.
+ */
+ public void processMergeInHeadVertex(Iterator<MessageWritable> msgIterator){
+ // first pass: dispatch or collect every incoming message
+ while (msgIterator.hasNext()) {
+ incomingMsg = msgIterator.next();
+ /** final Vertex Responses To FakeVertex **/
+ if((byte)(incomingMsg.getFlag() & MessageFlag.KILL_MASK) == MessageFlag.KILL){
+ if((byte)(incomingMsg.getFlag() & MessageFlag.DEAD_MASK) == MessageFlag.DIR_FROM_DEADVERTEX){
+ responseToDeadVertex();
+ } else{
+ broadcaseKillself();
+ }
+ } else {
+ /** for final processing (2) **/
+ if(getMsgFlag() == MessageFlag.IS_FINAL){
+ sendFinalMergeMsg();
+ break;
+ }
+ if(incomingMsg.isUpdateMsg() && selfFlag == State.IS_OLDHEAD)
+ processUpdate();
+ else if(!incomingMsg.isUpdateMsg())
+ receivedMsgList.add(incomingMsg); // NOTE(review): stores the iterator's object itself - confirm the framework does not reuse message instances
+ }
+ }
+ if(receivedMsgList.size() != 0){
+ byte numOfMsgsFromHead = checkNumOfMsgsFromHead();
+ switch(numOfMsgsFromHead){
+ case MessageFromHead.BothMsgsFromHead:
+ case MessageFromHead.OneMsgFromOldHeadAndOneFromHead:
+ for(int i = 0; i < 2; i++)
+ processFinalMerge(receivedMsgList.get(i)); // previously processMerge()
+ getVertexValue().setState(State.IS_FINAL);
+ /** non-fake, final vertices send a message to the fake vertex **/
+ sendMsgToFakeVertex();
+ voteToHalt();
+ break;
+ case MessageFromHead.OneMsgFromHeadAndOneFromNonHead:
+ for(int i = 0; i < 2; i++) // NOTE(review): reads two entries, but this code is also returned for a single head message - confirm the list always holds two here
+ processFinalMerge(receivedMsgList.get(i));
+ setHeadState();
+ break;
+ case MessageFromHead.BothMsgsFromNonHead:
+ for(int i = 0; i < 2; i++) // NOTE(review): same two-entry assumption as above
+ processFinalMerge(receivedMsgList.get(i));
+ break;
+ case MessageFromHead.NO_MSG:
+ // no usable head combination: just halt
+ voteToHalt(); //deleteVertex(getVertexId());
+ break;
+ }
+ }
+ }
+    /** Supersteps 1-2 set up the fake vertex and the initial state; afterwards the job cycles through send (step % 3 == 0), respond (== 1) and merge (== 2) until maxIteration. */
+    @Override
+    public void compute(Iterator<MessageWritable> msgIterator) {
+        initVertex();
+        if (getSuperstep() == 1){
+            addFakeVertex();
+            startSendMsg();
+        } else if (getSuperstep() == 2){
+            if(!msgIterator.hasNext() && isFakeVertex)
+                voteToHalt();
+            initState(msgIterator);
+        }
+        else if (getSuperstep() % 3 == 0 && getSuperstep() <= maxIteration) {
+            if(!isFakeVertex){
+                /** for processing final merge (1) **/
+                if(msgIterator.hasNext()){
+                    incomingMsg = msgIterator.next();
+                    if(getMsgFlag() == MessageFlag.IS_FINAL){
+                        setFinalState();
+                        processFinalMerge(incomingMsg);
+                        /** non-fake, final vertices send a message to the fake vertex **/
+                        sendMsgToFakeVertex();
+                    } else if(isResponseKillMsg()){ // dead-vertex notifications carry DIR_FROM_DEADVERTEX
+                        responseToDeadVertex();
+                    }
+                }
+                /** processing general case **/
+                else{
+                    sendMsgToPathVertex(msgIterator);
+                    if(selfFlag != State.IS_HEAD)
+                        voteToHalt();
+                }
+            }
+            /** Fake vertex aggregates messages and groups them by actual kmer (2) **/
+            else{
+                kmerMapper.clear();
+                /** Mapper **/
+                mapKeyByActualKmer(msgIterator);
+                /** Reducer **/
+                reduceKeyByActualKmer();
+                voteToHalt();
+            }
+        } else if (getSuperstep() % 3 == 1 && getSuperstep() <= maxIteration) {
+            if(!isFakeVertex){
+                responseMsgToHeadVertex(msgIterator);
+                if(selfFlag != State.IS_HEAD)
+                    voteToHalt();
+            }
+            /** Fake vertex aggregates messages and groups them by actual kmer (1) **/
+            else{
+                kmerMapper.clear();
+                /** Mapper **/
+                mapKeyByActualKmer(msgIterator);
+                /** Reducer **/
+                reduceKeyByActualKmer();
+                voteToHalt();
+            }
+        } else if (getSuperstep() % 3 == 2 && getSuperstep() <= maxIteration){
+            if(!isFakeVertex)
+                processMergeInHeadVertex(msgIterator);
+        }else
+            voteToHalt();
+    }
+
+ public static void main(String[] args) throws Exception { // configures a PregelixJob for P2ForPathMergeVertex and hands it to Client.run
+ PregelixJob job = new PregelixJob(P2ForPathMergeVertex.class.getSimpleName());
+ job.setVertexClass(P2ForPathMergeVertex.class);
+ /**
+ * Input is read with InitialGraphCleanInputFormat; output is written with P2PathMergeOutputFormat
+ */
+ job.setVertexInputFormatClass(InitialGraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(P2PathMergeOutputFormat.class);
+ job.setOutputKeyClass(VKmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ job.setDynamicVertexValueSize(true);
+ Client.run(args, job);
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/P4ForPathMergeVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/P4ForPathMergeVertex.java
new file mode 100644
index 0000000..28d0563
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/P4ForPathMergeVertex.java
@@ -0,0 +1,240 @@
+package edu.uci.ics.genomix.pregelix.operator.pathmerge;
+
+import java.util.Iterator;
+import java.util.Random;
+
+
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanOutputFormat;
+import edu.uci.ics.genomix.pregelix.format.InitialGraphCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable.State;
+import edu.uci.ics.genomix.pregelix.type.MessageFlag;
+import edu.uci.ics.genomix.pregelix.util.VertexUtil;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+/*
+ * vertexId: VKmerBytesWritable
+ * vertexValue: VertexValueWritable
+ * edgeValue: NullWritable
+ * message: MessageWritable
+ *
+ * DNA:
+ * A: 00
+ * C: 01
+ * G: 10
+ * T: 11
+ *
+ * succeed node
+ * A 00000001 1
+ * G 00000010 2
+ * C 00000100 4
+ * T 00001000 8
+ * precursor node
+ * A 00010000 16
+ * G 00100000 32
+ * C 01000000 64
+ * T 10000000 128
+ *
+ * For example, ONE LINE in input file: 00,01,10 0001,0010,
+ * That means that vertexId is ACG, its succeed node is A and its precursor node is C.
+ * The succeed node and precursor node will be stored in vertexValue and we don't use edgeValue.
+ * The details about message are in edu.uci.ics.genomix.pregelix.io.MessageWritable.
+ */
+/**
+ * Naive Algorithm for path merge graph
+ */
+public class P4ForPathMergeVertex extends
+ BasicGraphCleanVertex {
+ public static final String RANDSEED = "P4ForPathMergeVertex.randSeed";
+ public static final String PROBBEINGRANDOMHEAD = "P4ForPathMergeVertex.probBeingRandomHead";
+
+ private static long randSeed = 1;
+ private float probBeingRandomHead = -1;
+ private Random randGenerator;
+
+ private VKmerBytesWritable curKmer = new VKmerBytesWritable();
+ private VKmerBytesWritable nextKmer = new VKmerBytesWritable();
+ private VKmerBytesWritable prevKmer = new VKmerBytesWritable();
+ private boolean hasNext;
+ private boolean hasPrev;
+ private boolean curHead;
+ private boolean nextHead;
+ private boolean prevHead;
+ private byte selfFlag;
+
+    /**
+     * Per-call setup: lazily reads kmerSize, maxIteration and probBeingRandomHead from the job
+     * configuration, prepares the message buffers, reseeds the RNG and clears the per-step flags.
+     */
+    public void initVertex() {
+        if (kmerSize == -1)
+            kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+        if (maxIteration < 0)
+            maxIteration = getContext().getConfiguration().getInt(ITERATIONS, 1000000);
+        if (incomingMsg == null)
+            incomingMsg = new MessageWritable(kmerSize);
+        if (outgoingMsg == null)
+            outgoingMsg = new MessageWritable(kmerSize);
+        else
+            outgoingMsg.reset(kmerSize);
+        if (destVertexId == null)
+            destVertexId = new VKmerBytesWritable(kmerSize);
+        // the seed is simply the current superstep number
+        randSeed = getSuperstep();
+        randGenerator = new Random(randSeed);
+        // Read the declared key PROBBEINGRANDOMHEAD first; fall back to the legacy bare key so existing job configs keep working.
+        if (probBeingRandomHead < 0)
+            probBeingRandomHead = getContext().getConfiguration().getFloat(PROBBEINGRANDOMHEAD, getContext().getConfiguration().getFloat("probBeingRandomHead", 0.5f));
+        hasNext = hasPrev = false;
+        curHead = nextHead = prevHead = false;
+        outFlag = (byte) 0;
+        inFlag = (byte) 0;
+        // Node may be marked as head b/c it's a real head or a real tail
+        headFlag = (byte) (State.IS_HEAD & getVertexValue().getState());
+    }
+
+ protected boolean isNodeRandomHead(VKmerBytesWritable nodeKmer) {
+ // "deterministically random", based on node id
+ // The generator is reseeded from (randSeed XOR the kmer's hashCode) * 100000 * superstep, so the
+ // answer for a kmer depends only on that seed (assuming hashCode is stable for equal kmers - TODO confirm).
+ randGenerator.setSeed((randSeed ^ nodeKmer.hashCode()) * 100000 * getSuperstep());//randSeed + nodeID.hashCode()
+ for(int i = 0; i < 500; i++) // discard the first 500 draws before sampling
+ randGenerator.nextFloat();
+ return randGenerator.nextFloat() < probBeingRandomHead; // head when the next draw falls below the threshold
+ }
+
+ /**
+ * set nextKmer to the element that's next (in the node's FF or FR list), returning true when there is a next neighbor
+ */
+ protected boolean setNextInfo(VertexValueWritable value) {
+ if (value.getFFList().getCountOfPosition() > 0) {
+ nextKmer.setAsCopy(value.getFFList().getPosition(0));
+ nextHead = isNodeRandomHead(nextKmer);
+ return true;
+ }
+ if (value.getFRList().getCountOfPosition() > 0) {
+ nextKmer.setAsCopy(value.getFRList().getPosition(0));
+ nextHead = isNodeRandomHead(nextKmer);
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * set prevKmer to the element that's previous (in the node's RR or RF list), returning true when there is a previous neighbor
+ */
+ protected boolean setPrevInfo(VertexValueWritable value) {
+ if (value.getRRList().getCountOfPosition() > 0) {
+ prevKmer.setAsCopy(value.getRRList().getPosition(0));
+ prevHead = isNodeRandomHead(prevKmer);
+ return true;
+ }
+ if (value.getRFList().getCountOfPosition() > 0) {
+ prevKmer.setAsCopy(value.getRFList().getPosition(0));
+ prevHead = isNodeRandomHead(prevKmer);
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public void compute(Iterator<MessageWritable> msgIterator) { // supersteps 1 and 2 are setup; afterwards the work repeats in a cycle keyed on superstep % 4
+ initVertex();
+ if (getSuperstep() == 1)
+ startSendMsg();
+ else if (getSuperstep() == 2)
+ initState(msgIterator);
+ else if (getSuperstep() % 4 == 3){ // cycle step: choose whether and in which direction this vertex merges
+ //tailFlag = (byte) (MessageFlag.IS_TAIL & getVertexValue().getState());
+ //outFlag = (byte) (headFlag | tailFlag);
+ outFlag |= headFlag;
+ // the outgoing flag and the vertex state are both marked NO_MERGE before a direction is chosen
+ outFlag |= MessageFlag.NO_MERGE;
+ setStateAsNoMerge();
+
+ // only PATH vertices are present. Find the ID's for my neighbors
+ curKmer.setAsCopy(getVertexId());
+
+ curHead = isNodeRandomHead(curKmer);
+
+
+ // the headFlag and tailFlag's indicate if the node is at the beginning or end of a simple path.
+ // We prevent merging towards non-path nodes
+ hasNext = setNextInfo(getVertexValue());//&& headFlag == 0;
+ hasPrev = setPrevInfo(getVertexValue());//&& headFlag == 0;
+ if (hasNext || hasPrev) {
+ if (curHead) {
+ if (hasNext && !nextHead) {
+ // compress this head to the forward tail
+ sendUpdateMsgToPredecessor();
+ } else if (hasPrev && !prevHead) {
+ // compress this head to the reverse tail
+ sendUpdateMsgToSuccessor();
+ }
+ }
+ else {
+ // I'm a tail
+ if (hasNext && hasPrev) {
+ if ((!nextHead && !prevHead) && (curKmer.compareTo(nextKmer) < 0 && curKmer.compareTo(prevKmer) < 0)) {
+ // tails on both sides, and I'm the "local minimum"
+ // compress me towards the tail in forward dir
+ sendUpdateMsgToPredecessor();
+ }
+ } else if (!hasPrev) {
+ // no previous node (hasNext is necessarily true inside this branch)
+ if (!nextHead && curKmer.compareTo(nextKmer) < 0) {
+ // merge towards tail in forward dir
+ sendUpdateMsgToPredecessor();
+ }
+ } else if (!hasNext) {
+ // no next node (hasPrev is necessarily true inside this branch)
+ if (!prevHead && curKmer.compareTo(prevKmer) < 0) {
+ // merge towards tail in reverse dir
+ sendUpdateMsgToSuccessor();
+ }
+ }
+ }
+ }
+ }
+ else if (getSuperstep() % 4 == 0){
+ // update neighbor: apply every received update message
+ while (msgIterator.hasNext()) {
+ incomingMsg = msgIterator.next();
+ processUpdate();
+ if(VertexUtil.isHeadOrRearVertexWithDegree(getVertexValue()))
+ voteToHalt();
+ }
+ } else if (getSuperstep() % 4 == 1){
+ //send message to the merge object and kill self
+ broadcastMergeMsg();
+ } else if (getSuperstep() % 4 == 2){
+ // merge: apply every received merge message
+ while (msgIterator.hasNext()) {
+ incomingMsg = msgIterator.next();
+ selfFlag = (byte) (State.VERTEX_MASK & getVertexValue().getState());
+ processMerge();
+
+ //head meets head, stop
+ if(getMsgFlag() == MessageFlag.IS_HEAD && selfFlag == MessageFlag.IS_HEAD)
+ voteToHalt();
+ }
+ }
+ }
+
+    public static void main(String[] args) throws Exception {
+        final Class<P4ForPathMergeVertex> vertexClass = P4ForPathMergeVertex.class;
+        // the job is named after the vertex class it runs
+        PregelixJob job = new PregelixJob(vertexClass.getSimpleName());
+        job.setVertexClass(vertexClass);
+        // vertex input/output formats, then the output key/value classes
+        job.setVertexInputFormatClass(InitialGraphCleanInputFormat.class);
+        job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+        job.setDynamicVertexValueSize(true);
+        job.setOutputKeyClass(VKmerBytesWritable.class);
+        job.setOutputValueClass(VertexValueWritable.class);
+        Client.run(args, job);
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/removelowcoverage/RemoveLowCoverageVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/removelowcoverage/RemoveLowCoverageVertex.java
new file mode 100644
index 0000000..3e822e4
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/removelowcoverage/RemoveLowCoverageVertex.java
@@ -0,0 +1,55 @@
+package edu.uci.ics.genomix.pregelix.operator.removelowcoverage;
+
+import java.util.Iterator;
+
+import org.apache.hadoop.io.NullWritable;
+
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+
+public class RemoveLowCoverageVertex extends
+ Vertex<VKmerBytesWritable, VertexValueWritable, NullWritable, MessageWritable> {
+ public static final String KMER_SIZE = "RemoveLowCoverageVertex.kmerSize";
+ public static final String MIN_AVERAGECOVERAGE = "RemoveLowCoverageVertex.minAverageCoverage";
+ public static int kmerSize = -1;
+ private static float minAverageCoverage = -1;
+
+ /**
+ * initiate kmerSize, length
+ */
+ public void initVertex() {
+ if (kmerSize == -1)
+ kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+ if(minAverageCoverage == -1)
+ minAverageCoverage = getContext().getConfiguration().getFloat(MIN_AVERAGECOVERAGE, 5);
+ }
+
+ @Override
+ public void compute(Iterator<MessageWritable> msgIterator) {
+ initVertex();
+ if(getVertexValue().getAverageCoverage() < minAverageCoverage)
+ deleteVertex(getVertexId());
+ else
+ voteToHalt();
+ }
+
+ public static void main(String[] args) throws Exception {
+ PregelixJob job = new PregelixJob(RemoveLowCoverageVertex.class.getSimpleName());
+ job.setVertexClass(RemoveLowCoverageVertex.class);
+ /**
+ * BinaryInput and BinaryOutput
+ */
+ job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(VKmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ Client.run(args, job);
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/scaffolding/ScaffoldingVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/scaffolding/ScaffoldingVertex.java
new file mode 100644
index 0000000..be43658
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/scaffolding/ScaffoldingVertex.java
@@ -0,0 +1,89 @@
+package edu.uci.ics.genomix.pregelix.operator.scaffolding;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import edu.uci.ics.genomix.pregelix.io.HashMapWritable;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.operator.pathmerge.MapReduceVertex;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerListWritable;
+
+public class ScaffoldingVertex extends
+        MapReduceVertex{
+    // head read id -> ids of the vertices that reported that read id in superstep 1
+    public static Map<Long, VKmerListWritable> scaffoldingMap = new HashMap<Long, VKmerListWritable>();
+    // scratch map that superstep 3 fills and hands to the vertex value
+    private HashMapWritable<VKmerBytesWritable, VKmerListWritable> traverseMap = new HashMapWritable<VKmerBytesWritable, VKmerListWritable>();
+
+    public void initVertex() {
+        if (kmerSize == -1)
+            kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+        if (maxIteration < 0)
+            maxIteration = getContext().getConfiguration().getInt(ITERATIONS, 1000000);
+        if(incomingMsg == null)
+            incomingMsg = new MessageWritable(kmerSize);
+        if(outgoingMsg == null)
+            outgoingMsg = new MessageWritable(kmerSize);
+        else
+            outgoingMsg.reset(kmerSize);
+        if(fakeVertex == null){
+            fakeVertex = new VKmerBytesWritable();
+            String random = generaterRandomString(kmerSize + 1);
+            fakeVertex.setByRead(kmerSize + 1, random.getBytes(), 0);
+        }
+        if(destVertexId == null)
+            destVertexId = new VKmerBytesWritable(kmerSize);
+        if(kmerList == null)
+            kmerList = new VKmerListWritable();
+    }
+
+    @Override
+    public void compute(Iterator<MessageWritable> msgIterator) {
+        initVertex();
+        if(getSuperstep() == 1){
+            /** add a fake vertex **/
+            addFakeVertex();
+            /** grouped by 5' readId **/
+            long mainReadId = getVertexValue().getHeadReadId();
+            if(mainReadId != 0){ //empty or not
+                // Every read id owns its own list: storing the shared kmerList field would make all map entries alias one object.
+                VKmerListWritable readKmers = scaffoldingMap.get(mainReadId);
+                if(readKmers == null){
+                    readKmers = new VKmerListWritable();
+                    scaffoldingMap.put(mainReadId, readKmers);
+                }
+                readKmers.append(getVertexId());
+                kmerList.setCopy(readKmers); // leave the scratch field holding the same content as before this fix
+            }
+            voteToHalt();
+        } else if(getSuperstep() == 2){
+            /** process scaffoldingMap: pair up read ids that were reported by exactly two vertices **/
+            for(VKmerListWritable readKmers : scaffoldingMap.values()){
+                kmerList.setCopy(readKmers);
+                if(kmerList.getCountOfPosition() == 2){
+                    outgoingMsg.setSeekedVertexId(kmerList.getPosition(1));
+                    sendMsg(kmerList.getPosition(0), outgoingMsg);
+                    outgoingMsg.setSeekedVertexId(kmerList.getPosition(0));
+                    sendMsg(kmerList.getPosition(1), outgoingMsg);
+                }
+            }
+            deleteVertex(getVertexId());
+        } else if(getSuperstep() == 3){
+            if(msgIterator.hasNext()){
+                incomingMsg = msgIterator.next();
+
+                /** initiate the traverseMap in vertexValue **/
+                kmerList.reset();
+                kmerList.append(incomingMsg.getSeekedVertexId());
+                traverseMap.clear();
+                traverseMap.put(incomingMsg.getSeekedVertexId(), kmerList);
+                getVertexValue().setTraverseMap(traverseMap);
+
+                /** begin to traverse **/
+
+            }
+        }
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SplitRepeatVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SplitRepeatVertex.java
new file mode 100644
index 0000000..ee493f9
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SplitRepeatVertex.java
@@ -0,0 +1,397 @@
+package edu.uci.ics.genomix.pregelix.operator.splitrepeat;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanOutputFormat;
+import edu.uci.ics.genomix.pregelix.format.InitialGraphCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.operator.pathmerge.BasicGraphCleanVertex;
+import edu.uci.ics.genomix.pregelix.type.MessageFlag;
+import edu.uci.ics.genomix.type.PositionWritable;
+import edu.uci.ics.genomix.type.VKmerListWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.api.util.BspUtils;
+
+public class SplitRepeatVertex extends
+ BasicGraphCleanVertex{
+
+ public class EdgeDir{ // byte codes 0..3 for the four edge directions used by connectedTable
+ public static final byte DIR_FF = 0 << 0;
+ public static final byte DIR_FR = 1 << 0;
+ public static final byte DIR_RF = 2 << 0;
+ public static final byte DIR_RR = 3 << 0;
+ }
+
+ public class DeletedEdge{ // an edge (direction code + neighbor kmer) scheduled for removal from the old vertex
+ private byte dir; // one of the EdgeDir codes
+ private VKmerBytesWritable edge; // privately owned copy of the neighbor kmer
+
+ public DeletedEdge(){
+ dir = 0; // 0 is also the value of EdgeDir.DIR_FF
+ edge = new VKmerBytesWritable(kmerSize);
+ }
+
+ public byte getDir() {
+ return dir;
+ }
+
+ public void setDir(byte dir) {
+ this.dir = dir;
+ }
+
+ public VKmerBytesWritable getEdge() {
+ return edge; // returns the internal instance, not a copy
+ }
+
+ public void setEdge(VKmerBytesWritable edge) {
+ this.edge.setAsCopy(edge); // copies the content, so the caller may reuse its argument
+ }
+ }
+
+ private byte[][] connectedTable = new byte[][]{
+ {EdgeDir.DIR_RF, EdgeDir.DIR_FF},
+ {EdgeDir.DIR_RF, EdgeDir.DIR_FR},
+ {EdgeDir.DIR_RR, EdgeDir.DIR_FF},
+ {EdgeDir.DIR_RR, EdgeDir.DIR_FR}
+ };
+ public static Set<String> existKmerString = new HashSet<String>();
+ private Set<Long> readIdSet;
+ private Set<Long> incomingReadIdSet = new HashSet<Long>();
+ private Set<Long> outgoingReadIdSet = new HashSet<Long>();
+ private Set<Long> selfReadIdSet = new HashSet<Long>();
+ private Set<Long> neighborEdgeIntersection = new HashSet<Long>();
+ private Map<VKmerBytesWritable, Set<Long>> kmerMap = new HashMap<VKmerBytesWritable, Set<Long>>();
+ private VKmerListWritable incomingEdgeList = null;
+ private VKmerListWritable outgoingEdgeList = null;
+ private byte incomingEdgeDir = 0;
+ private byte outgoingEdgeDir = 0;
+
+ protected VKmerBytesWritable createdVertexId = null;
+
+ /**
+ * initiate kmerSize, maxIteration
+ */
+ public void initVertex() {
+ if (kmerSize == -1)
+ kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+ if (maxIteration < 0)
+ maxIteration = getContext().getConfiguration().getInt(ITERATIONS, 1000000);
+ if(incomingMsg == null)
+ incomingMsg = new MessageWritable(kmerSize);
+ if(outgoingMsg == null)
+ outgoingMsg = new MessageWritable(kmerSize);
+ else
+ outgoingMsg.reset(kmerSize);
+ if(incomingEdgeList == null)
+ incomingEdgeList = new VKmerListWritable();
+ if(outgoingEdgeList == null)
+ outgoingEdgeList = new VKmerListWritable();
+ if(createdVertexId == null)
+ createdVertexId = new VKmerBytesWritable(kmerSize + 1);
+ if(destVertexId == null)
+ destVertexId = new VKmerBytesWritable(kmerSize);
+ if(tmpKmer == null)
+ tmpKmer = new VKmerBytesWritable();
+ }
+
+    /**
+     * Generate a random string of exactly n characters from [ACGT] that is not yet in existKmerString, and register it there.
+     * Draws again until an unused string is found, so it does not return if every candidate of length n is already taken.
+     */
+    public String generaterRandomString(int n){
+        char[] chars = "ACGT".toCharArray();
+        StringBuilder sb = new StringBuilder();
+        Random random = new Random();
+        String candidate;
+        do {
+            sb.setLength(0); // restart from an empty buffer; without this a collision made the result grow to 2n, 3n, ... characters
+            for (int i = 0; i < n; i++)
+                sb.append(chars[random.nextInt(chars.length)]);
+            candidate = sb.toString();
+        } while (existKmerString.contains(candidate));
+        existKmerString.add(candidate);
+        return candidate;
+    }
+
+ /**
+ * GenerateString only for test
+ */
+ public String generateString(){
+ if(existKmerString.isEmpty()){
+ existKmerString.add("AAA");
+ return "AAA";
+ }
+ else
+ return "GGG";
+ }
+
+    public void randomGenerateVertexId(int numOfSuffix){ // createdVertexId := this vertex's id text + a fresh random suffix
+        final byte[] idBytes = (getVertexId().toString() + generaterRandomString(numOfSuffix)).getBytes();
+        createdVertexId.setByRead(kmerSize + numOfSuffix, idBytes, 0);
+    }
+
+    public void generateKmerMap(Iterator<MessageWritable> msgIterator){ // rebuilds kmerMap: sender vertex id -> read ids carried by its message
+        kmerMap.clear();
+        while (msgIterator.hasNext()) {
+            incomingMsg = msgIterator.next();
+            final Set<Long> senderReadIds = new HashSet<Long>();
+            for (PositionWritable position : incomingMsg.getNodeIdList())
+                senderReadIds.add(position.getReadId());
+            readIdSet = senderReadIds;
+            kmerMap.put(incomingMsg.getSourceVertexId(), senderReadIds);
+        }
+    }
+
+    public void setSelfReadIdSet(){
+        // refresh selfReadIdSet with the read ids found in this vertex's node id list
+        selfReadIdSet.clear();
+        for (PositionWritable position : getVertexValue().getNodeIdList())
+            selfReadIdSet.add(position.getReadId());
+    }
+
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ public void createNewVertex(int i, VKmerBytesWritable incomingEdge, VKmerBytesWritable outgoingEdge){ // adds a vertex with id createdVertexId wired per row i of connectedTable
+ Vertex vertex = (Vertex) BspUtils.createVertex(getContext().getConfiguration());
+ vertex.getMsgList().clear();
+ vertex.getEdges().clear();
+ VKmerBytesWritable vertexId = new VKmerBytesWritable(kmerSize);
+ VertexValueWritable vertexValue = new VertexValueWritable(kmerSize);
+ //add the corresponding edge to new vertex
+ switch(connectedTable[i][0]){ // column 0: list that receives the incoming edge
+ case EdgeDir.DIR_RF:
+ vertexValue.getRFList().append(incomingEdge);
+ break;
+ case EdgeDir.DIR_RR:
+ vertexValue.getRRList().append(incomingEdge);
+ break;
+ }
+ switch(connectedTable[i][1]){ // column 1: list that receives the outgoing edge
+ case EdgeDir.DIR_FF:
+ vertexValue.getFFList().append(outgoingEdge);
+ break;
+ case EdgeDir.DIR_FR:
+ vertexValue.getFRList().append(outgoingEdge);
+ break;
+ }
+ vertexId.setAsCopy(createdVertexId); // the id must have been set beforehand, e.g. by randomGenerateVertexId
+ vertex.setVertexId(vertexId);
+ vertex.setVertexValue(vertexValue);
+
+ addVertex(vertexId, vertex);
+ }
+
+ public void sendMsgToUpdateEdge(VKmerBytesWritable incomingEdge, VKmerBytesWritable outgoingEdge){
+ outgoingMsg.setCreatedVertexId(createdVertexId);
+ outgoingMsg.setSourceVertexId(getVertexId());
+ // first message: to the incoming-side neighbor, flagged with incomingEdgeDir
+ outgoingMsg.setFlag(incomingEdgeDir);
+ destVertexId.setAsCopy(incomingEdge);
+ sendMsg(destVertexId, outgoingMsg);
+ // second message: same created/source ids, to the outgoing-side neighbor, flagged with outgoingEdgeDir
+ outgoingMsg.setFlag(outgoingEdgeDir);
+ destVertexId.setAsCopy(outgoingEdge);
+ sendMsg(destVertexId, outgoingMsg);
+ }
+
+    public void storeDeletedEdge(Set<DeletedEdge> deletedEdges, int i, VKmerBytesWritable incomingEdge, VKmerBytesWritable outgoingEdge){
+        // Records the two edges of row i of connectedTable (incoming side, outgoing side) in
+        // deletedEdges; both records are always added, even when a direction code is not recognised.
+        DeletedEdge deletedIncomingEdge = new DeletedEdge();
+        DeletedEdge deletedOutgoingEdge = new DeletedEdge();
+
+        final byte incomingDir = connectedTable[i][0];
+        final byte outgoingDir = connectedTable[i][1];
+
+        // incoming side: only RF and RR are handled here;
+        // any other code leaves the record at its constructor defaults
+        if (incomingDir == EdgeDir.DIR_RF || incomingDir == EdgeDir.DIR_RR) {
+            deletedIncomingEdge.setDir(incomingDir);
+            deletedIncomingEdge.setEdge(incomingEdge);
+        }
+
+        // outgoing side: only FF and FR are handled here;
+        // any other code leaves the record at its constructor defaults
+        if (outgoingDir == EdgeDir.DIR_FF || outgoingDir == EdgeDir.DIR_FR) {
+            deletedOutgoingEdge.setDir(outgoingDir);
+            deletedOutgoingEdge.setEdge(outgoingEdge);
+        }
+
+        deletedEdges.add(deletedIncomingEdge);
+        deletedEdges.add(deletedOutgoingEdge);
+    }
+    public void deleteEdgeFromOldVertex(DeletedEdge deleteEdge){
+        // Removes the recorded edge from the adjacency list that matches its direction code;
+        // an unrecognised direction code is ignored.
+        final byte dir = deleteEdge.getDir();
+        final VKmerBytesWritable edge = deleteEdge.getEdge();
+        // reverse-side lists first, then forward-side lists
+        if (dir == EdgeDir.DIR_RF) {
+            getVertexValue().getRFList().remove(edge);
+        } else if (dir == EdgeDir.DIR_RR) {
+            getVertexValue().getRRList().remove(edge);
+        } else if (dir == EdgeDir.DIR_FF) {
+            getVertexValue().getFFList().remove(edge);
+        } else if (dir == EdgeDir.DIR_FR) {
+            getVertexValue().getFRList().remove(edge);
+        }
+    }
+
+ public void setEdgeListAndEdgeDir(int i){ // loads the edge lists and MessageFlag directions selected by row i of connectedTable
+ switch(connectedTable[i][0]){ // column 0 selects the incoming side (RF or RR)
+ case EdgeDir.DIR_RF:
+ incomingEdgeList.setCopy(getVertexValue().getRFList());
+ incomingEdgeDir = MessageFlag.DIR_RF;
+ break;
+ case EdgeDir.DIR_RR:
+ incomingEdgeList.setCopy(getVertexValue().getRRList());
+ incomingEdgeDir = MessageFlag.DIR_RR;
+ break;
+ }
+ switch(connectedTable[i][1]){ // column 1 selects the outgoing side (FF or FR)
+ case EdgeDir.DIR_FF:
+ outgoingEdgeList.setCopy(getVertexValue().getFFList());
+ outgoingEdgeDir = MessageFlag.DIR_FF;
+ break;
+ case EdgeDir.DIR_FR:
+ outgoingEdgeList.setCopy(getVertexValue().getFRList());
+ outgoingEdgeDir = MessageFlag.DIR_FR;
+ break;
+ }
+ }
+
+    public void setNeighborEdgeIntersection(VKmerBytesWritable incomingEdge, VKmerBytesWritable outgoingEdge){
+        // neighborEdgeIntersection := read ids shared by this vertex, incomingEdge and outgoingEdge. All three scratch sets are cleared first; the result set used to keep ids from earlier calls.
+        incomingReadIdSet.clear();
+        outgoingReadIdSet.clear();
+        neighborEdgeIntersection.clear();
+        tmpKmer.setAsCopy(incomingEdge);
+        incomingReadIdSet.addAll(kmerMap.get(tmpKmer));
+        tmpKmer.setAsCopy(outgoingEdge);
+        outgoingReadIdSet.addAll(kmerMap.get(tmpKmer));
+        neighborEdgeIntersection.addAll(selfReadIdSet);
+        neighborEdgeIntersection.retainAll(incomingReadIdSet);
+        neighborEdgeIntersection.retainAll(outgoingReadIdSet);
+    }
+
+ public void updateEdgeListPointToNewVertex(){ // in the matching list, swaps the message's source vertex id for its created vertex id
+ byte meToNeighborDir = incomingMsg.getFlag(); // direction flag carried by the message
+ byte neighborToMeDir = mirrorDirection(meToNeighborDir); // the mirrored direction picks the list to edit
+ switch(neighborToMeDir){
+ case MessageFlag.DIR_FF:
+ getVertexValue().getFFList().remove(incomingMsg.getSourceVertexId());
+ getVertexValue().getFFList().append(incomingMsg.getCreatedVertexId());
+ break;
+ case MessageFlag.DIR_FR:
+ getVertexValue().getFRList().remove(incomingMsg.getSourceVertexId());
+ getVertexValue().getFRList().append(incomingMsg.getCreatedVertexId());
+ break;
+ case MessageFlag.DIR_RF:
+ getVertexValue().getRFList().remove(incomingMsg.getSourceVertexId());
+ getVertexValue().getRFList().append(incomingMsg.getCreatedVertexId());
+ break;
+ case MessageFlag.DIR_RR:
+ getVertexValue().getRRList().remove(incomingMsg.getSourceVertexId());
+ getVertexValue().getRRList().append(incomingMsg.getCreatedVertexId());
+ break;
+ }
+ }
+
+ @Override
+ public void compute(Iterator<MessageWritable> msgIterator) { // four fixed supersteps: ask neighbors, answer, split, repoint edges
+ initVertex();
+ if(getSuperstep() == 1){ // vertices with degree > 2 announce themselves to all neighbors
+ if(getVertexValue().getDegree() > 2){
+ outgoingMsg.setSourceVertexId(getVertexId());
+ sendMsgToAllNeighborNodes(getVertexValue());
+ }
+ voteToHalt();
+ } else if(getSuperstep() == 2){ // every contacted neighbor replies with its node id list
+ while(msgIterator.hasNext()){
+ incomingMsg = msgIterator.next();
+ outgoingMsg.setNodeIdList(getVertexValue().getNodeIdList());
+ outgoingMsg.setSourceVertexId(getVertexId());
+ sendMsg(incomingMsg.getSourceVertexId(), outgoingMsg);
+ }
+ voteToHalt();
+ } else if(getSuperstep() == 3){
+ /** generate KmerMap map kmer(key) to readIdSet(value) **/
+ generateKmerMap(msgIterator);
+
+ /** set self readId set **/
+ setSelfReadIdSet();
+
+ //A set storing deleted edges
+ Set<DeletedEdge> deletedEdges = new HashSet<DeletedEdge>();
+ /** process connectedTable **/
+ for(int i = 0; i < 4; i++){ // 4 == number of rows in connectedTable
+ /** set edgeList and edgeDir based on connectedTable **/
+ setEdgeListAndEdgeDir(i);
+
+ VKmerBytesWritable incomingEdge = new VKmerBytesWritable();
+ VKmerBytesWritable outgoingEdge = new VKmerBytesWritable();
+ for(int x = 0; x < incomingEdgeList.getCountOfPosition(); x++){
+ for(int y = 0; y < outgoingEdgeList.getCountOfPosition(); y++){
+ incomingEdge.setAsCopy(incomingEdgeList.getPosition(x));
+ outgoingEdge.setAsCopy(outgoingEdgeList.getPosition(y));
+ /** set neighborEdge readId intersection **/
+ setNeighborEdgeIntersection(incomingEdge, outgoingEdge);
+
+ if(!neighborEdgeIntersection.isEmpty()){ // the pair shares at least one read id with this vertex
+ /** random generate vertexId of new vertex **/
+ randomGenerateVertexId(3);
+
+ /** create new/created vertex **/
+ createNewVertex(i, incomingEdge, outgoingEdge);
+
+ /** send msg to neighbors to update their edges to new vertex **/
+ sendMsgToUpdateEdge(incomingEdge, outgoingEdge);
+
+ /** store deleted edge **/
+ storeDeletedEdge(deletedEdges, i, incomingEdge, outgoingEdge);
+ }
+ }
+ }
+ }
+ /** delete extra edges from old vertex **/
+ for(DeletedEdge deletedEdge : deletedEdges){
+ deleteEdgeFromOldVertex(deletedEdge);
+ }
+
+ /** Old vertex delete or voteToHalt **/
+ if(getVertexValue().getDegree() == 0)// no edge is left, so delete the vertex
+ deleteVertex(getVertexId());
+ else
+ voteToHalt();
+ } else if(getSuperstep() == 4){ // neighbors repoint their edge lists at the created vertices
+ while(msgIterator.hasNext()){
+ incomingMsg = msgIterator.next();
+ /** update edgelist to new/created vertex **/
+ updateEdgeListPointToNewVertex();
+ }
+ voteToHalt();
+ }
+ }
+
+    public static void main(String[] args) throws Exception {
+        final Class<SplitRepeatVertex> vertexClass = SplitRepeatVertex.class;
+        // the job is named after the vertex class it runs
+        PregelixJob job = new PregelixJob(vertexClass.getSimpleName());
+        job.setVertexClass(vertexClass);
+        // vertex input/output formats, then the output key/value classes
+        job.setVertexInputFormatClass(InitialGraphCleanInputFormat.class);
+        job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+        job.setDynamicVertexValueSize(true);
+        job.setOutputKeyClass(VKmerBytesWritable.class);
+        job.setOutputValueClass(VertexValueWritable.class);
+        Client.run(args, job);
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/tipremove/TipAddVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/tipremove/TipAddVertex.java
new file mode 100644
index 0000000..0738208
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/tipremove/TipAddVertex.java
@@ -0,0 +1,114 @@
+package edu.uci.ics.genomix.pregelix.operator.tipremove;
+
+import java.util.Iterator;
+import org.apache.hadoop.io.NullWritable;
+
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerListWritable;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+
+/*
+ * vertexId: VKmerBytesWritable
+ * vertexValue: VertexValueWritable
+ * edgeValue: NullWritable
+ * message: MessageWritable
+ *
+ * DNA:
+ * A: 00
+ * C: 01
+ * G: 10
+ * T: 11
+ *
+ * succeed node
+ * A 00000001 1
+ * G 00000010 2
+ * C 00000100 4
+ * T 00001000 8
+ * precursor node
+ * A 00010000 16
+ * G 00100000 32
+ * C 01000000 64
+ * T 10000000 128
+ *
+ * For example, ONE LINE in input file: 00,01,10 0001,0010,
+ * That means that vertexId is ACG, its succeed node is A and its precursor node is C.
+ * The succeed node and precursor node will be stored in vertexValue and we don't use edgeValue.
+ * The details about message are in edu.uci.ics.genomix.pregelix.io.MessageWritable.
+ */
+/**
+ * Add a tip vertex to the graph (hard-coded test data: a new vertex "AGC" attached to vertex "CTA")
+ */
+ public class TipAddVertex extends
+ Vertex<VKmerBytesWritable, VertexValueWritable, NullWritable, MessageWritable> {
+ public static final String KMER_SIZE = "TipAddVertex.kmerSize";
+ public static int kmerSize = -1;
+
+ /**
+ * Read kmerSize from the job configuration (default 5) the first time it is needed.
+ */
+ public void initVertex() {
+ if (kmerSize == -1)
+ kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+ }
+
+ /**
+ * In superstep 1 the vertex "CTA" gets "AGC" appended to its RF list and a new vertex "AGC" pointing back at it is added; every vertex then votes to halt.
+ */
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void compute(Iterator<MessageWritable> msgIterator) {
+ initVertex();
+ if(getSuperstep() == 1){
+ if(getVertexId().toString().equals("CTA")){ // hard-coded anchor vertex
+ VKmerBytesWritable vertexId = new VKmerBytesWritable(kmerSize);
+ vertexId.setByRead(kmerSize, "AGC".getBytes(), 0); // hard-coded id of the tip to add
+ getVertexValue().getRFList().append(vertexId);
+
+ //add tip vertex
+ @SuppressWarnings("rawtypes")
+ Vertex vertex = (Vertex) BspUtils.createVertex(getContext().getConfiguration());
+ vertex.getMsgList().clear();
+ vertex.getEdges().clear();
+
+ VertexValueWritable vertexValue = new VertexValueWritable(kmerSize);
+ /**
+ * set the id of the new (tip) vertex
+ */
+ vertex.setVertexId(vertexId);
+ /**
+ * set the vertex value: an RF list holding this vertex's id, and the new id as actual kmer
+ */
+ VKmerListWritable kmerList = new VKmerListWritable();
+ kmerList.append(getVertexId());
+ vertexValue.setRFList(kmerList);
+ vertexValue.setActualKmer(vertexId);
+ vertex.setVertexValue(vertexValue);
+
+ addVertex(vertexId, vertex);
+ }
+ }
+ voteToHalt();
+ }
+
+ public static void main(String[] args) throws Exception {
+ PregelixJob job = new PregelixJob(TipAddVertex.class.getSimpleName());
+ job.setVertexClass(TipAddVertex.class);
+ /**
+ * Vertex input/output formats and the output key/value classes
+ */
+ job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(VKmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ Client.run(args, job);
+ }
+ }
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/tipremove/TipRemoveVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/tipremove/TipRemoveVertex.java
new file mode 100644
index 0000000..527fb66
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/tipremove/TipRemoveVertex.java
@@ -0,0 +1,112 @@
+package edu.uci.ics.genomix.pregelix.operator.tipremove;
+
+import java.util.Iterator;
+
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.operator.pathmerge.BasicGraphCleanVertex;
+import edu.uci.ics.genomix.pregelix.util.VertexUtil;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+/*
+ * vertexId: BytesWritable
+ * vertexValue: ByteWritable
+ * edgeValue: NullWritable
+ * message: MessageWritable
+ *
+ * DNA:
+ * A: 00
+ * C: 01
+ * G: 10
+ * T: 11
+ *
+ * succeed node
+ * A 00000001 1
+ * G 00000010 2
+ * C 00000100 4
+ * T 00001000 8
+ * precursor node
+ * A 00010000 16
+ * G 00100000 32
+ * C 01000000 64
+ * T 10000000 128
+ *
+ * For example, ONE LINE in input file: 00,01,10 0001,0010,
+ * That means that vertexId is ACG, its succeed node is A and its precursor node is C.
+ * The succeed node and precursor node will be stored in vertexValue and we don't use edgeValue.
+ * The details about message are in edu.uci.ics.pregelix.example.io.MessageWritable.
+ */
+/**
+ * Remove tip or single node when l > constant
+ */
+public class TipRemoveVertex extends
+ BasicGraphCleanVertex {
+ public static final String LENGTH = "TipRemoveVertex.length";
+ private int length = -1;
+
+ /**
+ * initiate kmerSize, length
+ */
+ public void initVertex() {
+ if (kmerSize == -1)
+ kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+ if(length == -1)
+ length = getContext().getConfiguration().getInt(LENGTH, kmerSize); //kmerSize + 5
+ if(incomingMsg == null)
+ incomingMsg = new MessageWritable(kmerSize);
+ if(outgoingMsg == null)
+ outgoingMsg = new MessageWritable(kmerSize);
+ else
+ outgoingMsg.reset(kmerSize);
+ if(destVertexId == null)
+ destVertexId = new VKmerBytesWritable(kmerSize);
+ }
+
+ @Override
+ public void compute(Iterator<MessageWritable> msgIterator) {
+ initVertex();
+ if(getSuperstep() == 1){
+ if(VertexUtil.isIncomingTipVertex(getVertexValue())){
+ if(getVertexValue().getLengthOfKmer() <= length){
+ sendSettledMsgToPreviousNode();
+ deleteVertex(getVertexId());
+ }
+ }
+ else if(VertexUtil.isOutgoingTipVertex(getVertexValue())){
+ if(getVertexValue().getLengthOfKmer() <= length){
+ sendSettledMsgToNextNode();
+ deleteVertex(getVertexId());
+ }
+ }
+ else if(VertexUtil.isSingleVertex(getVertexValue())){
+ if(getVertexValue().getLengthOfKmer() <= length)
+ deleteVertex(getVertexId());
+ }
+ }
+ else if(getSuperstep() == 2){
+ while(msgIterator.hasNext()){
+ incomingMsg = msgIterator.next();
+ responseToDeadVertex();
+ }
+ }
+ voteToHalt();
+ }
+
+ public static void main(String[] args) throws Exception {
+ PregelixJob job = new PregelixJob(TipRemoveVertex.class.getSimpleName());
+ job.setVertexClass(TipRemoveVertex.class);
+ /**
+ * BinaryInput and BinaryOutput
+ */
+ job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(VKmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ Client.run(args, job);
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java
new file mode 100644
index 0000000..b3a3d37
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java
@@ -0,0 +1,125 @@
+package edu.uci.ics.genomix.pregelix.sequencefile;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.FilenameFilter;
+import java.io.IOException;
+
+import org.apache.commons.io.filefilter.WildcardFileFilter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable.State;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+ /** Dumps SequenceFile results of the graph jobs as text; readers and writers are closed even on failure. */
+ public class GenerateTextFile {
+     /**
+      * Writes "key TAB value" for every record of each {@code part*} file in a local directory, echoing keys to stdout.
+      * @param kmerSize not used by this method
+      * @param strSrcDir local directory containing the {@code part*} sequence files
+      * @param outPutDir path of the text file to write
+      * @throws IOException if the directory cannot be listed or reading/writing fails
+      */
+     public static void generateFromPathmergeResult(int kmerSize, String strSrcDir, String outPutDir) throws IOException {
+         Configuration conf = new Configuration();
+         FileSystem fileSys = FileSystem.getLocal(conf);
+         // create() is kept from the original code, but the stream it returns is closed now
+         fileSys.create(new Path(outPutDir)).close();
+         File[] parts = new File(strSrcDir).listFiles((FilenameFilter) (new WildcardFileFilter("part*")));
+         if (parts == null) {
+             throw new IOException("cannot list directory: " + strSrcDir);
+         }
+         try (BufferedWriter bw = new BufferedWriter(new FileWriter(outPutDir))) {
+             for (File f : parts) {
+                 Path part = new Path(f.getAbsolutePath());
+                 try (SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, part, conf)) {
+                     VKmerBytesWritable key = new VKmerBytesWritable();
+                     VertexValueWritable value = new VertexValueWritable();
+                     while (reader.next(key, value)) {
+                         bw.write(key.toString() + "\t" + value.toString());
+                         System.out.println(key.toString());
+                         bw.newLine();
+                     }
+                 }
+             }
+         }
+     }
+
+     /**
+      * Scans the two hard-coded "final_naive" part files and writes the value of every record
+      * whose k-mer length is not -1 and at most {@code maxLength} to "naive_text_" + maxLength.
+      */
+     public static void generateSpecificLengthChainFromNaivePathmergeResult(int maxLength) throws IOException {
+         try (BufferedWriter bw = new BufferedWriter(new FileWriter("naive_text_" + maxLength))) {
+             Configuration conf = new Configuration();
+             FileSystem fileSys = FileSystem.get(conf);
+             for (int i = 0; i < 2; i++) {
+                 Path path = new Path("/home/anbangx/genomix_result/final_naive/part-" + i);
+                 try (SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, path, conf)) {
+                     VKmerBytesWritable key = new VKmerBytesWritable();
+                     VertexValueWritable value = new VertexValueWritable();
+                     while (reader.next(key, value)) {
+                         if (value.getLengthOfKmer() != -1 && value.getLengthOfKmer() <= maxLength) {
+                             bw.write(value.toString());
+                             bw.newLine();
+                         }
+                     }
+                 }
+             }
+         }
+     }
+
+     /**
+      * Scans the two hard-coded "improvelog2" part files with the same length filter and writes
+      * "key TAB value" for every record that also has state IS_FINAL to "log_text_" + maxLength.
+      */
+     public static void generateSpecificLengthChainFromLogPathmergeResult(int maxLength) throws IOException {
+         try (BufferedWriter bw = new BufferedWriter(new FileWriter("log_text_" + maxLength))) {
+             Configuration conf = new Configuration();
+             FileSystem fileSys = FileSystem.get(conf);
+             for (int i = 0; i < 2; i++) {
+                 Path path = new Path("/home/anbangx/genomix_result/improvelog2/part-" + i);
+                 try (SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, path, conf)) {
+                     VKmerBytesWritable key = new VKmerBytesWritable();
+                     VertexValueWritable value = new VertexValueWritable();
+                     while (reader.next(key, value)) {
+                         if (value.getLengthOfKmer() != -1 && value.getLengthOfKmer() <= maxLength
+                                 && value.getState() == State.IS_FINAL) {
+                             bw.write(key.toString() + "\t" + value.toString());
+                             bw.newLine();
+                         }
+                     }
+                 }
+             }
+         }
+     }
+
+     /**
+      * Writes every key of "data/input/part-0-out-3000000" to the file "textfile". Values are
+      * skipped with next(key); the former next(key, null) call dereferenced the null holder.
+      */
+     public static void generateFromGraphbuildResult() throws IOException {
+         try (BufferedWriter bw = new BufferedWriter(new FileWriter("textfile"))) {
+             Configuration conf = new Configuration();
+             FileSystem fileSys = FileSystem.get(conf);
+             Path path = new Path("data/input/part-0-out-3000000");
+             try (SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, path, conf)) {
+                 VKmerBytesWritable key = new VKmerBytesWritable();
+                 while (reader.next(key)) {
+                     bw.write(key.toString());
+                     bw.newLine();
+                 }
+             }
+         }
+     }
+
+     /** Entry point; intentionally empty, so running it does nothing. */
+     public static void main(String[] args) throws IOException {
+         // No default action: call one of the generate* methods above with the desired arguments.
+     }
+ }
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/GenerateTestInput.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/GenerateTestInput.java
new file mode 100644
index 0000000..f30512c
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/GenerateTestInput.java
@@ -0,0 +1,80 @@
+package edu.uci.ics.genomix.pregelix.testcase;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+
+ /** Generates small random read sets (simple, tree, cycle and bridge shapes) for graph tests. */
+ public class GenerateTestInput {
+
+     /**
+      * Simple Path: {@code numLines} strings of {@code length} symbols from one generator,
+      * each followed by CRLF.
+      */
+     public static String simplePath(int k, int length, int numLines) {
+         RandomString rs = new RandomString(k, length);
+         StringBuilder output = new StringBuilder(); // avoids quadratic String concatenation
+         for (int i = 0; i < numLines; i++)
+             output.append(rs.nextString(0)).append("\r\n");
+         return output.toString();
+     }
+
+     /**
+      * Tree Path: three CRLF-separated strings; the second keeps the first {@code x} symbols
+      * of the first, the third keeps the first {@code x + y} symbols of the second.
+      */
+     public static String treePath(int k, int x, int y, int z) {
+         RandomString rs = new RandomString(k, x + y + k - 1);
+         String s1 = rs.nextString(0);
+         rs.setLength(x + y + z + k - 1);
+         rs.addString(s1.substring(0, x));
+         String s2 = rs.nextString(x);
+         rs.setLength(x + y + z + k - 1);
+         rs.addString(s2.substring(0, x + y));
+         String s3 = rs.nextString(x + y);
+         return s1 + "\r\n" + s2 + "\r\n" + s3;
+     }
+
+     /**
+      * Cycle Path: a random string of {@code length} symbols followed by a copy of its
+      * symbols 1..k (0-based, inclusive); requires {@code length > k}.
+      */
+     public static String cyclePath(int k, int length) {
+         RandomString rs = new RandomString(k, length);
+         String s1 = rs.nextString(0);
+         return s1 + s1.substring(1, k + 1);
+     }
+
+     /**
+      * Bridge Path: two CRLF-separated strings that share the first {@code k + 2} and the
+      * last {@code k - 1} symbols of the first string, with random symbols in between.
+      */
+     public static String bridgePath(int k, int x) {
+         RandomString rs = new RandomString(k, x + k + 2 + k - 1);
+         String s1 = rs.nextString(0);
+         rs.setLength(x + k + 2);
+         rs.addString(s1.substring(0, k + 2));
+         String s2 = rs.nextString(k + 2) + s1.substring(x + k + 2, x + k + 2 + k - 1);
+         return s1 + "\r\n" + s2;
+     }
+
+     /** Writes the five sample graphs under graph/7/; an I/O failure is printed, not rethrown. */
+     public static void main(String[] args) {
+         try {
+             writeFile("graph/7/SinglePath", simplePath(7, 10, 1));
+             writeFile("graph/7/SimplePath", simplePath(7, 10, 3));
+             writeFile("graph/7/TreePath", treePath(7, 7, 7, 7));
+             writeFile("graph/7/CyclePath", cyclePath(7, 10));
+             writeFile("graph/7/BridgePath", bridgePath(7, 2));
+         } catch (IOException e) {
+             e.printStackTrace();
+         }
+     }
+
+     /** Writes {@code content} to {@code path}, closing the writer even if the write fails. */
+     private static void writeFile(String path, String content) throws IOException {
+         try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(path))) {
+             writer.write(content);
+         }
+     }
+ }
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/RandomString.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/RandomString.java
new file mode 100644
index 0000000..cd83171
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/RandomString.java
@@ -0,0 +1,59 @@
+package edu.uci.ics.genomix.pregelix.testcase;
+
+import java.util.ArrayList;
+import java.util.Random;
+
+ public class RandomString {
+     private static final char[] symbols = { 'A', 'C', 'G', 'T' };
+     private final Random random = new Random();
+     /** Every k-mer produced so far; a hash set keeps the duplicate check O(1). */
+     private final java.util.Set<String> existKmer = new java.util.HashSet<String>();
+     private final int k;
+     private char[] buf;
+
+     /**
+      * @param k length of the substrings that must never repeat
+      * @param length initial buffer length, i.e. the length of generated strings
+      * @throws IllegalArgumentException if {@code k < 1} or {@code length < 1}
+      */
+     public RandomString(int k, int length) {
+         if (length < 1)
+             throw new IllegalArgumentException("length < 1: " + length);
+         if (k < 1)
+             throw new IllegalArgumentException("k < 1: " + k);
+         buf = new char[length];
+         this.k = k;
+     }
+
+     /**
+      * Fills the buffer from {@code startIdx} to its end with random symbols, redrawing a
+      * symbol whenever the k-mer ending there was produced before; returns the whole buffer.
+      * @throws IllegalStateException if every symbol repeats a k-mer at some index
+      */
+     public String nextString(int startIdx) {
+         int rejected = 0; // bit i set: symbols[i] already failed at the current index
+         for (int idx = startIdx; idx < buf.length;) {
+             int pick = random.nextInt(symbols.length);
+             buf[idx] = symbols[pick];
+             if (idx < k - 1 || existKmer.add(new String(buf, idx - k + 1, k))) {
+                 idx++;
+                 rejected = 0;
+             } else {
+                 rejected |= 1 << pick;
+                 if (rejected == (1 << symbols.length) - 1)
+                     throw new IllegalStateException("every symbol repeats a " + k + "-mer at index " + idx);
+             }
+         }
+         return new String(buf);
+     }
+
+     /** Replaces the buffer with a new one of {@code length}; recorded k-mers are kept. */
+     public void setLength(int length) {
+         buf = new char[length];
+     }
+
+     /** Copies {@code s} into the start of the buffer; {@code s} must not be longer than it. */
+     public void addString(String s) {
+         s.getChars(0, s.length(), buf, 0);
+     }
+ }
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/AdjMessage.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/AdjMessage.java
new file mode 100644
index 0000000..ca8d795
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/AdjMessage.java
@@ -0,0 +1,45 @@
+package edu.uci.ics.genomix.pregelix.type;
+
+ /** Byte constants with a nested helper that maps each code back to its name. */
+ public class AdjMessage {
+     public static final byte FROMFF = 0;
+     public static final byte FROMFR = 1;
+     public static final byte FROMRF = 2;
+     public static final byte FROMRR = 3;
+     public static final byte NON = 4;
+     public static final byte UNCHANGE = 5;
+     public static final byte MERGE = 6;
+     public static final byte KILL = 7;
+
+     /** Name lookup for the constants of {@link AdjMessage}. */
+     public final static class ADJMESSAGE_CONTENT {
+         /**
+          * Returns the name of the constant equal to {@code code}.
+          *
+          * @param code value to look up
+          * @return the constant's name, or the empty string if no constant matches
+          */
+         public static String getContentFromCode(byte code) {
+             switch (code) {
+             case FROMFF:
+                 return "FROMFF";
+             case FROMFR:
+                 return "FROMFR";
+             case FROMRF:
+                 return "FROMRF";
+             case FROMRR:
+                 return "FROMRR";
+             case NON:
+                 return "NON";
+             case UNCHANGE:
+                 return "UNCHANGE";
+             case MERGE:
+                 return "MERGE";
+             case KILL:
+                 return "KILL";
+             default:
+                 return "";
+             }
+         }
+     }
+ }
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/CheckMessage.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/CheckMessage.java
new file mode 100644
index 0000000..4b32a51
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/CheckMessage.java
@@ -0,0 +1,43 @@
+package edu.uci.ics.genomix.pregelix.type;
+
+ /** Single-bit byte flags with a nested helper that names an individual flag. */
+ public class CheckMessage {
+     public static final byte SOURCE = 1 << 0;
+     public static final byte ACUTUALKMER = 1 << 1;
+     public static final byte NEIGHBER = 1 << 2;
+     public static final byte MESSAGE = 1 << 3;
+     public static final byte NODEIDLIST = 1 << 4;
+     public static final byte ADJMSG = 1 << 5;
+     public static final byte START = 1 << 6;
+
+     public final static class CheckMessage_CONTENT {
+         /**
+          * Returns the label of the flag equal to {@code code}.
+          * Only a single flag is recognised; any other value, including a combination
+          * of several bits, yields the empty string. NODEIDLIST is labelled "READID".
+          *
+          * @param code value to look up
+          * @return the flag's label, or the empty string if no constant matches
+          */
+         public static String getContentFromCode(byte code) {
+             switch (code) {
+             case SOURCE:
+                 return "SOURCE";
+             case ACUTUALKMER:
+                 return "ACUTUALKMER";
+             case NEIGHBER:
+                 return "NEIGHBER";
+             case MESSAGE:
+                 return "MESSAGE";
+             case NODEIDLIST:
+                 return "READID";
+             case ADJMSG:
+                 return "ADJMSG";
+             case START:
+                 return "START";
+             default:
+                 return "";
+             }
+         }
+     }
+ }
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/DirectionFlag.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/DirectionFlag.java
new file mode 100644
index 0000000..51a73f1
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/DirectionFlag.java
@@ -0,0 +1,15 @@
+package edu.uci.ics.genomix.pregelix.type;
+
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable.State;
+
+ /** Direction codes kept in the lowest three bits of a flag byte; extends State. */
+ public class DirectionFlag extends State {
+     public static final byte DIR_NO = 0b000;
+     public static final byte DIR_FF = 0b001;
+     public static final byte DIR_FR = 0b010;
+     public static final byte DIR_RF = 0b011;
+     public static final byte DIR_RR = 0b100;
+     public static final byte DIR_MASK = 0b111; // selects the three direction bits
+     public static final byte DIR_CLEAR = 0b1111000; // bits 3-6 set, direction bits zero
+ // public static final byte DIR_FROM_DEADVERTEX = 0b101 << 0;
+ }
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/Message.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/Message.java
new file mode 100644
index 0000000..4644383
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/Message.java
@@ -0,0 +1,47 @@
+package edu.uci.ics.genomix.pregelix.type;
+
+ /** Byte constants with a nested helper that maps each code back to its name. */
+ public class Message {
+
+     public static final byte NON = 0;
+     public static final byte START = 1;
+     public static final byte END = 2;
+     public static final byte STOP = 3;
+     public static final byte FROMPSEUDOHEAD = 4;
+     public static final byte FROMPSEUDOREAR = 5;
+     public static final byte IN = 6;
+     public static final byte OUT = 7;
+
+     /** Name lookup for the constants of {@link Message}. */
+     public final static class MESSAGE_CONTENT {
+
+         /**
+          * Returns the name of the constant equal to {@code code}.
+          *
+          * @param code value to look up
+          * @return the constant's name, or the empty string if no constant matches
+          */
+         public static String getContentFromCode(byte code) {
+             switch (code) {
+             case NON:
+                 return "NON";
+             case START:
+                 return "START";
+             case END:
+                 return "END";
+             case STOP:
+                 return "STOP";
+             case FROMPSEUDOHEAD:
+                 return "FROMPSEUDOHEAD";
+             case FROMPSEUDOREAR:
+                 return "FROMPSEUDOREAR";
+             case IN:
+                 return "IN";
+             case OUT:
+                 return "OUT";
+             default:
+                 return "";
+             }
+         }
+     }
+ }
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/MessageFlag.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/MessageFlag.java
new file mode 100644
index 0000000..3ce6c5a
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/MessageFlag.java
@@ -0,0 +1,11 @@
+package edu.uci.ics.genomix.pregelix.type;
+
+ public class MessageFlag extends DirectionFlag {
+ /** Placeholder: returns "ERROR_BAD_MESSAGE" for every code; the argument is not inspected yet. */
+ public static String getFlagAsString(byte code) {
+ // TODO: allow multiple flags to be set
+ return "ERROR_BAD_MESSAGE";
+ }
+ }
+
+
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/MessageFromHead.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/MessageFromHead.java
new file mode 100644
index 0000000..05a2f95
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/MessageFromHead.java
@@ -0,0 +1,17 @@
+package edu.uci.ics.genomix.pregelix.type;
+
+ public class MessageFromHead {
+     private static final int SHIFT = 1; // every code below is stored above bit 0
+     public static final byte BothMsgsFromHead = 0b0000 << SHIFT;
+     public static final byte BothMsgsFromNonHead = 0b0001 << SHIFT;
+     public static final byte BothMsgsFromOldHead = 0b0010 << SHIFT;
+     public static final byte OneMsgFromHead = 0b0011 << SHIFT;
+     public static final byte OneMsgFromNonHead = 0b0100 << SHIFT;
+     public static final byte OneMsgFromHeadAndOneFromNonHead = 0b0101 << SHIFT;
+     public static final byte OneMsgFromHeadToHead = 0b0110 << SHIFT;
+     public static final byte OneMsgFromOldHeadToNonHead = 0b0111 << SHIFT;
+     public static final byte OneMsgFromOldHeadToHead = 0b1000 << SHIFT;
+     public static final byte OneMsgFromOldHeadAndOneFromHead = 0b1001 << SHIFT;
+     public static final byte NO_MSG = 0b1010 << SHIFT;
+     public static final byte NO_INFO = 0; // same numeric value as BothMsgsFromHead
+ }
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/State2.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/State2.java
new file mode 100644
index 0000000..aa22972
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/type/State2.java
@@ -0,0 +1,47 @@
+package edu.uci.ics.genomix.pregelix.type;
+
+ /** Byte constants with a nested helper that maps each code back to its name. */
+ public class State2 {
+
+     public static final byte NON_VERTEX = 0;
+     public static final byte START_VERTEX = 1;
+     public static final byte END_VERTEX = 2;
+     public static final byte START_HALT = 3;
+     public static final byte PSEUDOHEAD = 4;
+     public static final byte PSEUDOREAR = 5;
+     public static final byte FINAL_VERTEX = 6;
+     public static final byte CYCLE = 7;
+
+     /** Name lookup for the constants of {@link State2}. */
+     public final static class STATE_CONTENT {
+
+         /**
+          * Returns the name of the constant equal to {@code code}.
+          *
+          * @param code value to look up
+          * @return the constant's name, or the empty string if no constant matches
+          */
+         public static String getContentFromCode(byte code) {
+             switch (code) {
+             case NON_VERTEX:
+                 return "NON_VERTEX";
+             case START_VERTEX:
+                 return "START_VERTEX";
+             case END_VERTEX:
+                 return "END_VERTEX";
+             case START_HALT:
+                 return "START_HALT";
+             case PSEUDOHEAD:
+                 return "PSEUDOHEAD";
+             case PSEUDOREAR:
+                 return "PSEUDOREAR";
+             case FINAL_VERTEX:
+                 return "FINAL_VERTEX";
+             case CYCLE:
+                 return "CYCLE";
+             default:
+                 return "";
+             }
+         }
+     }
+ }
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/util/VertexUtil.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/util/VertexUtil.java
new file mode 100644
index 0000000..e2e7dfb
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/util/VertexUtil.java
@@ -0,0 +1,129 @@
+package edu.uci.ics.genomix.pregelix.util;
+
+import edu.uci.ics.genomix.pregelix.io.AdjacencyListWritable;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+ /**
+  * Static predicates that classify a vertex by its in-degree and out-degree, plus two
+  * small helpers for merged chains and adjacency lists.
+  */
+ public class VertexUtil {
+
+     /** Path vertex: in-degree = out-degree = 1. */
+     public static boolean isPathVertex(VertexValueWritable value) {
+         if (value.inDegree() != 1)
+             return false;
+         return value.outDegree() == 1;
+     }
+
+     /** Head: out-degree > 0 and not a path vertex. */
+     public static boolean isHead(VertexValueWritable value) {
+         if (value.outDegree() <= 0)
+             return false;
+         return !isPathVertex(value);
+     }
+
+     /** True when either isHeadVertexWithIndegree or isRearVertexWithOutdegree holds. */
+     public static boolean isHeadOrRearVertexWithDegree(VertexValueWritable value) {
+         if (isHeadVertexWithIndegree(value))
+             return true;
+         return isRearVertexWithOutdegree(value);
+     }
+
+     /** Head that is not the "in-degree 0, out-degree 1" case. */
+     public static boolean isHeadVertexWithIndegree(VertexValueWritable value) {
+         if (!isHead(value))
+             return false;
+         return !isHeadWithoutIndegree(value);
+     }
+
+     /** Head without in-degree: in-degree = 0, out-degree = 1. */
+     public static boolean isHeadWithoutIndegree(VertexValueWritable value) {
+         if (value.inDegree() != 0)
+             return false;
+         return value.outDegree() == 1;
+     }
+
+     /** Rear: in-degree > 0 and not a path vertex. */
+     public static boolean isRear(VertexValueWritable value) {
+         if (value.inDegree() <= 0)
+             return false;
+         return !isPathVertex(value);
+     }
+
+     /** Rear that is not the "in-degree 1, out-degree 0" case. */
+     public static boolean isRearVertexWithOutdegree(VertexValueWritable value) {
+         if (!isRear(value))
+             return false;
+         return !isRearWithoutOutdegree(value);
+     }
+
+     /** Rear without out-degree: in-degree = 1, out-degree = 0. */
+     public static boolean isRearWithoutOutdegree(VertexValueWritable value) {
+         if (value.inDegree() != 1)
+             return false;
+         return value.outDegree() == 0;
+     }
+
+     /**
+      * Cycle check on the text form: true when the string of {@code kmer} occurs in the
+      * string of {@code mergeChain} after its first character has been dropped.
+      *
+      * @param kmerSize not used
+      */
+     public static boolean isCycle(VKmerBytesWritable kmer, VKmerBytesWritable mergeChain, int kmerSize) {
+         String tail = mergeChain.toString().substring(1);
+         return tail.indexOf(kmer.toString()) >= 0;
+     }
+
+     /** Incoming tip: in-degree = 0, out-degree = 1. */
+     public static boolean isIncomingTipVertex(VertexValueWritable value) {
+         if (value.inDegree() != 0)
+             return false;
+         return value.outDegree() == 1;
+     }
+
+     /** Outgoing tip: in-degree = 1, out-degree = 0. */
+     public static boolean isOutgoingTipVertex(VertexValueWritable value) {
+         if (value.inDegree() != 1)
+             return false;
+         return value.outDegree() == 0;
+     }
+
+     /** Single vertex: in-degree = out-degree = 0. */
+     public static boolean isSingleVertex(VertexValueWritable value) {
+         if (value.inDegree() != 0)
+             return false;
+         return value.outDegree() == 0;
+     }
+
+     /** Up-bridge: in-degree = 1, out-degree > 1. */
+     public static boolean isUpBridgeVertex(VertexValueWritable value) {
+         if (value.inDegree() != 1)
+             return false;
+         return value.outDegree() > 1;
+     }
+
+     /** Down-bridge: in-degree > 1, out-degree = 1. */
+     public static boolean isDownBridgeVertex(VertexValueWritable value) {
+         if (value.inDegree() <= 1)
+             return false;
+         return value.outDegree() == 1;
+     }
+
+     /**
+      * Returns position 0 of the forward list if it is non-empty, otherwise position 0 of
+      * the reverse list; {@code null} when both lists are empty.
+      */
+     public static VKmerBytesWritable getNodeIdFromAdjacencyList(AdjacencyListWritable adj) {
+         if (adj.getForwardList().getCountOfPosition() > 0) {
+             return adj.getForwardList().getPosition(0);
+         }
+         if (adj.getReverseList().getCountOfPosition() > 0) {
+             return adj.getReverseList().getPosition(0);
+         }
+         return null;
+     }
+ }
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
new file mode 100644
index 0000000..98219d7
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
@@ -0,0 +1,284 @@
+package edu.uci.ics.genomix.pregelix.JobGen;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import edu.uci.ics.genomix.pregelix.format.GraphCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.format.GraphCleanOutputFormat;
+import edu.uci.ics.genomix.pregelix.format.InitialGraphCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.format.P2PathMergeOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.operator.bridgeremove.BridgeAddVertex;
+import edu.uci.ics.genomix.pregelix.operator.bridgeremove.BridgeRemoveVertex;
+import edu.uci.ics.genomix.pregelix.operator.bubblemerge.BubbleAddVertex;
+import edu.uci.ics.genomix.pregelix.operator.bubblemerge.BubbleMergeVertex;
+import edu.uci.ics.genomix.pregelix.operator.pathmerge.P2ForPathMergeVertex;
+import edu.uci.ics.genomix.pregelix.operator.pathmerge.MapReduceVertex;
+import edu.uci.ics.genomix.pregelix.operator.pathmerge.P4ForPathMergeVertex;
+import edu.uci.ics.genomix.pregelix.operator.removelowcoverage.RemoveLowCoverageVertex;
+import edu.uci.ics.genomix.pregelix.operator.scaffolding.ScaffoldingVertex;
+import edu.uci.ics.genomix.pregelix.operator.splitrepeat.SplitRepeatVertex;
+import edu.uci.ics.genomix.pregelix.operator.tipremove.TipAddVertex;
+import edu.uci.ics.genomix.pregelix.operator.tipremove.TipRemoveVertex;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+
+public class JobGenerator {
+
+ public static String outputBase = "src/test/resources/jobs/";
+
+ private static void generateMapReduceGraphJob(String jobName, String outputPath) throws IOException {
+     PregelixJob job = new PregelixJob(jobName); // configured here, then dumped as XML to outputPath
+     job.setVertexClass(MapReduceVertex.class);
+     job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+     job.setVertexOutputFormatClass(P2PathMergeOutputFormat.class);
+     job.setDynamicVertexValueSize(true);
+     job.setOutputKeyClass(VKmerBytesWritable.class);
+     job.setOutputValueClass(VertexValueWritable.class);
+     job.getConfiguration().setInt(MapReduceVertex.KMER_SIZE, 3); // stream below is closed by try-with-resources
+     try (FileOutputStream xml = new FileOutputStream(new File(outputPath))) { job.getConfiguration().writeXml(xml); }
+ }
+
+ private static void genMapReduceGraph() throws IOException { // job file: outputBase + "MapReduceGraph.xml"
+ generateMapReduceGraphJob("MapReduceGraph", outputBase + "MapReduceGraph.xml");
+ }
+
+// private static void generateNaiveAlgorithmForMergeGraphJob(String jobName, String outputPath) throws IOException {
+// PregelixJob job = new PregelixJob(jobName);
+// job.setVertexClass(P1ForPathMergeVertex.class);
+// job.setVertexInputFormatClass(NaiveAlgorithmForPathMergeInputFormat.class); //GraphCleanInputFormat
+// job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+// job.setDynamicVertexValueSize(true);
+// job.setOutputKeyClass(PositionWritable.class);
+// job.setOutputValueClass(VertexValueWritable.class);
+// job.getConfiguration().setInt(P1ForPathMergeVertex.KMER_SIZE, 3);
+// job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+// }
+//
+// private static void genNaiveAlgorithmForMergeGraph() throws IOException {
+// generateNaiveAlgorithmForMergeGraphJob("NaiveAlgorithmForMergeGraph", outputBase
+// + "NaiveAlgorithmForMergeGraph.xml");
+// }
+
+ private static void generateLogAlgorithmForMergeGraphJob(String jobName, String outputPath) throws IOException {
+     PregelixJob job = new PregelixJob(jobName); // configured here, then dumped as XML to outputPath
+     job.setVertexClass(P2ForPathMergeVertex.class);
+     job.setVertexInputFormatClass(InitialGraphCleanInputFormat.class);
+     job.setVertexOutputFormatClass(P2PathMergeOutputFormat.class);
+     job.setDynamicVertexValueSize(true);
+     job.setOutputKeyClass(VKmerBytesWritable.class);
+     job.setOutputValueClass(VertexValueWritable.class);
+     job.getConfiguration().setInt(P2ForPathMergeVertex.KMER_SIZE, 3); // stream below is closed by try-with-resources
+     try (FileOutputStream xml = new FileOutputStream(new File(outputPath))) { job.getConfiguration().writeXml(xml); }
+ }
+
+ private static void genLogAlgorithmForMergeGraph() throws IOException { // job file: outputBase + "LogAlgorithmForMergeGraph.xml"
+ generateLogAlgorithmForMergeGraphJob("LogAlgorithmForMergeGraph", outputBase + "LogAlgorithmForMergeGraph.xml");
+ }
+//
+// private static void generateP3ForMergeGraphJob(String jobName, String outputPath) throws IOException {
+// PregelixJob job = new PregelixJob(jobName);
+// job.setVertexClass(P3ForPathMergeVertex.class);
+// job.setVertexInputFormatClass(NaiveAlgorithmForPathMergeInputFormat.class);
+// job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+// job.setDynamicVertexValueSize(true);
+// job.setOutputKeyClass(PositionWritable.class);
+// job.setOutputValueClass(VertexValueWritable.class);
+// job.getConfiguration().setInt(P3ForPathMergeVertex.KMER_SIZE, 3);
+// job.getConfiguration().setFloat(P3ForPathMergeVertex.PSEUDORATE, 0.3f);
+// job.getConfiguration().setInt(P3ForPathMergeVertex.MAXROUND, 2);
+// job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+// }
+//
+// private static void genP3ForMergeGraph() throws IOException {
+// generateP3ForMergeGraphJob("P3ForMergeGraph", outputBase
+// + "P3ForMergeGraph.xml");
+// }
+
+ private static void generateP4ForMergeGraphJob(String jobName, String outputPath) throws IOException {
+     PregelixJob job = new PregelixJob(jobName); // configured here, then dumped as XML to outputPath
+     job.setVertexClass(P4ForPathMergeVertex.class);
+     job.setVertexInputFormatClass(InitialGraphCleanInputFormat.class);
+     job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+     job.setDynamicVertexValueSize(true);
+     job.setOutputKeyClass(VKmerBytesWritable.class);
+     job.setOutputValueClass(VertexValueWritable.class);
+     job.getConfiguration().setInt(P4ForPathMergeVertex.KMER_SIZE, 3); // stream below is closed by try-with-resources
+     try (FileOutputStream xml = new FileOutputStream(new File(outputPath))) { job.getConfiguration().writeXml(xml); }
+ }
+
+ private static void genP4ForMergeGraph() throws IOException {
+     String xmlPath = outputBase + "P4ForMergeGraph.xml"; // job file location under outputBase
+     generateP4ForMergeGraphJob("P4ForMergeGraph", xmlPath);
+ }
+
+ private static void generateRemoveLowCoverageGraphJob(String jobName, String outputPath) throws IOException {
+     PregelixJob job = new PregelixJob(jobName); // configured here, then dumped as XML to outputPath
+     job.setVertexClass(RemoveLowCoverageVertex.class);
+     job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+     job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+     job.setDynamicVertexValueSize(true);
+     job.setOutputKeyClass(VKmerBytesWritable.class);
+     job.setOutputValueClass(VertexValueWritable.class);
+     job.getConfiguration().setInt(RemoveLowCoverageVertex.KMER_SIZE, 3); // stream below is closed by try-with-resources
+     try (FileOutputStream xml = new FileOutputStream(new File(outputPath))) { job.getConfiguration().writeXml(xml); }
+ }
+
+ private static void genRemoveLowCoverageGraph() throws IOException {
+     String xmlPath = outputBase + "RemoveLowCoverageGraph.xml"; // job file location under outputBase
+     generateRemoveLowCoverageGraphJob("RemoveLowCoverageGraph", xmlPath);
+ }
+
+ private static void generateTipAddGraphJob(String jobName, String outputPath) throws IOException {
+     PregelixJob job = new PregelixJob(jobName); // configured here, then dumped as XML to outputPath
+     job.setVertexClass(TipAddVertex.class);
+     job.setVertexInputFormatClass(InitialGraphCleanInputFormat.class);
+     job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+     job.setDynamicVertexValueSize(true);
+     job.setOutputKeyClass(VKmerBytesWritable.class);
+     job.setOutputValueClass(VertexValueWritable.class);
+     job.getConfiguration().setInt(TipAddVertex.KMER_SIZE, 3); // stream below is closed by try-with-resources
+     try (FileOutputStream xml = new FileOutputStream(new File(outputPath))) { job.getConfiguration().writeXml(xml); }
+ }
+
+ private static void genTipAddGraph() throws IOException {
+     String xmlPath = outputBase + "TipAddGraph.xml"; // job file location under outputBase
+     generateTipAddGraphJob("TipAddGraph", xmlPath);
+ }
+
+ private static void generateTipRemoveGraphJob(String jobName, String outputPath) throws IOException {
+     PregelixJob job = new PregelixJob(jobName); // configured here, then dumped as XML to outputPath
+     job.setVertexClass(TipRemoveVertex.class);
+     job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+     job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+     job.setDynamicVertexValueSize(true);
+     job.setOutputKeyClass(VKmerBytesWritable.class);
+     job.setOutputValueClass(VertexValueWritable.class);
+     job.getConfiguration().setInt(TipRemoveVertex.KMER_SIZE, 3); // stream below is closed by try-with-resources
+     try (FileOutputStream xml = new FileOutputStream(new File(outputPath))) { job.getConfiguration().writeXml(xml); }
+ }
+
+ private static void genTipRemoveGraph() throws IOException {
+ generateTipRemoveGraphJob("TipRemoveGraph", outputBase
+ + "TipRemoveGraph.xml");
+ }
+
+ private static void generateSplitRepeatGraphJob(String jobName, String outputPath) throws IOException {
+ PregelixJob job = new PregelixJob(jobName);
+ job.setVertexClass(SplitRepeatVertex.class);
+ job.setVertexInputFormatClass(InitialGraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(VKmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ job.getConfiguration().setInt(SplitRepeatVertex.KMER_SIZE, 3);
+ job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+ }
+
+ private static void genSplitRepeatGraph() throws IOException {
+ generateSplitRepeatGraphJob("SplitRepeatGraph", outputBase + "SplitRepeatGraph.xml");
+ }
+
+
+ private static void generateBridgeAddGraphJob(String jobName, String outputPath) throws IOException {
+ PregelixJob job = new PregelixJob(jobName);
+ job.setVertexClass(BridgeAddVertex.class);
+ job.setVertexInputFormatClass(InitialGraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(VKmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ job.getConfiguration().setInt(BridgeAddVertex.KMER_SIZE, 3);
+ job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+ }
+
+ private static void genBridgeAddGraph() throws IOException {
+ generateBridgeAddGraphJob("BridgeAddGraph", outputBase
+ + "BridgeAddGraph.xml");
+ }
+
+ private static void generateBridgeRemoveGraphJob(String jobName, String outputPath) throws IOException {
+ PregelixJob job = new PregelixJob(jobName); // job description that is serialized to outputPath below
+ job.setVertexClass(BridgeRemoveVertex.class);
+ job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(VKmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ job.getConfiguration().setInt(BridgeRemoveVertex.KMER_SIZE, 3); // this job's own vertex key, like every sibling generator; NOTE(review): confirm BridgeRemoveVertex declares KMER_SIZE
+ job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+ }
+
+ private static void genBridgeRemoveGraph() throws IOException {
+ generateBridgeRemoveGraphJob("BridgeRemoveGraph", outputBase
+ + "BridgeRemoveGraph.xml");
+ }
+
+ private static void generateBubbleAddGraphJob(String jobName, String outputPath) throws IOException {
+ PregelixJob job = new PregelixJob(jobName);
+ job.setVertexClass(BubbleAddVertex.class);
+ job.setVertexInputFormatClass(InitialGraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(VKmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ job.getConfiguration().setInt(BubbleAddVertex.KMER_SIZE, 3);
+ job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+ }
+
+ private static void genBubbleAddGraph() throws IOException {
+ generateBubbleAddGraphJob("BubbleAddGraph", outputBase
+ + "BubbleAddGraph.xml");
+ }
+
+ private static void generateBubbleMergeGraphJob(String jobName, String outputPath) throws IOException {
+ PregelixJob job = new PregelixJob(jobName);
+ job.setVertexClass(BubbleMergeVertex.class);
+ job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(VKmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ job.getConfiguration().setInt(BubbleMergeVertex.KMER_SIZE, 3);
+ job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+ }
+
+ private static void genBubbleMergeGraph() throws IOException {
+ generateBubbleMergeGraphJob("BubbleMergeGraph", outputBase
+ + "BubbleMergeGraph.xml");
+ }
+
+ private static void generateScaffoldingGraphJob(String jobName, String outputPath) throws IOException {
+ PregelixJob job = new PregelixJob(jobName);
+ job.setVertexClass(ScaffoldingVertex.class);
+ job.setVertexInputFormatClass(GraphCleanInputFormat.class);
+ job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(VKmerBytesWritable.class);
+ job.setOutputValueClass(VertexValueWritable.class);
+ job.getConfiguration().setInt(ScaffoldingVertex.KMER_SIZE, 3);
+ job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+ }
+
+ private static void genScaffoldingGraph() throws IOException {
+ generateScaffoldingGraphJob("ScaffoldingGraph", outputBase
+ + "ScaffoldingGraph.xml");
+ }
+
+ public static void main(String[] args) throws IOException {
+ genMapReduceGraph();
+ genLogAlgorithmForMergeGraph();
+ genP4ForMergeGraph();
+ genRemoveLowCoverageGraph();
+ genTipAddGraph();
+ genTipRemoveGraph();
+ genBridgeAddGraph();
+ genBridgeRemoveGraph();
+ genBubbleAddGraph();
+ genBubbleMergeGraph();
+ genSplitRepeatGraph();
+ genScaffoldingGraph();
+ }
+
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BasicSmallTestCase.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BasicSmallTestCase.java
new file mode 100644
index 0000000..70ac94f
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BasicSmallTestCase.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.pregelix.graph.GenerateGraphViz;
+import edu.uci.ics.genomix.pregelix.sequencefile.GenerateTextFile;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.core.base.IDriver.Plan;
+import edu.uci.ics.pregelix.core.driver.Driver;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+
+public class BasicSmallTestCase extends TestCase {
+ private final PregelixJob job;
+ private final String resultFileDir;
+ private final String textFileDir;
+ private final String graphvizFileDir;
+ private final String jobFile;
+ private final Driver driver = new Driver(this.getClass());
+ private final FileSystem dfs;
+
+ public BasicSmallTestCase(String hadoopConfPath, String jobName, String jobFile, FileSystem dfs,
+ String hdfsInput, String resultFile, String textFile, String graphvizFile) throws Exception {
+ super("test");
+ this.jobFile = jobFile;
+ this.job = new PregelixJob("test");
+ this.job.getConfiguration().addResource(new Path(jobFile));
+ this.job.getConfiguration().addResource(new Path(hadoopConfPath));
+ FileInputFormat.setInputPaths(job, hdfsInput);
+ FileOutputFormat.setOutputPath(job, new Path(hdfsInput + "_result"));
+ job.setJobName(jobName);
+ this.resultFileDir = resultFile;
+ this.textFileDir = textFile;
+ this.graphvizFileDir = graphvizFile;
+
+ this.dfs = dfs;
+ }
+
+ private void waitawhile() throws InterruptedException {
+ synchronized (this) {
+ this.wait(20);
+ }
+ }
+
+ @Test
+ public void test() throws Exception {
+ setUp();
+ Plan[] plans = new Plan[] { Plan.OUTER_JOIN };
+ for (Plan plan : plans) {
+ driver.runJob(job, plan, PregelixHyracksIntegrationUtil.CC_HOST,
+ PregelixHyracksIntegrationUtil.TEST_HYRACKS_CC_CLIENT_PORT, false);
+ }
+ compareResults();
+ tearDown();
+ waitawhile();
+ }
+
+ private void compareResults() throws Exception {
+ dfs.copyToLocalFile(FileOutputFormat.getOutputPath(job), new Path(resultFileDir));
+ GenerateTextFile.generateFromPathmergeResult(3, resultFileDir, textFileDir);
+ GenerateGraphViz.convertGraphCleanOutputToGraphViz(resultFileDir, graphvizFileDir);
+ }
+
+ public String toString() {
+ return jobFile;
+ }
+
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BridgeAddSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BridgeAddSmallTestSuite.java
new file mode 100644
index 0000000..c492d1e
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BridgeAddSmallTestSuite.java
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import junit.framework.TestSuite;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+
+@SuppressWarnings("deprecation")
+public class BridgeAddSmallTestSuite extends TestSuite {
+ private static final Logger LOGGER = Logger.getLogger(BridgeAddSmallTestSuite.class.getName());
+
+ public static final String PreFix = "data/AddBridge";
+ public static final String[] TestDir = { PreFix + File.separator
+ + "SimpleTest"};
+ private static final String ACTUAL_RESULT_DIR = "data/actual/bridgeadd";
+ private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+ private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+ private static final String PATH_TO_CLUSTER_PROPERTIES = "src/test/resources/cluster/cluster.properties";
+ private static final String PATH_TO_JOBS = "src/test/resources/jobs/";
+ private static final String PATH_TO_ONLY = "src/test/resources/only_bridgeadd.txt";
+
+ public static final String HDFS_INPUTPATH = "/PathTestSet";
+
+ private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+ private MiniDFSCluster dfsCluster;
+
+ private JobConf conf = new JobConf();
+ private int numberOfNC = 2;
+
+ public void setUp() throws Exception {
+ ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
+ ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
+ cleanupStores();
+ PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
+ LOGGER.info("Hyracks mini-cluster started");
+ FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+ FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+ startHDFS();
+ }
+
+ private void startHDFS() throws IOException {
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+ FileSystem dfs = FileSystem.get(conf);
+
+ for (String testDir : TestDir) {
+ File src = new File(testDir);
+ Path dest = new Path(HDFS_INPUTPATH + File.separator + src.getName());
+ dfs.mkdirs(dest);
+ //src.listFiles()
+ //src.listFiles((FilenameFilter)(new WildcardFileFilter("part*")))
+ for (File f : src.listFiles()) {
+ dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
+ }
+ }
+
+ DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+ conf.writeXml(confOutput);
+ confOutput.flush();
+ confOutput.close();
+ }
+
+ private void cleanupStores() throws IOException {
+ FileUtils.forceMkdir(new File("teststore"));
+ FileUtils.forceMkdir(new File("build"));
+ FileUtils.cleanDirectory(new File("teststore"));
+ FileUtils.cleanDirectory(new File("build"));
+ }
+
+ /**
+ * cleanup hdfs cluster
+ */
+ private void cleanupHDFS() throws Exception {
+ dfsCluster.shutdown();
+ }
+
+ public void tearDown() throws Exception {
+ PregelixHyracksIntegrationUtil.deinit();
+ LOGGER.info("Hyracks mini-cluster shut down");
+ cleanupHDFS();
+ }
+
+ public static Test suite() throws Exception {
+ List<String> onlys = getFileList(PATH_TO_ONLY);
+ File testData = new File(PATH_TO_JOBS);
+ File[] queries = testData.listFiles();
+ BridgeAddSmallTestSuite testSuite = new BridgeAddSmallTestSuite();
+ testSuite.setUp();
+ boolean onlyEnabled = false;
+ FileSystem dfs = FileSystem.get(testSuite.conf);
+
+ if (onlys.size() > 0) {
+ onlyEnabled = true;
+ }
+
+ for (File qFile : queries) {
+ if (qFile.isFile()) {
+ if (onlyEnabled && !isInList(onlys, qFile.getName())) {
+ continue;
+ } else {
+ for (String testPathStr : TestDir) {
+ File testDir = new File(testPathStr);
+ String resultFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
+ + File.separator + "bin" + File.separator + testDir.getName();
+ String textFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
+ + File.separator + "txt" + File.separator + testDir.getName();
+ String graphvizFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
+ + File.separator + "graphviz" + File.separator + testDir.getName();
+ testSuite.addTest(new BasicSmallTestCase(HADOOP_CONF_PATH, qFile.getName(), qFile
+ .getAbsolutePath().toString(), dfs,
+ HDFS_INPUTPATH + File.separator + testDir.getName(), resultFileName, textFileName, graphvizFileName));
+ }
+ }
+ }
+ }
+ return testSuite;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+ try {
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ // cleanupStores();
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+ tearDown();
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ protected static List<String> getFileList(String ignorePath) throws FileNotFoundException, IOException {
+ BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+ List<String> ignores = new ArrayList<String>(); // one entry per line of the file, in file order
+ try { // the finally block releases the file handle even when readLine() throws
+ for (String s = reader.readLine(); s != null; s = reader.readLine()) ignores.add(s);
+ } finally {
+ reader.close();
+ }
+ return ignores;
+ }
+
+ private static String jobExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.'); // -1 when the job file name has no extension
+ return dot < 0 ? fname : fname.substring(0, dot); // extension-less names are kept whole instead of throwing
+ }
+
+ private static boolean isInList(List<String> onlys, String name) {
+ for (String only : onlys)
+ if (name.indexOf(only) >= 0)
+ return true;
+ return false;
+ }
+
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BridgeRemoveSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BridgeRemoveSmallTestSuite.java
new file mode 100644
index 0000000..8d22e9a
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BridgeRemoveSmallTestSuite.java
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import junit.framework.TestSuite;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+
+@SuppressWarnings("deprecation")
+public class BridgeRemoveSmallTestSuite extends TestSuite {
+ private static final Logger LOGGER = Logger.getLogger(BridgeRemoveSmallTestSuite.class.getName());
+
+ public static final String PreFix = "data/actual/bridgeadd/BridgeAddGraph/bin";
+ public static final String[] TestDir = { PreFix + File.separator
+ + "SimpleTest"};
+ private static final String ACTUAL_RESULT_DIR = "data/actual/bridgeremove";
+ private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+ private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+ private static final String PATH_TO_CLUSTER_PROPERTIES = "src/test/resources/cluster/cluster.properties";
+ private static final String PATH_TO_JOBS = "src/test/resources/jobs/";
+ private static final String PATH_TO_ONLY = "src/test/resources/only_bridgeremove.txt";
+
+ public static final String HDFS_INPUTPATH = "/PathTestSet";
+
+ private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+ private MiniDFSCluster dfsCluster;
+
+ private JobConf conf = new JobConf();
+ private int numberOfNC = 2;
+
+ public void setUp() throws Exception {
+ ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
+ ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
+ cleanupStores();
+ PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
+ LOGGER.info("Hyracks mini-cluster started");
+ FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+ FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+ startHDFS();
+ }
+
+ private void startHDFS() throws IOException {
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+ FileSystem dfs = FileSystem.get(conf);
+
+ for (String testDir : TestDir) {
+ File src = new File(testDir);
+ Path dest = new Path(HDFS_INPUTPATH + File.separator + src.getName());
+ dfs.mkdirs(dest);
+ //src.listFiles()
+ //src.listFiles((FilenameFilter)(new WildcardFileFilter("part*")))
+ for (File f : src.listFiles()) {
+ dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
+ }
+ }
+
+ DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+ conf.writeXml(confOutput);
+ confOutput.flush();
+ confOutput.close();
+ }
+
+ private void cleanupStores() throws IOException {
+ FileUtils.forceMkdir(new File("teststore"));
+ FileUtils.forceMkdir(new File("build"));
+ FileUtils.cleanDirectory(new File("teststore"));
+ FileUtils.cleanDirectory(new File("build"));
+ }
+
+ /**
+ * cleanup hdfs cluster
+ */
+ private void cleanupHDFS() throws Exception {
+ dfsCluster.shutdown();
+ }
+
+ public void tearDown() throws Exception {
+ PregelixHyracksIntegrationUtil.deinit();
+ LOGGER.info("Hyracks mini-cluster shut down");
+ cleanupHDFS();
+ }
+
+ public static Test suite() throws Exception {
+ List<String> onlys = getFileList(PATH_TO_ONLY);
+ File testData = new File(PATH_TO_JOBS);
+ File[] queries = testData.listFiles();
+ BridgeRemoveSmallTestSuite testSuite = new BridgeRemoveSmallTestSuite();
+ testSuite.setUp();
+ boolean onlyEnabled = false;
+ FileSystem dfs = FileSystem.get(testSuite.conf);
+
+ if (onlys.size() > 0) {
+ onlyEnabled = true;
+ }
+
+ for (File qFile : queries) {
+ if (qFile.isFile()) {
+ if (onlyEnabled && !isInList(onlys, qFile.getName())) {
+ continue;
+ } else {
+ for (String testPathStr : TestDir) {
+ File testDir = new File(testPathStr);
+ String resultFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
+ + File.separator + "bin" + File.separator + testDir.getName();
+ String textFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
+ + File.separator + "txt" + File.separator + testDir.getName();
+ String graphvizFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
+ + File.separator + "graphviz" + File.separator + testDir.getName();
+ testSuite.addTest(new BasicSmallTestCase(HADOOP_CONF_PATH, qFile.getName(), qFile
+ .getAbsolutePath().toString(), dfs,
+ HDFS_INPUTPATH + File.separator + testDir.getName(), resultFileName, textFileName, graphvizFileName));
+ }
+ }
+ }
+ }
+ return testSuite;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+ try {
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ // cleanupStores();
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+ tearDown();
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ protected static List<String> getFileList(String ignorePath) throws FileNotFoundException, IOException {
+ BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+ List<String> ignores = new ArrayList<String>(); // one entry per line of the file, in file order
+ try { // the finally block releases the file handle even when readLine() throws
+ for (String s = reader.readLine(); s != null; s = reader.readLine()) ignores.add(s);
+ } finally {
+ reader.close();
+ }
+ return ignores;
+ }
+
+ private static String jobExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.'); // -1 when the job file name has no extension
+ return dot < 0 ? fname : fname.substring(0, dot); // extension-less names are kept whole instead of throwing
+ }
+
+ private static boolean isInList(List<String> onlys, String name) {
+ for (String only : onlys)
+ if (name.indexOf(only) >= 0)
+ return true;
+ return false;
+ }
+
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BubbleAddSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BubbleAddSmallTestSuite.java
new file mode 100644
index 0000000..1e5df13
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BubbleAddSmallTestSuite.java
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import junit.framework.TestSuite;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+
+@SuppressWarnings("deprecation")
+public class BubbleAddSmallTestSuite extends TestSuite {
+ private static final Logger LOGGER = Logger.getLogger(BubbleAddSmallTestSuite.class.getName());
+
+ public static final String PreFix = "data/PathMergeTestSet";
+ public static final String[] TestDir = { PreFix + File.separator
+ + "5"};
+ private static final String ACTUAL_RESULT_DIR = "data/actual/bubbleadd";
+ private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+ private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+ private static final String PATH_TO_CLUSTER_PROPERTIES = "src/test/resources/cluster/cluster.properties";
+ private static final String PATH_TO_JOBS = "src/test/resources/jobs/";
+ private static final String PATH_TO_ONLY = "src/test/resources/only_bubbleadd.txt";
+
+ public static final String HDFS_INPUTPATH = "/PathTestSet";
+
+ private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+ private MiniDFSCluster dfsCluster;
+
+ private JobConf conf = new JobConf();
+ private int numberOfNC = 2;
+
+ public void setUp() throws Exception {
+ ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
+ ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
+ cleanupStores();
+ PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
+ LOGGER.info("Hyracks mini-cluster started");
+ FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+ FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+ startHDFS();
+ }
+
+ private void startHDFS() throws IOException {
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+ FileSystem dfs = FileSystem.get(conf);
+
+ for (String testDir : TestDir) {
+ File src = new File(testDir);
+ Path dest = new Path(HDFS_INPUTPATH + File.separator + src.getName());
+ dfs.mkdirs(dest);
+ //src.listFiles()
+ //src.listFiles((FilenameFilter)(new WildcardFileFilter("part*")))
+ for (File f : src.listFiles()) {
+ dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
+ }
+ }
+
+ DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+ conf.writeXml(confOutput);
+ confOutput.flush();
+ confOutput.close();
+ }
+
+ private void cleanupStores() throws IOException {
+ FileUtils.forceMkdir(new File("teststore"));
+ FileUtils.forceMkdir(new File("build"));
+ FileUtils.cleanDirectory(new File("teststore"));
+ FileUtils.cleanDirectory(new File("build"));
+ }
+
+ /**
+ * cleanup hdfs cluster
+ */
+ private void cleanupHDFS() throws Exception {
+ dfsCluster.shutdown();
+ }
+
+ public void tearDown() throws Exception {
+ PregelixHyracksIntegrationUtil.deinit();
+ LOGGER.info("Hyracks mini-cluster shut down");
+ cleanupHDFS();
+ }
+
+ public static Test suite() throws Exception {
+ List<String> onlys = getFileList(PATH_TO_ONLY);
+ File testData = new File(PATH_TO_JOBS);
+ File[] queries = testData.listFiles();
+ BubbleAddSmallTestSuite testSuite = new BubbleAddSmallTestSuite();
+ testSuite.setUp();
+ boolean onlyEnabled = false;
+ FileSystem dfs = FileSystem.get(testSuite.conf);
+
+ if (onlys.size() > 0) {
+ onlyEnabled = true;
+ }
+
+ for (File qFile : queries) {
+ if (qFile.isFile()) {
+ if (onlyEnabled && !isInList(onlys, qFile.getName())) {
+ continue;
+ } else {
+ for (String testPathStr : TestDir) {
+ File testDir = new File(testPathStr);
+ String resultFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
+ + File.separator + "bin" + File.separator + testDir.getName();
+ String textFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
+ + File.separator + "txt" + File.separator + testDir.getName();
+ String graphvizFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
+ + File.separator + "graphviz" + File.separator + testDir.getName();
+ testSuite.addTest(new BasicSmallTestCase(HADOOP_CONF_PATH, qFile.getName(), qFile
+ .getAbsolutePath().toString(), dfs,
+ HDFS_INPUTPATH + File.separator + testDir.getName(), resultFileName, textFileName, graphvizFileName));
+ }
+ }
+ }
+ }
+ return testSuite;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+ try {
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ // cleanupStores();
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+ tearDown();
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ protected static List<String> getFileList(String ignorePath) throws FileNotFoundException, IOException {
+ BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+ List<String> ignores = new ArrayList<String>(); // one entry per line of the file, in file order
+ try { // the finally block releases the file handle even when readLine() throws
+ for (String s = reader.readLine(); s != null; s = reader.readLine()) ignores.add(s);
+ } finally {
+ reader.close();
+ }
+ return ignores;
+ }
+
+ private static String jobExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.'); // -1 when the job file name has no extension
+ return dot < 0 ? fname : fname.substring(0, dot); // extension-less names are kept whole instead of throwing
+ }
+
+ private static boolean isInList(List<String> onlys, String name) {
+ for (String only : onlys)
+ if (name.indexOf(only) >= 0)
+ return true;
+ return false;
+ }
+
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BubbleMergeSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BubbleMergeSmallTestSuite.java
new file mode 100644
index 0000000..21020ce
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/BubbleMergeSmallTestSuite.java
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import junit.framework.TestSuite;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+
+/**
+ * JUnit 3 suite that runs Pregelix job files against a Hyracks mini-cluster and a
+ * {@link MiniDFSCluster}.
+ * <p>
+ * {@link #suite()} starts both clusters, copies every directory in {@link #TestDir} to HDFS below
+ * {@link #HDFS_INPUTPATH} and registers one {@link BasicSmallTestCase} per (job file, input
+ * directory) pair. Job files come from {@code src/test/resources/jobs/}, optionally filtered by
+ * the substrings listed in {@code only_bubblemerge.txt}. {@link #run(TestResult)} executes the
+ * cases and then shuts both clusters down.
+ */
+@SuppressWarnings("deprecation")
+public class BubbleMergeSmallTestSuite extends TestSuite {
+    private static final Logger LOGGER = Logger.getLogger(BubbleMergeSmallTestSuite.class.getName());
+
+    /**
+     * Local parent directory of the input data sets. NOTE(review): the path lies under
+     * {@code data/actual}, so it is presumably the output of another suite - confirm run order.
+     */
+    public static final String PreFix = "data/actual/bubbleadd/BubbleAddGraph/bin";
+    /** Local input directories; each is uploaded to HDFS and paired with every selected job. */
+    public static final String[] TestDir = { PreFix + File.separator + "5" };
+    /** Local directory under which result paths are built and the Hadoop config is written. */
+    private static final String ACTUAL_RESULT_DIR = "data/actual/bubblemerge";
+    private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+    private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+    private static final String PATH_TO_CLUSTER_PROPERTIES = "src/test/resources/cluster/cluster.properties";
+    /** Directory whose regular files are treated as job descriptions. */
+    private static final String PATH_TO_JOBS = "src/test/resources/jobs/";
+    /** Text file of name fragments; when it has any line, only matching job files are run. */
+    private static final String PATH_TO_ONLY = "src/test/resources/only_bubblemerge.txt";
+
+    /** HDFS directory that receives one sub-directory per entry of {@link #TestDir}. */
+    public static final String HDFS_INPUTPATH = "/PathTestSet";
+
+    /** Local file the merged Hadoop configuration is written to for the test cases. */
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private MiniDFSCluster dfsCluster;
+
+    private JobConf conf = new JobConf();
+    /** Passed to {@link MiniDFSCluster} as the number of data nodes to start. */
+    private int numberOfNC = 2;
+
+    /**
+     * Points the cluster configuration at the test resources, empties the local scratch
+     * directories, starts the Hyracks mini-cluster, recreates an empty result directory and
+     * starts HDFS.
+     *
+     * @throws Exception if a directory cannot be prepared or a cluster fails to start
+     */
+    public void setUp() throws Exception {
+        ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
+        ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
+        cleanupStores();
+        PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
+        LOGGER.info("Hyracks mini-cluster started");
+        File resultDir = new File(ACTUAL_RESULT_DIR);
+        FileUtils.forceMkdir(resultDir);
+        FileUtils.cleanDirectory(resultDir);
+        startHDFS();
+    }
+
+    /**
+     * Starts the HDFS mini-cluster, uploads the test inputs and writes the effective Hadoop
+     * configuration to {@link #HADOOP_CONF_PATH}.
+     *
+     * @throws FileNotFoundException if a directory listed in {@link #TestDir} does not exist
+     * @throws IOException if HDFS cannot be started, a copy fails or the configuration cannot be
+     *             written
+     */
+    private void startHDFS() throws IOException {
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+        // NOTE(review): presumably removes stale mini-cluster data kept under "build" - confirm.
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+        FileSystem dfs = FileSystem.get(conf);
+
+        for (String testDir : TestDir) {
+            File src = new File(testDir);
+            File[] inputs = src.listFiles();
+            if (inputs == null) {
+                // listFiles() yields null for a missing directory; fail with a clear message.
+                throw new FileNotFoundException("Test input directory not found: " + src.getAbsolutePath());
+            }
+            Path dest = new Path(HDFS_INPUTPATH + File.separator + src.getName());
+            dfs.mkdirs(dest);
+            for (File f : inputs) {
+                dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
+            }
+        }
+
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+            confOutput.flush();
+        } finally {
+            confOutput.close();
+        }
+    }
+
+    /**
+     * Ensures the local {@code teststore} and {@code build} directories exist and are empty.
+     *
+     * @throws IOException if a directory cannot be created or emptied
+     */
+    private void cleanupStores() throws IOException {
+        FileUtils.forceMkdir(new File("teststore"));
+        FileUtils.forceMkdir(new File("build"));
+        FileUtils.cleanDirectory(new File("teststore"));
+        FileUtils.cleanDirectory(new File("build"));
+    }
+
+    /**
+     * Shuts the HDFS mini-cluster down; does nothing when it was never started.
+     */
+    private void cleanupHDFS() throws Exception {
+        if (dfsCluster != null) {
+            dfsCluster.shutdown();
+        }
+    }
+
+    /**
+     * Stops the Hyracks mini-cluster and then the HDFS mini-cluster. HDFS is shut down even when
+     * stopping Hyracks throws.
+     *
+     * @throws Exception if either cluster fails to shut down
+     */
+    public void tearDown() throws Exception {
+        try {
+            PregelixHyracksIntegrationUtil.deinit();
+            LOGGER.info("Hyracks mini-cluster shut down");
+        } finally {
+            cleanupHDFS();
+        }
+    }
+
+    /**
+     * Builds the suite: starts the clusters and adds one {@link BasicSmallTestCase} for every
+     * selected job file combined with every directory in {@link #TestDir}.
+     * <p>
+     * A job file is selected when the "only" list is empty or one of its entries is a substring
+     * of the file name. Sub-directories of the job directory are ignored.
+     *
+     * @return the populated suite, with both mini-clusters already running
+     * @throws FileNotFoundException if the "only" file or the job directory is missing
+     * @throws Exception if the clusters cannot be started
+     */
+    public static Test suite() throws Exception {
+        List<String> onlys = getFileList(PATH_TO_ONLY);
+        File[] queries = new File(PATH_TO_JOBS).listFiles();
+        if (queries == null) {
+            // Fail before any cluster is started instead of hitting an NPE in the loop below.
+            throw new FileNotFoundException("Job directory not found: " + PATH_TO_JOBS);
+        }
+        BubbleMergeSmallTestSuite testSuite = new BubbleMergeSmallTestSuite();
+        testSuite.setUp();
+        boolean onlyEnabled = !onlys.isEmpty();
+        FileSystem dfs = FileSystem.get(testSuite.conf);
+
+        for (File qFile : queries) {
+            if (!qFile.isFile() || (onlyEnabled && !isInList(onlys, qFile.getName()))) {
+                continue;
+            }
+            String jobResultDir = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName());
+            for (String testPathStr : TestDir) {
+                String dataSet = new File(testPathStr).getName();
+                String resultFileName = jobResultDir + File.separator + "bin" + File.separator + dataSet;
+                String textFileName = jobResultDir + File.separator + "txt" + File.separator + dataSet;
+                String graphvizFileName = jobResultDir + File.separator + "graphviz" + File.separator + dataSet;
+                testSuite.addTest(new BasicSmallTestCase(HADOOP_CONF_PATH, qFile.getName(),
+                        qFile.getAbsolutePath(), dfs, HDFS_INPUTPATH + File.separator + dataSet, resultFileName,
+                        textFileName, graphvizFileName));
+            }
+        }
+        return testSuite;
+    }
+
+    /**
+     * Runs the tests and collects their result in a TestResult.
+     * <p>
+     * {@link #tearDown()} is invoked in a {@code finally} block so both mini-clusters are shut
+     * down even when running a test throws.
+     *
+     * @param result collector that receives the outcome of every executed test
+     * @throws IllegalStateException wrapping any exception raised while running or tearing down
+     */
+    @Override
+    public void run(TestResult result) {
+        try {
+            try {
+                int testCount = countTestCases();
+                for (int i = 0; i < testCount; i++) {
+                    Test each = this.testAt(i);
+                    if (result.shouldStop()) {
+                        break;
+                    }
+                    runTest(each, result);
+                }
+            } finally {
+                tearDown();
+            }
+        } catch (Exception e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
+    /**
+     * Reads a text file and returns its lines in file order. The reader is closed even when
+     * reading fails.
+     *
+     * @param ignorePath path of the text file to read
+     * @return every line of the file without its line terminator; empty if the file has no lines
+     * @throws FileNotFoundException if {@code ignorePath} cannot be opened for reading
+     * @throws IOException if reading or closing the file fails
+     */
+    protected static List<String> getFileList(String ignorePath) throws FileNotFoundException, IOException {
+        List<String> ignores = new ArrayList<String>();
+        BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+        try {
+            String s;
+            while ((s = reader.readLine()) != null) {
+                ignores.add(s);
+            }
+        } finally {
+            reader.close();
+        }
+        return ignores;
+    }
+
+    /**
+     * Strips the last extension from a file name.
+     *
+     * @param fname file name to strip
+     * @return the part of {@code fname} before its last {@code '.'}, or {@code fname} itself when
+     *         it contains no {@code '.'}
+     */
+    private static String jobExtToResExt(String fname) {
+        int dot = fname.lastIndexOf('.');
+        if (dot < 0) {
+            // No extension: substring(0, -1) would throw, so keep the name as is.
+            return fname;
+        }
+        return fname.substring(0, dot);
+    }
+
+    /**
+     * Tells whether any entry of {@code onlys} occurs as a substring of {@code name}.
+     *
+     * @param onlys substrings to look for
+     * @param name string to search in
+     * @return {@code true} if at least one entry is contained in {@code name}
+     */
+    private static boolean isInList(List<String> onlys, String name) {
+        for (String only : onlys) {
+            if (name.indexOf(only) >= 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
new file mode 100644
index 0000000..7b937c8
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
@@ -0,0 +1,208 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import junit.framework.TestSuite;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+
+/**
+ * JUnit 3 suite that runs Pregelix job files against a Hyracks mini-cluster and a
+ * {@link MiniDFSCluster}.
+ * <p>
+ * {@link #suite()} starts both clusters, copies every directory in {@link #TestDir} to HDFS below
+ * {@link #HDFS_INPUTPATH} and registers one {@link BasicSmallTestCase} per (job file, input
+ * directory) pair. Job files come from {@code src/test/resources/jobs/}, optionally filtered by
+ * the substrings listed in {@code only_pathmerge.txt}. {@link #run(TestResult)} executes the
+ * cases and then shuts both clusters down.
+ */
+@SuppressWarnings("deprecation")
+public class PathMergeSmallTestSuite extends TestSuite {
+    private static final Logger LOGGER = Logger.getLogger(PathMergeSmallTestSuite.class.getName());
+
+    /** Local parent directory of the numbered input data sets. */
+    public static final String PreFix = "data/PathMergeTestSet"; //"graphbuildresult";
+    /** Local input directories; each is uploaded to HDFS and paired with every selected job. */
+    public static final String[] TestDir = { PreFix + File.separator + "2", PreFix + File.separator + "3",
+            PreFix + File.separator + "4", PreFix + File.separator + "5", PreFix + File.separator + "6",
+            PreFix + File.separator + "7", PreFix + File.separator + "8", PreFix + File.separator + "9" };
+    /** Local directory under which result paths are built and the Hadoop config is written. */
+    private static final String ACTUAL_RESULT_DIR = "data/actual/pathmerge";
+    private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+    private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+    private static final String PATH_TO_CLUSTER_PROPERTIES = "src/test/resources/cluster/cluster.properties";
+    /** Directory whose regular files are treated as job descriptions. */
+    private static final String PATH_TO_JOBS = "src/test/resources/jobs/";
+    /** Text file of name fragments; when it has any line, only matching job files are run. */
+    private static final String PATH_TO_ONLY = "src/test/resources/only_pathmerge.txt";
+
+    /** HDFS directory that receives one sub-directory per entry of {@link #TestDir}. */
+    public static final String HDFS_INPUTPATH = "/PathTestSet";
+
+    /** Local file the merged Hadoop configuration is written to for the test cases. */
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private MiniDFSCluster dfsCluster;
+
+    private JobConf conf = new JobConf();
+    /** Passed to {@link MiniDFSCluster} as the number of data nodes to start. */
+    private int numberOfNC = 2;
+
+    /**
+     * Points the cluster configuration at the test resources, empties the local scratch
+     * directories, starts the Hyracks mini-cluster, recreates an empty result directory and
+     * starts HDFS.
+     *
+     * @throws Exception if a directory cannot be prepared or a cluster fails to start
+     */
+    public void setUp() throws Exception {
+        ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
+        ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
+        cleanupStores();
+        PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
+        LOGGER.info("Hyracks mini-cluster started");
+        File resultDir = new File(ACTUAL_RESULT_DIR);
+        FileUtils.forceMkdir(resultDir);
+        FileUtils.cleanDirectory(resultDir);
+        startHDFS();
+    }
+
+    /**
+     * Starts the HDFS mini-cluster, uploads the test inputs and writes the effective Hadoop
+     * configuration to {@link #HADOOP_CONF_PATH}.
+     *
+     * @throws FileNotFoundException if a directory listed in {@link #TestDir} does not exist
+     * @throws IOException if HDFS cannot be started, a copy fails or the configuration cannot be
+     *             written
+     */
+    private void startHDFS() throws IOException {
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+        // NOTE(review): presumably removes stale mini-cluster data kept under "build" - confirm.
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+        FileSystem dfs = FileSystem.get(conf);
+
+        for (String testDir : TestDir) {
+            File src = new File(testDir);
+            File[] inputs = src.listFiles();
+            if (inputs == null) {
+                // listFiles() yields null for a missing directory; fail with a clear message.
+                throw new FileNotFoundException("Test input directory not found: " + src.getAbsolutePath());
+            }
+            Path dest = new Path(HDFS_INPUTPATH + File.separator + src.getName());
+            dfs.mkdirs(dest);
+            for (File f : inputs) {
+                dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
+            }
+        }
+
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+            confOutput.flush();
+        } finally {
+            confOutput.close();
+        }
+    }
+
+    /**
+     * Ensures the local {@code teststore} and {@code build} directories exist and are empty.
+     *
+     * @throws IOException if a directory cannot be created or emptied
+     */
+    private void cleanupStores() throws IOException {
+        FileUtils.forceMkdir(new File("teststore"));
+        FileUtils.forceMkdir(new File("build"));
+        FileUtils.cleanDirectory(new File("teststore"));
+        FileUtils.cleanDirectory(new File("build"));
+    }
+
+    /**
+     * Shuts the HDFS mini-cluster down; does nothing when it was never started.
+     */
+    private void cleanupHDFS() throws Exception {
+        if (dfsCluster != null) {
+            dfsCluster.shutdown();
+        }
+    }
+
+    /**
+     * Stops the Hyracks mini-cluster and then the HDFS mini-cluster. HDFS is shut down even when
+     * stopping Hyracks throws.
+     *
+     * @throws Exception if either cluster fails to shut down
+     */
+    public void tearDown() throws Exception {
+        try {
+            PregelixHyracksIntegrationUtil.deinit();
+            LOGGER.info("Hyracks mini-cluster shut down");
+        } finally {
+            cleanupHDFS();
+        }
+    }
+
+    /**
+     * Builds the suite: starts the clusters and adds one {@link BasicSmallTestCase} for every
+     * selected job file combined with every directory in {@link #TestDir}.
+     * <p>
+     * A job file is selected when the "only" list is empty or one of its entries is a substring
+     * of the file name. Sub-directories of the job directory are ignored.
+     *
+     * @return the populated suite, with both mini-clusters already running
+     * @throws FileNotFoundException if the "only" file or the job directory is missing
+     * @throws Exception if the clusters cannot be started
+     */
+    public static Test suite() throws Exception {
+        List<String> onlys = getFileList(PATH_TO_ONLY);
+        File[] queries = new File(PATH_TO_JOBS).listFiles();
+        if (queries == null) {
+            // Fail before any cluster is started instead of hitting an NPE in the loop below.
+            throw new FileNotFoundException("Job directory not found: " + PATH_TO_JOBS);
+        }
+        PathMergeSmallTestSuite testSuite = new PathMergeSmallTestSuite();
+        testSuite.setUp();
+        boolean onlyEnabled = !onlys.isEmpty();
+        FileSystem dfs = FileSystem.get(testSuite.conf);
+
+        for (File qFile : queries) {
+            if (!qFile.isFile() || (onlyEnabled && !isInList(onlys, qFile.getName()))) {
+                continue;
+            }
+            String jobResultDir = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName());
+            for (String testPathStr : TestDir) {
+                String dataSet = new File(testPathStr).getName();
+                String resultFileName = jobResultDir + File.separator + "bin" + File.separator + dataSet;
+                String textFileName = jobResultDir + File.separator + "txt" + File.separator + dataSet;
+                String graphvizFileName = jobResultDir + File.separator + "graphviz" + File.separator + dataSet;
+                testSuite.addTest(new BasicSmallTestCase(HADOOP_CONF_PATH, qFile.getName(),
+                        qFile.getAbsolutePath(), dfs, HDFS_INPUTPATH + File.separator + dataSet, resultFileName,
+                        textFileName, graphvizFileName));
+            }
+        }
+        return testSuite;
+    }
+
+    /**
+     * Runs the tests and collects their result in a TestResult.
+     * <p>
+     * {@link #tearDown()} is invoked in a {@code finally} block so both mini-clusters are shut
+     * down even when running a test throws.
+     *
+     * @param result collector that receives the outcome of every executed test
+     * @throws IllegalStateException wrapping any exception raised while running or tearing down
+     */
+    @Override
+    public void run(TestResult result) {
+        try {
+            try {
+                int testCount = countTestCases();
+                for (int i = 0; i < testCount; i++) {
+                    Test each = this.testAt(i);
+                    if (result.shouldStop()) {
+                        break;
+                    }
+                    runTest(each, result);
+                }
+            } finally {
+                tearDown();
+            }
+        } catch (Exception e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
+    /**
+     * Reads a text file and returns its lines in file order. The reader is closed even when
+     * reading fails.
+     *
+     * @param ignorePath path of the text file to read
+     * @return every line of the file without its line terminator; empty if the file has no lines
+     * @throws FileNotFoundException if {@code ignorePath} cannot be opened for reading
+     * @throws IOException if reading or closing the file fails
+     */
+    protected static List<String> getFileList(String ignorePath) throws FileNotFoundException, IOException {
+        List<String> ignores = new ArrayList<String>();
+        BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+        try {
+            String s;
+            while ((s = reader.readLine()) != null) {
+                ignores.add(s);
+            }
+        } finally {
+            reader.close();
+        }
+        return ignores;
+    }
+
+    /**
+     * Strips the last extension from a file name.
+     *
+     * @param fname file name to strip
+     * @return the part of {@code fname} before its last {@code '.'}, or {@code fname} itself when
+     *         it contains no {@code '.'}
+     */
+    private static String jobExtToResExt(String fname) {
+        int dot = fname.lastIndexOf('.');
+        if (dot < 0) {
+            // No extension: substring(0, -1) would throw, so keep the name as is.
+            return fname;
+        }
+        return fname.substring(0, dot);
+    }
+
+    /**
+     * Tells whether any entry of {@code onlys} occurs as a substring of {@code name}.
+     *
+     * @param onlys substrings to look for
+     * @param name string to search in
+     * @return {@code true} if at least one entry is contained in {@code name}
+     */
+    private static boolean isInList(List<String> onlys, String name) {
+        for (String only : onlys) {
+            if (name.indexOf(only) >= 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RemoveLowCoverageSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RemoveLowCoverageSmallTestSuite.java
new file mode 100644
index 0000000..9a7d99e
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RemoveLowCoverageSmallTestSuite.java
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import junit.framework.TestSuite;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+
+@SuppressWarnings("deprecation")
+public class RemoveLowCoverageSmallTestSuite extends TestSuite {
+ private static final Logger LOGGER = Logger.getLogger(RemoveLowCoverageSmallTestSuite.class.getName());
+
+ public static final String PreFix = "data/PathMergeTestSet"; //"graphbuildresult";
+ public static final String[] TestDir = { PreFix + File.separator
+ + "5"};
+ private static final String ACTUAL_RESULT_DIR = "data/actual/removelowcoverage";
+ private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+ private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+ private static final String PATH_TO_CLUSTER_PROPERTIES = "src/test/resources/cluster/cluster.properties";
+ private static final String PATH_TO_JOBS = "src/test/resources/jobs/";
+ private static final String PATH_TO_ONLY = "src/test/resources/only_removelowcoverage.txt";
+
+ public static final String HDFS_INPUTPATH = "/PathTestSet";
+
+ private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+ private MiniDFSCluster dfsCluster;
+
+ private JobConf conf = new JobConf();
+ private int numberOfNC = 2;
+
+ public void setUp() throws Exception {
+ ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
+ ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
+ cleanupStores();
+ PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
+ LOGGER.info("Hyracks mini-cluster started");
+ FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+ FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+ startHDFS();
+ }
+
+ private void startHDFS() throws IOException {
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+ FileSystem dfs = FileSystem.get(conf);
+
+ for (String testDir : TestDir) {
+ File src = new File(testDir);
+ Path dest = new Path(HDFS_INPUTPATH + File.separator + src.getName());
+ dfs.mkdirs(dest);
+ //src.listFiles()
+ //src.listFiles((FilenameFilter)(new WildcardFileFilter("part*")))
+ for (File f : src.listFiles()) {
+ dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
+ }
+ }
+
+ DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+ conf.writeXml(confOutput);
+ confOutput.flush();
+ confOutput.close();
+ }
+
+ private void cleanupStores() throws IOException {
+ FileUtils.forceMkdir(new File("teststore"));
+ FileUtils.forceMkdir(new File("build"));
+ FileUtils.cleanDirectory(new File("teststore"));
+ FileUtils.cleanDirectory(new File("build"));
+ }
+
+ /**
+ * cleanup hdfs cluster
+ */
+ private void cleanupHDFS() throws Exception {
+ dfsCluster.shutdown();
+ }
+
+ public void tearDown() throws Exception {
+ PregelixHyracksIntegrationUtil.deinit();
+ LOGGER.info("Hyracks mini-cluster shut down");
+ cleanupHDFS();
+ }
+
+ public static Test suite() throws Exception {
+ List<String> onlys = getFileList(PATH_TO_ONLY);
+ File testData = new File(PATH_TO_JOBS);
+ File[] queries = testData.listFiles();
+ RemoveLowCoverageSmallTestSuite testSuite = new RemoveLowCoverageSmallTestSuite();
+ testSuite.setUp();
+ boolean onlyEnabled = false;
+ FileSystem dfs = FileSystem.get(testSuite.conf);
+
+ if (onlys.size() > 0) {
+ onlyEnabled = true;
+ }
+
+ for (File qFile : queries) {
+ if (qFile.isFile()) {
+ if (onlyEnabled && !isInList(onlys, qFile.getName())) {
+ continue;
+ } else {
+ for (String testPathStr : TestDir) {
+ File testDir = new File(testPathStr);
+ String resultFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
+ + File.separator + "bin" + File.separator + testDir.getName();
+ String textFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
+ + File.separator + "txt" + File.separator + testDir.getName();
+ String graphvizFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
+ + File.separator + "graphviz" + File.separator + testDir.getName();
+ testSuite.addTest(new BasicSmallTestCase(HADOOP_CONF_PATH, qFile.getName(), qFile
+ .getAbsolutePath().toString(), dfs,
+ HDFS_INPUTPATH + File.separator + testDir.getName(), resultFileName, textFileName, graphvizFileName));
+ }
+ }
+ }
+ }
+ return testSuite;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+ try {
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ // cleanupStores();
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+ tearDown();
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ protected static List<String> getFileList(String ignorePath) throws FileNotFoundException, IOException {
+ BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+ String s = null;
+ List<String> ignores = new ArrayList<String>();
+ while ((s = reader.readLine()) != null) {
+ ignores.add(s);
+ }
+ reader.close();
+ return ignores;
+ }
+
+ private static String jobExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot);
+ }
+
+ private static boolean isInList(List<String> onlys, String name) {
+ for (String only : onlys)
+ if (name.indexOf(only) >= 0)
+ return true;
+ return false;
+ }
+
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/ScaffoldingSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/ScaffoldingSmallTestSuite.java
new file mode 100644
index 0000000..c8d12dc
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/ScaffoldingSmallTestSuite.java
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import junit.framework.TestSuite;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+
+/**
+ * JUnit 3 suite that runs Pregelix job files against a Hyracks mini-cluster and a
+ * {@link MiniDFSCluster}.
+ * <p>
+ * {@link #suite()} starts both clusters, copies every directory in {@link #TestDir} to HDFS below
+ * {@link #HDFS_INPUTPATH} and registers one {@link BasicSmallTestCase} per (job file, input
+ * directory) pair. Job files come from {@code src/test/resources/jobs/}, optionally filtered by
+ * the substrings listed in {@code only_scaffolding.txt}. {@link #run(TestResult)} executes the
+ * cases and then shuts both clusters down.
+ */
+@SuppressWarnings("deprecation")
+public class ScaffoldingSmallTestSuite extends TestSuite {
+    private static final Logger LOGGER = Logger.getLogger(ScaffoldingSmallTestSuite.class.getName());
+
+    /**
+     * Local parent directory of the input data sets. NOTE(review): the path lies under
+     * {@code data/actual/pathmerge}, so it is presumably produced by the path-merge suite -
+     * confirm run order.
+     */
+    public static final String PreFix = "data/actual/pathmerge/P4ForMergeGraph/bin";
+    /** Local input directories; each is uploaded to HDFS and paired with every selected job. */
+    public static final String[] TestDir = { PreFix + File.separator + "2" };
+    /** Local directory under which result paths are built and the Hadoop config is written. */
+    private static final String ACTUAL_RESULT_DIR = "data/actual/scaffolding";
+    private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+    private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+    private static final String PATH_TO_CLUSTER_PROPERTIES = "src/test/resources/cluster/cluster.properties";
+    /** Directory whose regular files are treated as job descriptions. */
+    private static final String PATH_TO_JOBS = "src/test/resources/jobs/";
+    /** Text file of name fragments; when it has any line, only matching job files are run. */
+    private static final String PATH_TO_ONLY = "src/test/resources/only_scaffolding.txt";
+
+    /** HDFS directory that receives one sub-directory per entry of {@link #TestDir}. */
+    public static final String HDFS_INPUTPATH = "/PathTestSet";
+
+    /** Local file the merged Hadoop configuration is written to for the test cases. */
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private MiniDFSCluster dfsCluster;
+
+    private JobConf conf = new JobConf();
+    /** Passed to {@link MiniDFSCluster} as the number of data nodes to start. */
+    private int numberOfNC = 1;
+
+    /**
+     * Points the cluster configuration at the test resources, empties the local scratch
+     * directories, starts the Hyracks mini-cluster, recreates an empty result directory and
+     * starts HDFS.
+     *
+     * @throws Exception if a directory cannot be prepared or a cluster fails to start
+     */
+    public void setUp() throws Exception {
+        ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
+        ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
+        cleanupStores();
+        PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
+        LOGGER.info("Hyracks mini-cluster started");
+        File resultDir = new File(ACTUAL_RESULT_DIR);
+        FileUtils.forceMkdir(resultDir);
+        FileUtils.cleanDirectory(resultDir);
+        startHDFS();
+    }
+
+    /**
+     * Starts the HDFS mini-cluster, uploads the test inputs and writes the effective Hadoop
+     * configuration to {@link #HADOOP_CONF_PATH}.
+     *
+     * @throws FileNotFoundException if a directory listed in {@link #TestDir} does not exist
+     * @throws IOException if HDFS cannot be started, a copy fails or the configuration cannot be
+     *             written
+     */
+    private void startHDFS() throws IOException {
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+        // NOTE(review): presumably removes stale mini-cluster data kept under "build" - confirm.
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+        FileSystem dfs = FileSystem.get(conf);
+
+        for (String testDir : TestDir) {
+            File src = new File(testDir);
+            File[] inputs = src.listFiles();
+            if (inputs == null) {
+                // listFiles() yields null for a missing directory; fail with a clear message.
+                throw new FileNotFoundException("Test input directory not found: " + src.getAbsolutePath());
+            }
+            Path dest = new Path(HDFS_INPUTPATH + File.separator + src.getName());
+            dfs.mkdirs(dest);
+            for (File f : inputs) {
+                dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
+            }
+        }
+
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+            confOutput.flush();
+        } finally {
+            confOutput.close();
+        }
+    }
+
+    /**
+     * Ensures the local {@code teststore} and {@code build} directories exist and are empty.
+     *
+     * @throws IOException if a directory cannot be created or emptied
+     */
+    private void cleanupStores() throws IOException {
+        FileUtils.forceMkdir(new File("teststore"));
+        FileUtils.forceMkdir(new File("build"));
+        FileUtils.cleanDirectory(new File("teststore"));
+        FileUtils.cleanDirectory(new File("build"));
+    }
+
+    /**
+     * Shuts the HDFS mini-cluster down; does nothing when it was never started.
+     */
+    private void cleanupHDFS() throws Exception {
+        if (dfsCluster != null) {
+            dfsCluster.shutdown();
+        }
+    }
+
+    /**
+     * Stops the Hyracks mini-cluster and then the HDFS mini-cluster. HDFS is shut down even when
+     * stopping Hyracks throws.
+     *
+     * @throws Exception if either cluster fails to shut down
+     */
+    public void tearDown() throws Exception {
+        try {
+            PregelixHyracksIntegrationUtil.deinit();
+            LOGGER.info("Hyracks mini-cluster shut down");
+        } finally {
+            cleanupHDFS();
+        }
+    }
+
+    /**
+     * Builds the suite: starts the clusters and adds one {@link BasicSmallTestCase} for every
+     * selected job file combined with every directory in {@link #TestDir}.
+     * <p>
+     * A job file is selected when the "only" list is empty or one of its entries is a substring
+     * of the file name. Sub-directories of the job directory are ignored.
+     *
+     * @return the populated suite, with both mini-clusters already running
+     * @throws FileNotFoundException if the "only" file or the job directory is missing
+     * @throws Exception if the clusters cannot be started
+     */
+    public static Test suite() throws Exception {
+        List<String> onlys = getFileList(PATH_TO_ONLY);
+        File[] queries = new File(PATH_TO_JOBS).listFiles();
+        if (queries == null) {
+            // Fail before any cluster is started instead of hitting an NPE in the loop below.
+            throw new FileNotFoundException("Job directory not found: " + PATH_TO_JOBS);
+        }
+        ScaffoldingSmallTestSuite testSuite = new ScaffoldingSmallTestSuite();
+        testSuite.setUp();
+        boolean onlyEnabled = !onlys.isEmpty();
+        FileSystem dfs = FileSystem.get(testSuite.conf);
+
+        for (File qFile : queries) {
+            if (!qFile.isFile() || (onlyEnabled && !isInList(onlys, qFile.getName()))) {
+                continue;
+            }
+            String jobResultDir = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName());
+            for (String testPathStr : TestDir) {
+                String dataSet = new File(testPathStr).getName();
+                String resultFileName = jobResultDir + File.separator + "bin" + File.separator + dataSet;
+                String textFileName = jobResultDir + File.separator + "txt" + File.separator + dataSet;
+                String graphvizFileName = jobResultDir + File.separator + "graphviz" + File.separator + dataSet;
+                testSuite.addTest(new BasicSmallTestCase(HADOOP_CONF_PATH, qFile.getName(),
+                        qFile.getAbsolutePath(), dfs, HDFS_INPUTPATH + File.separator + dataSet, resultFileName,
+                        textFileName, graphvizFileName));
+            }
+        }
+        return testSuite;
+    }
+
+    /**
+     * Runs the tests and collects their result in a TestResult.
+     * <p>
+     * {@link #tearDown()} is invoked in a {@code finally} block so both mini-clusters are shut
+     * down even when running a test throws.
+     *
+     * @param result collector that receives the outcome of every executed test
+     * @throws IllegalStateException wrapping any exception raised while running or tearing down
+     */
+    @Override
+    public void run(TestResult result) {
+        try {
+            try {
+                int testCount = countTestCases();
+                for (int i = 0; i < testCount; i++) {
+                    Test each = this.testAt(i);
+                    if (result.shouldStop()) {
+                        break;
+                    }
+                    runTest(each, result);
+                }
+            } finally {
+                tearDown();
+            }
+        } catch (Exception e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
+    /**
+     * Reads a text file and returns its lines in file order. The reader is closed even when
+     * reading fails.
+     *
+     * @param ignorePath path of the text file to read
+     * @return every line of the file without its line terminator; empty if the file has no lines
+     * @throws FileNotFoundException if {@code ignorePath} cannot be opened for reading
+     * @throws IOException if reading or closing the file fails
+     */
+    protected static List<String> getFileList(String ignorePath) throws FileNotFoundException, IOException {
+        List<String> ignores = new ArrayList<String>();
+        BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+        try {
+            String s;
+            while ((s = reader.readLine()) != null) {
+                ignores.add(s);
+            }
+        } finally {
+            reader.close();
+        }
+        return ignores;
+    }
+
+    /**
+     * Strips the last extension from a file name.
+     *
+     * @param fname file name to strip
+     * @return the part of {@code fname} before its last {@code '.'}, or {@code fname} itself when
+     *         it contains no {@code '.'}
+     */
+    private static String jobExtToResExt(String fname) {
+        int dot = fname.lastIndexOf('.');
+        if (dot < 0) {
+            // No extension: substring(0, -1) would throw, so keep the name as is.
+            return fname;
+        }
+        return fname.substring(0, dot);
+    }
+
+    /**
+     * Tells whether any entry of {@code onlys} occurs as a substring of {@code name}.
+     *
+     * @param onlys substrings to look for
+     * @param name string to search in
+     * @return {@code true} if at least one entry is contained in {@code name}
+     */
+    private static boolean isInList(List<String> onlys, String name) {
+        for (String only : onlys) {
+            if (name.indexOf(only) >= 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/SplitRepeatSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/SplitRepeatSmallTestSuite.java
new file mode 100644
index 0000000..e3a33fa
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/SplitRepeatSmallTestSuite.java
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import junit.framework.TestSuite;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+
+/** JUnit 3 suite that starts a Hyracks mini-cluster and HDFS and adds one test per job file and TestDir entry. */
+@SuppressWarnings("deprecation")
+public class SplitRepeatSmallTestSuite extends TestSuite {
+    private static final Logger LOGGER = Logger.getLogger(SplitRepeatSmallTestSuite.class.getName());
+    //P4ForMergeGraph/bin/read
+    public static final String PreFix = "data/SplitRepeat";
+    public static final String[] TestDir = { PreFix + File.separator + "SplitOnce",
+            PreFix + File.separator + "SplitTwice" };
+    private static final String ACTUAL_RESULT_DIR = "data/actual/splitrepeat";
+    private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+    private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+    private static final String PATH_TO_CLUSTER_PROPERTIES = "src/test/resources/cluster/cluster.properties";
+    private static final String PATH_TO_JOBS = "src/test/resources/jobs/";
+    private static final String PATH_TO_ONLY = "src/test/resources/only_splitrepeat.txt";
+    public static final String HDFS_INPUTPATH = "/PathTestSet";
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+
+    private MiniDFSCluster dfsCluster;
+    private JobConf conf = new JobConf();
+    private int numberOfNC = 1;
+
+    /** Empties the store and result directories and starts the Hyracks mini-cluster and HDFS. */
+    public void setUp() throws Exception {
+        ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
+        ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
+        cleanupStores();
+        PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
+        LOGGER.info("Hyracks mini-cluster started");
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        startHDFS();
+    }
+
+    /** Starts HDFS, copies the files of every TestDir entry under HDFS_INPUTPATH and saves conf to HADOOP_CONF_PATH. */
+    private void startHDFS() throws IOException {
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+        FileSystem dfs = FileSystem.get(conf);
+        for (String testDir : TestDir) {
+            File src = new File(testDir);
+            File[] inputs = src.listFiles();
+            if (inputs == null) { // listFiles() yields null when src is missing or is not a directory
+                throw new FileNotFoundException("Test input directory not found: " + src.getAbsolutePath());
+            }
+            Path dest = new Path(HDFS_INPUTPATH + File.separator + src.getName());
+            dfs.mkdirs(dest);
+            for (File f : inputs) {
+                dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
+            }
+        }
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+            confOutput.flush();
+        } finally {
+            confOutput.close(); // do not leak the file handle when writing fails
+        }
+    }
+
+    /** Creates the "teststore" and "build" directories if needed and empties them. */
+    private void cleanupStores() throws IOException {
+        FileUtils.forceMkdir(new File("teststore"));
+        FileUtils.forceMkdir(new File("build"));
+        FileUtils.cleanDirectory(new File("teststore"));
+        FileUtils.cleanDirectory(new File("build"));
+    }
+
+    /** Stops the Hyracks mini-cluster and then HDFS; HDFS is shut down even when the former fails. */
+    public void tearDown() throws Exception {
+        try {
+            PregelixHyracksIntegrationUtil.deinit();
+            LOGGER.info("Hyracks mini-cluster shut down");
+        } finally {
+            if (dfsCluster != null) { // null until startHDFS() has created the cluster
+                dfsCluster.shutdown();
+            }
+        }
+    }
+
+    /**
+     * Starts the clusters and adds one BasicSmallTestCase per job file in PATH_TO_JOBS and per TestDir entry. When
+     * PATH_TO_ONLY has any line, only job files whose name contains one of those lines are added.
+     */
+    public static Test suite() throws Exception {
+        List<String> onlys = getFileList(PATH_TO_ONLY);
+        File[] queries = new File(PATH_TO_JOBS).listFiles();
+        if (queries == null) { // fail with a clear message before any cluster is started
+            throw new FileNotFoundException("Job directory not found: " + PATH_TO_JOBS);
+        }
+        boolean onlyEnabled = !onlys.isEmpty();
+        SplitRepeatSmallTestSuite testSuite = new SplitRepeatSmallTestSuite();
+        testSuite.setUp();
+        FileSystem dfs = FileSystem.get(testSuite.conf);
+        for (File qFile : queries) {
+            if (!qFile.isFile() || (onlyEnabled && !isInList(onlys, qFile.getName()))) {
+                continue;
+            }
+            String outDir = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName()) + File.separator;
+            for (String testPathStr : TestDir) {
+                String input = new File(testPathStr).getName();
+                testSuite.addTest(new BasicSmallTestCase(HADOOP_CONF_PATH, qFile.getName(), qFile.getAbsolutePath(),
+                        dfs, HDFS_INPUTPATH + File.separator + input, outDir + "bin" + File.separator + input,
+                        outDir + "txt" + File.separator + input, outDir + "graphviz" + File.separator + input));
+            }
+        }
+        return testSuite;
+    }
+
+    /** Runs every test, collecting outcomes in {@code result}; the clusters are stopped even when a test throws. */
+    @Override
+    public void run(TestResult result) {
+        try {
+            try {
+                int testCount = countTestCases();
+                for (int i = 0; i < testCount; i++) {
+                    Test each = this.testAt(i);
+                    if (result.shouldStop())
+                        break;
+                    runTest(each, result);
+                }
+            } finally {
+                tearDown();
+            }
+        } catch (Exception e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
+    /** Returns the lines of {@code ignorePath} in file order; the reader is closed even when reading fails. */
+    protected static List<String> getFileList(String ignorePath) throws FileNotFoundException, IOException {
+        List<String> ignores = new ArrayList<String>();
+        BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+        try {
+            for (String s = reader.readLine(); s != null; s = reader.readLine()) {
+                ignores.add(s);
+            }
+        } finally {
+            reader.close();
+        }
+        return ignores;
+    }
+
+    /** Returns {@code fname} cut at its last dot, or unchanged when it contains no dot. */
+    private static String jobExtToResExt(String fname) {
+        int dot = fname.lastIndexOf('.');
+        return dot < 0 ? fname : fname.substring(0, dot);
+    }
+
+    /** Tells whether {@code name} contains at least one entry of {@code onlys}; an empty entry matches every name. */
+    private static boolean isInList(List<String> onlys, String name) {
+        for (String only : onlys)
+            if (name.indexOf(only) >= 0)
+                return true;
+        return false;
+    }
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/TipAddSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/TipAddSmallTestSuite.java
new file mode 100644
index 0000000..b2d02c8
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/TipAddSmallTestSuite.java
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import junit.framework.TestSuite;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+
+/** JUnit 3 suite that starts a Hyracks mini-cluster and HDFS and adds one test per job file and TestDir entry. */
+@SuppressWarnings("deprecation")
+public class TipAddSmallTestSuite extends TestSuite {
+    private static final Logger LOGGER = Logger.getLogger(TipAddSmallTestSuite.class.getName());
+
+    public static final String PreFix = "data/PathMergeTestSet"; //"graphbuildresult";
+    public static final String[] TestDir = { PreFix + File.separator + "5" };
+    private static final String ACTUAL_RESULT_DIR = "data/actual/tipadd";
+    private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+    private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+    private static final String PATH_TO_CLUSTER_PROPERTIES = "src/test/resources/cluster/cluster.properties";
+    private static final String PATH_TO_JOBS = "src/test/resources/jobs/";
+    private static final String PATH_TO_ONLY = "src/test/resources/only_tipadd.txt";
+    public static final String HDFS_INPUTPATH = "/PathTestSet";
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+
+    private MiniDFSCluster dfsCluster;
+    private JobConf conf = new JobConf();
+    private int numberOfNC = 2;
+
+    /** Empties the store and result directories and starts the Hyracks mini-cluster and HDFS. */
+    public void setUp() throws Exception {
+        ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
+        ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
+        cleanupStores();
+        PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
+        LOGGER.info("Hyracks mini-cluster started");
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        startHDFS();
+    }
+
+    /** Starts HDFS, copies the files of every TestDir entry under HDFS_INPUTPATH and saves conf to HADOOP_CONF_PATH. */
+    private void startHDFS() throws IOException {
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+        FileSystem dfs = FileSystem.get(conf);
+        for (String testDir : TestDir) {
+            File src = new File(testDir);
+            File[] inputs = src.listFiles();
+            if (inputs == null) { // listFiles() yields null when src is missing or is not a directory
+                throw new FileNotFoundException("Test input directory not found: " + src.getAbsolutePath());
+            }
+            Path dest = new Path(HDFS_INPUTPATH + File.separator + src.getName());
+            dfs.mkdirs(dest);
+            for (File f : inputs) {
+                dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
+            }
+        }
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+            confOutput.flush();
+        } finally {
+            confOutput.close(); // do not leak the file handle when writing fails
+        }
+    }
+
+    /** Creates the "teststore" and "build" directories if needed and empties them. */
+    private void cleanupStores() throws IOException {
+        FileUtils.forceMkdir(new File("teststore"));
+        FileUtils.forceMkdir(new File("build"));
+        FileUtils.cleanDirectory(new File("teststore"));
+        FileUtils.cleanDirectory(new File("build"));
+    }
+
+    /** Stops the Hyracks mini-cluster and then HDFS; HDFS is shut down even when the former fails. */
+    public void tearDown() throws Exception {
+        try {
+            PregelixHyracksIntegrationUtil.deinit();
+            LOGGER.info("Hyracks mini-cluster shut down");
+        } finally {
+            if (dfsCluster != null) { // null until startHDFS() has created the cluster
+                dfsCluster.shutdown();
+            }
+        }
+    }
+
+    /**
+     * Starts the clusters and adds one BasicSmallTestCase per job file in PATH_TO_JOBS and per TestDir entry. When
+     * PATH_TO_ONLY has any line, only job files whose name contains one of those lines are added.
+     */
+    public static Test suite() throws Exception {
+        List<String> onlys = getFileList(PATH_TO_ONLY);
+        File[] queries = new File(PATH_TO_JOBS).listFiles();
+        if (queries == null) { // fail with a clear message before any cluster is started
+            throw new FileNotFoundException("Job directory not found: " + PATH_TO_JOBS);
+        }
+        boolean onlyEnabled = !onlys.isEmpty();
+        TipAddSmallTestSuite testSuite = new TipAddSmallTestSuite();
+        testSuite.setUp();
+        FileSystem dfs = FileSystem.get(testSuite.conf);
+        for (File qFile : queries) {
+            if (!qFile.isFile() || (onlyEnabled && !isInList(onlys, qFile.getName()))) {
+                continue;
+            }
+            String outDir = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName()) + File.separator;
+            for (String testPathStr : TestDir) {
+                String input = new File(testPathStr).getName();
+                testSuite.addTest(new BasicSmallTestCase(HADOOP_CONF_PATH, qFile.getName(), qFile.getAbsolutePath(),
+                        dfs, HDFS_INPUTPATH + File.separator + input, outDir + "bin" + File.separator + input,
+                        outDir + "txt" + File.separator + input, outDir + "graphviz" + File.separator + input));
+            }
+        }
+        return testSuite;
+    }
+
+    /** Runs every test, collecting outcomes in {@code result}; the clusters are stopped even when a test throws. */
+    @Override
+    public void run(TestResult result) {
+        try {
+            try {
+                int testCount = countTestCases();
+                for (int i = 0; i < testCount; i++) {
+                    Test each = this.testAt(i);
+                    if (result.shouldStop())
+                        break;
+                    runTest(each, result);
+                }
+            } finally {
+                tearDown();
+            }
+        } catch (Exception e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
+    /** Returns the lines of {@code ignorePath} in file order; the reader is closed even when reading fails. */
+    protected static List<String> getFileList(String ignorePath) throws FileNotFoundException, IOException {
+        List<String> ignores = new ArrayList<String>();
+        BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+        try {
+            for (String s = reader.readLine(); s != null; s = reader.readLine()) {
+                ignores.add(s);
+            }
+        } finally {
+            reader.close();
+        }
+        return ignores;
+    }
+
+    /** Returns {@code fname} cut at its last dot, or unchanged when it contains no dot. */
+    private static String jobExtToResExt(String fname) {
+        int dot = fname.lastIndexOf('.');
+        return dot < 0 ? fname : fname.substring(0, dot);
+    }
+
+    /** Tells whether {@code name} contains at least one entry of {@code onlys}; an empty entry matches every name. */
+    private static boolean isInList(List<String> onlys, String name) {
+        for (String only : onlys)
+            if (name.indexOf(only) >= 0)
+                return true;
+        return false;
+    }
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/TipRemoveSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/TipRemoveSmallTestSuite.java
new file mode 100644
index 0000000..b4514d6
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/TipRemoveSmallTestSuite.java
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import junit.framework.TestSuite;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+
+/** JUnit 3 suite that starts a Hyracks mini-cluster and HDFS and adds one test per job file and TestDir entry. */
+@SuppressWarnings("deprecation")
+public class TipRemoveSmallTestSuite extends TestSuite {
+    private static final Logger LOGGER = Logger.getLogger(TipRemoveSmallTestSuite.class.getName());
+    //P4ForMergeGraph/bin/read
+    public static final String PreFix = "data/actual/tipadd/TipAddGraph/bin"; // presumably TipAddSmallTestSuite output
+    public static final String[] TestDir = { PreFix + File.separator + "5" };
+    private static final String ACTUAL_RESULT_DIR = "data/actual/tipremove";
+    private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+    private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+    private static final String PATH_TO_CLUSTER_PROPERTIES = "src/test/resources/cluster/cluster.properties";
+    private static final String PATH_TO_JOBS = "src/test/resources/jobs/";
+    private static final String PATH_TO_ONLY = "src/test/resources/only_tipremove.txt";
+    public static final String HDFS_INPUTPATH = "/PathTestSet";
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+
+    private MiniDFSCluster dfsCluster;
+    private JobConf conf = new JobConf();
+    private int numberOfNC = 2;
+
+    /** Empties the store and result directories and starts the Hyracks mini-cluster and HDFS. */
+    public void setUp() throws Exception {
+        ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
+        ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
+        cleanupStores();
+        PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
+        LOGGER.info("Hyracks mini-cluster started");
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        startHDFS();
+    }
+
+    /** Starts HDFS, copies the files of every TestDir entry under HDFS_INPUTPATH and saves conf to HADOOP_CONF_PATH. */
+    private void startHDFS() throws IOException {
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+        FileSystem dfs = FileSystem.get(conf);
+        for (String testDir : TestDir) {
+            File src = new File(testDir);
+            File[] inputs = src.listFiles();
+            if (inputs == null) { // listFiles() yields null when src is missing or is not a directory
+                throw new FileNotFoundException("Test input directory not found: " + src.getAbsolutePath());
+            }
+            Path dest = new Path(HDFS_INPUTPATH + File.separator + src.getName());
+            dfs.mkdirs(dest);
+            for (File f : inputs) {
+                dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
+            }
+        }
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+            confOutput.flush();
+        } finally {
+            confOutput.close(); // do not leak the file handle when writing fails
+        }
+    }
+
+    /** Creates the "teststore" and "build" directories if needed and empties them. */
+    private void cleanupStores() throws IOException {
+        FileUtils.forceMkdir(new File("teststore"));
+        FileUtils.forceMkdir(new File("build"));
+        FileUtils.cleanDirectory(new File("teststore"));
+        FileUtils.cleanDirectory(new File("build"));
+    }
+
+    /** Stops the Hyracks mini-cluster and then HDFS; HDFS is shut down even when the former fails. */
+    public void tearDown() throws Exception {
+        try {
+            PregelixHyracksIntegrationUtil.deinit();
+            LOGGER.info("Hyracks mini-cluster shut down");
+        } finally {
+            if (dfsCluster != null) { // null until startHDFS() has created the cluster
+                dfsCluster.shutdown();
+            }
+        }
+    }
+
+    /**
+     * Starts the clusters and adds one BasicSmallTestCase per job file in PATH_TO_JOBS and per TestDir entry. When
+     * PATH_TO_ONLY has any line, only job files whose name contains one of those lines are added.
+     */
+    public static Test suite() throws Exception {
+        List<String> onlys = getFileList(PATH_TO_ONLY);
+        File[] queries = new File(PATH_TO_JOBS).listFiles();
+        if (queries == null) { // fail with a clear message before any cluster is started
+            throw new FileNotFoundException("Job directory not found: " + PATH_TO_JOBS);
+        }
+        boolean onlyEnabled = !onlys.isEmpty();
+        TipRemoveSmallTestSuite testSuite = new TipRemoveSmallTestSuite();
+        testSuite.setUp();
+        FileSystem dfs = FileSystem.get(testSuite.conf);
+        for (File qFile : queries) {
+            if (!qFile.isFile() || (onlyEnabled && !isInList(onlys, qFile.getName()))) {
+                continue;
+            }
+            String outDir = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName()) + File.separator;
+            for (String testPathStr : TestDir) {
+                String input = new File(testPathStr).getName();
+                testSuite.addTest(new BasicSmallTestCase(HADOOP_CONF_PATH, qFile.getName(), qFile.getAbsolutePath(),
+                        dfs, HDFS_INPUTPATH + File.separator + input, outDir + "bin" + File.separator + input,
+                        outDir + "txt" + File.separator + input, outDir + "graphviz" + File.separator + input));
+            }
+        }
+        return testSuite;
+    }
+
+    /** Runs every test, collecting outcomes in {@code result}; the clusters are stopped even when a test throws. */
+    @Override
+    public void run(TestResult result) {
+        try {
+            try {
+                int testCount = countTestCases();
+                for (int i = 0; i < testCount; i++) {
+                    Test each = this.testAt(i);
+                    if (result.shouldStop())
+                        break;
+                    runTest(each, result);
+                }
+            } finally {
+                tearDown();
+            }
+        } catch (Exception e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
+    /** Returns the lines of {@code ignorePath} in file order; the reader is closed even when reading fails. */
+    protected static List<String> getFileList(String ignorePath) throws FileNotFoundException, IOException {
+        List<String> ignores = new ArrayList<String>();
+        BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+        try {
+            for (String s = reader.readLine(); s != null; s = reader.readLine()) {
+                ignores.add(s);
+            }
+        } finally {
+            reader.close();
+        }
+        return ignores;
+    }
+
+    /** Returns {@code fname} cut at its last dot, or unchanged when it contains no dot. */
+    private static String jobExtToResExt(String fname) {
+        int dot = fname.lastIndexOf('.');
+        return dot < 0 ? fname : fname.substring(0, dot);
+    }
+
+    /** Tells whether {@code name} contains at least one entry of {@code onlys}; an empty entry matches every name. */
+    private static boolean isInList(List<String> onlys, String name) {
+        for (String only : onlys)
+            if (name.indexOf(only) >= 0)
+                return true;
+        return false;
+    }
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/ResultGen/ReportGenerator.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/ResultGen/ReportGenerator.java
new file mode 100644
index 0000000..f5f42fc
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/ResultGen/ReportGenerator.java
@@ -0,0 +1,53 @@
+package edu.uci.ics.genomix.pregelix.ResultGen;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.text.DecimalFormat;
+
+import org.apache.commons.io.FileUtils;
+
+public class ReportGenerator {
+    public static final String PATH_TO_REPORT = "report";
+    public static final String PATH_TO_LOGINFO = "log";
+
+    /** Rewrites log/{@code fileName} as report/{@code fileName} with durations in minutes, then the total time. */
+    public static void generateReportFromLoginfo(String fileName) throws Exception {
+        DecimalFormat df = new DecimalFormat("0.0000");
+        double totalTime = 0;
+        BufferedReader br = new BufferedReader(new FileReader(PATH_TO_LOGINFO + "/" + fileName));
+        try {
+            BufferedWriter bw = new BufferedWriter(new FileWriter(new File(PATH_TO_REPORT + "/" + fileName)));
+            try {
+                while (br.readLine() != null) { // lines are read in pairs; only the second of each pair is reported
+                    String line = br.readLine();
+                    if (line == null)
+                        break; // odd number of lines: the last one has no partner to report
+                    String[] tokens = line.split(" ");
+                    String last = tokens[tokens.length - 1]; // duration plus a 2-char suffix (presumably "ms" - confirm)
+                    for (int i = 1; i < tokens.length - 1; i++)
+                        bw.write(tokens[i] + " "); // the first token is never copied
+                    double minutes = Double.parseDouble(last.substring(0, last.length() - 2)) / 60000;
+                    totalTime += minutes;
+                    bw.write(df.format(minutes) + "m");
+                    bw.newLine();
+                }
+                bw.write("The total time is " + df.format(totalTime) + "m");
+            } finally {
+                bw.close(); // release the report file even when parsing fails
+            }
+        } finally {
+            br.close();
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        FileUtils.forceMkdir(new File(PATH_TO_REPORT));
+        FileUtils.cleanDirectory(new File(PATH_TO_REPORT));
+        for (String log : new String[] { "log_nc4", "log_nc8", "naive_nc4", "naive_nc4_vertex16", "log_nc4_vertex16",
+                "naive_nc8_outerjoin", "naive_nc8_outerjoin_2", "naive_nc8_innerjoin" })
+            generateReportFromLoginfo(log);
+    }
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/example/util/TestUtils.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/example/util/TestUtils.java
new file mode 100644
index 0000000..8ac1b09
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/example/util/TestUtils.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.example.util;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+
+/** Test helpers that compare result files line by line, tolerating tiny differences in decimal fields. */
+public class TestUtils {
+    /** Compares each file listed in {@code expectedFileDir} with the same-named file in {@code actualFileDir}. */
+    public static void compareWithResultDir(File expectedFileDir, File actualFileDir) throws Exception {
+        String[] fileNames = expectedFileDir.list();
+        if (fileNames == null) { // list() returns null when the path is not a readable directory
+            throw new IllegalArgumentException("Expected result directory cannot be listed: " + expectedFileDir);
+        }
+        for (String fileName : fileNames) {
+            compareWithResult(new File(expectedFileDir, fileName), new File(actualFileDir, fileName));
+        }
+    }
+
+    /** Throws an Exception describing the first line where the files differ; both readers are always closed. */
+    public static void compareWithResult(File expectedFile, File actualFile) throws Exception {
+        BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
+        try {
+            BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
+            try {
+                String lineExpected, lineActual;
+                int num = 1;
+                while ((lineExpected = readerExpected.readLine()) != null) {
+                    lineActual = readerActual.readLine();
+                    if (lineActual == null) {
+                        throw new Exception("Actual result changed at line " + num + ":\n< " + lineExpected + "\n> ");
+                    }
+                    if (!equalStrings(lineExpected, lineActual)) {
+                        throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected + "\n> "
+                                + lineActual);
+                    }
+                    ++num;
+                }
+                lineActual = readerActual.readLine();
+                if (lineActual != null) {
+                    throw new Exception("Actual result changed at line " + num + ":\n< \n> " + lineActual);
+                }
+            } finally {
+                readerActual.close();
+            }
+        } finally {
+            readerExpected.close();
+        }
+    }
+
+    /** Tells whether two lines match field by field, comparing differing decimal fields as floats within 1e-7. */
+    private static boolean equalStrings(String s1, String s2) {
+        String[] rowsOne = s1.split("\n");
+        String[] rowsTwo = s2.split("\n");
+        if (rowsOne.length != rowsTwo.length)
+            return false;
+        for (int i = 0; i < rowsOne.length; i++) {
+            String row1 = rowsOne[i];
+            String row2 = rowsTwo[i];
+            if (row1.equals(row2))
+                continue;
+            String separator = row1.contains(" ") ? " " : "\t"; // split on spaces if row1 has one, else on tabs
+            String[] fields1 = row1.split(separator);
+            String[] fields2 = row2.split(separator);
+            if (fields1.length != fields2.length)
+                return false; // missing or extra fields; also keeps fields2[j] in bounds below
+            for (int j = 0; j < fields1.length; j++) {
+                if (fields1[j].equals(fields2[j]))
+                    continue;
+                if (fields1[j].indexOf('.') < 0)
+                    return false; // fields without a decimal point must match exactly
+                try {
+                    float float1 = (float) Double.parseDouble(fields1[j]);
+                    float float2 = (float) Double.parseDouble(fields2[j]);
+                    if (!(Math.abs(float1 - float2) < 1.0e-7))
+                        return false;
+                } catch (NumberFormatException e) {
+                    return false; // the fields differ and are not both numbers
+                }
+            }
+        }
+        return true;
+    }
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/pathmerge/CompareTest.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/pathmerge/CompareTest.java
new file mode 100644
index 0000000..0a2ae92
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/pathmerge/CompareTest.java
@@ -0,0 +1,19 @@
+package edu.uci.ics.genomix.pregelix.pathmerge;
+
+import java.io.File;
+
+import org.junit.Test;
+
+import edu.uci.ics.genomix.pregelix.example.util.TestUtils;
+
+public class CompareTest {
+    public static final String PATH_TO_TESTSTORE = "testcase/pathmerge";
+    public static final String CHAIN_OUTPUT = PATH_TO_TESTSTORE + "/chain"; // "testcase/pathmerge/chain"
+    /** Compares CHAIN_OUTPUT/naive-sort with CHAIN_OUTPUT/log-sort via TestUtils.compareWithResult. */
+    @Test
+    public void test() throws Exception {
+        File naive = new File(CHAIN_OUTPUT + "/naive-sort");
+        File log = new File(CHAIN_OUTPUT + "/log-sort");
+        TestUtils.compareWithResult(naive, log);
+    }
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/pathmerge/MergePathTest.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/pathmerge/MergePathTest.java
new file mode 100644
index 0000000..9daeaa3
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/pathmerge/MergePathTest.java
@@ -0,0 +1,165 @@
+package edu.uci.ics.genomix.pregelix.pathmerge;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
+import edu.uci.ics.genomix.pregelix.type.State2;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+/**
+ * Converts path-merge results stored as Hadoop sequence files into text files
+ * under {@link #PATH_TO_TESTSTORE}.
+ */
+public class MergePathTest {
+    public static final String PATH_TO_TESTSTORE = "testcase/pathmerge/";
+    public static final String NAIVE_DATA_INPUT = "genomix_result/p1_nc4_16vertex";
+    public static final String LOG_DATA_INPUT = "genomix_result/p2_nc4_16vertex";
+    public static final String TEXT_OUTPUT = PATH_TO_TESTSTORE + "textfile";
+    public static final String CHAIN_OUTPUT = PATH_TO_TESTSTORE + "chain";
+
+    /** Number of "part-i" files read from each input directory. */
+    private static final int nc = 4;
+    /** K-mer size passed to the VKmerBytesWritable used as the record key. */
+    private static final int kmerSize = 55;
+
+    //private static int maxLength = 102;
+
+    /**
+     * Recreates the output directories (emptying any previous content) and dumps
+     * the naive and log results as text.
+     *
+     * @throws Exception if a directory cannot be prepared or a result cannot be read
+     */
+    @Test
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(PATH_TO_TESTSTORE));
+        FileUtils.cleanDirectory(new File(PATH_TO_TESTSTORE));
+        FileUtils.forceMkdir(new File(TEXT_OUTPUT));
+        FileUtils.cleanDirectory(new File(TEXT_OUTPUT));
+        FileUtils.forceMkdir(new File(CHAIN_OUTPUT));
+        FileUtils.cleanDirectory(new File(CHAIN_OUTPUT));
+        generateTextFromPathmergeResult(NAIVE_DATA_INPUT, TEXT_OUTPUT, "/naive");
+        generateTextFromPathmergeResult(LOG_DATA_INPUT, TEXT_OUTPUT, "/log");
+        //generateSpecificLengthChainFromNaivePathmergeResult(NAIVE_DATA_INPUT, CHAIN_OUTPUT, maxLength);
+        //generateSpecificLengthChainFromLogPathmergeResult(LOG_DATA_INPUT, CHAIN_OUTPUT, maxLength);
+    }
+
+    /**
+     * Writes one "key TAB value" line for every record whose state is
+     * State2.FINAL_VERTEX, reading the parts input/part-0 .. input/part-(nc-1).
+     *
+     * @param input directory containing the sequence-file parts
+     * @param outputDir directory of the text file to create
+     * @param fileName name appended to outputDir (callers pass a leading "/")
+     * @throws IOException if a part cannot be read or the text file cannot be written
+     */
+    public static void generateTextFromPathmergeResult(String input, String outputDir, String fileName)
+            throws IOException {
+        BufferedWriter bw = new BufferedWriter(new FileWriter(new File(outputDir + fileName)));
+        // try/finally so the writer and each reader are released even when a read fails
+        try {
+            Configuration conf = new Configuration();
+            FileSystem fileSys = FileSystem.get(conf);
+            for (int i = 0; i < nc; i++) {
+                Path path = new Path(input + "/part-" + i);
+                SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, path, conf);
+                try {
+                    VKmerBytesWritable key = new VKmerBytesWritable(kmerSize);
+                    VertexValueWritable value = new VertexValueWritable();
+                    while (reader.next(key, value)) {
+                        if (value.getState() == State2.FINAL_VERTEX) {
+                            bw.write(key.toString() + "\t" + value.toString());
+                            bw.newLine();
+                        }
+                    }
+                } finally {
+                    reader.close();
+                }
+            }
+        } finally {
+            bw.close();
+        }
+    }
+
+    /**
+     * Creates output/naive and reads every record of the given result. The length
+     * filter that wrote chains is commented out below, so the file stays empty.
+     *
+     * @param input directory containing the sequence-file parts
+     * @param output directory in which the "naive" file is created
+     * @param maxLength upper chain length used by the disabled filter; currently unused
+     * @throws IOException if a part cannot be read or the file cannot be created
+     */
+    public static void generateSpecificLengthChainFromNaivePathmergeResult(String input, String output, int maxLength)
+            throws IOException {
+        BufferedWriter bw = new BufferedWriter(new FileWriter(new File(output + "/naive")));
+        try {
+            Configuration conf = new Configuration();
+            FileSystem fileSys = FileSystem.get(conf);
+            for (int i = 0; i < nc; i++) {
+                Path path = new Path(input + "/part-" + i);
+                SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, path, conf);
+                try {
+                    VKmerBytesWritable key = new VKmerBytesWritable(kmerSize);
+                    VertexValueWritable value = new VertexValueWritable();
+                    while (reader.next(key, value)) {
+                        /*if (value.getLengthOfMergeChain() <= maxLength && value.getLengthOfMergeChain() != kmerSize) {
+                            bw.write(value.getLengthOfMergeChain() + "\t" + value.getMergeChain().toString());
+                            bw.newLine();
+                        }*/
+                    }
+                } finally {
+                    reader.close();
+                }
+            }
+        } finally {
+            bw.close();
+        }
+    }
+
+    /**
+     * Creates output/log and reads every record of the given result. The length
+     * filter that wrote chains is commented out below, so the file stays empty.
+     *
+     * @param input directory containing the sequence-file parts
+     * @param output directory in which the "log" file is created
+     * @param maxLength upper chain length used by the disabled filter; currently unused
+     * @throws IOException if a part cannot be read or the file cannot be created
+     */
+    public static void generateSpecificLengthChainFromLogPathmergeResult(String input, String output, int maxLength)
+            throws IOException {
+        BufferedWriter bw = new BufferedWriter(new FileWriter(new File(output + "/log")));
+        try {
+            Configuration conf = new Configuration();
+            FileSystem fileSys = FileSystem.get(conf);
+            for (int i = 0; i < nc; i++) {
+                Path path = new Path(input + "/part-" + i);
+                SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, path, conf);
+                try {
+                    VKmerBytesWritable key = new VKmerBytesWritable(kmerSize);
+                    VertexValueWritable value = new VertexValueWritable();
+                    while (reader.next(key, value)) {
+                        /* if (value.getLengthOfMergeChain() <= maxLength && value.getState() == State.FINAL_VERTEX) {
+                            bw.write(value.getLengthOfMergeChain() + "\t" + value.getMergeChain().toString());
+                            bw.newLine();
+                        }*/
+                    }
+                } finally {
+                    reader.close();
+                }
+            }
+        } finally {
+            bw.close();
+        }
+    }
+}
diff --git a/genomix/genomix-pregelix/src/test/resources/cluster/cluster.properties b/genomix/genomix-pregelix/src/test/resources/cluster/cluster.properties
new file mode 100644
index 0000000..0c6abd1
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/cluster/cluster.properties
@@ -0,0 +1,37 @@
+#The CC port for Hyracks clients
+CC_CLIENTPORT=3099
+
+#The CC port for Hyracks cluster management
+CC_CLUSTERPORT=1099
+
+#The directory of hyracks binaries
+HYRACKS_HOME=../../../../hyracks
+
+#The tmp directory for cc to install jars
+CCTMP_DIR=/tmp/t1
+
+#The tmp directory for nc to install jars
+NCTMP_DIR=/tmp/t2
+
+#The directory to put cc logs
+CCLOGS_DIR=$CCTMP_DIR/logs
+
+#The directory to put nc logs
+NCLOGS_DIR=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS="/tmp/t3,/tmp/t4,/tmp/t5,/tmp/t6"
+
+#The JAVA_HOME
+JAVA_HOME=$JAVA_HOME
+
+#The frame size of the internal dataflow engine
+FRAME_SIZE=131072
+
+#CC JAVA_OPTS
+CCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx3g -Djava.util.logging.config.file=logging.properties"
+# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx3g -Djava.util.logging.config.file=genomix-pregelix/src/test/resources/logging.properties"
+
diff --git a/genomix/genomix-pregelix/src/test/resources/cluster/stores.properties b/genomix/genomix-pregelix/src/test/resources/cluster/stores.properties
new file mode 100644
index 0000000..2daf1ee
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/cluster/stores.properties
@@ -0,0 +1 @@
+store=teststore1,teststore2,teststore3,teststore4,
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/src/test/resources/data/webmap/text.txt b/genomix/genomix-pregelix/src/test/resources/data/webmap/text.txt
new file mode 100755
index 0000000..01c49e5
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/data/webmap/text.txt
@@ -0,0 +1,6 @@
+1 AATAGAAG
+2 AATAGCTT
+3 AATAGAAG
+4 AATAGCTT
+5 AATAGAAG
+6 AGAAGAAG
diff --git a/genomix/genomix-pregelix/src/test/resources/hadoop/conf/core-site.xml b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/core-site.xml
new file mode 100644
index 0000000..47dfac5
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/core-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+<property>
+ <name>fs.default.name</name>
+ <value>hdfs://127.0.0.1:31888</value>
+</property>
+<property>
+ <name>hadoop.tmp.dir</name>
+ <value>/tmp/hadoop</value>
+</property>
+
+
+</configuration>
diff --git a/genomix/genomix-pregelix/src/test/resources/hadoop/conf/hdfs-site.xml b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/hdfs-site.xml
new file mode 100644
index 0000000..a36b6d7
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/hdfs-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+<property>
+ <name>dfs.replication</name>
+ <value>1</value>
+</property>
+
+<property>
+ <name>dfs.block.size</name>
+ <value>655360</value>
+</property>
+
+</configuration>
diff --git a/genomix/genomix-pregelix/src/test/resources/hadoop/conf/log4j.properties b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/log4j.properties
new file mode 100755
index 0000000..d5e6004
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/log4j.properties
@@ -0,0 +1,94 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=FATAL,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=FATAL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
diff --git a/genomix/genomix-pregelix/src/test/resources/hadoop/conf/mapred-site.xml b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/mapred-site.xml
new file mode 100644
index 0000000..f75b072
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/hadoop/conf/mapred-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+ <property>
+ <name>mapred.job.tracker</name>
+ <value>localhost:29007</value>
+ </property>
+ <property>
+ <name>mapred.tasktracker.map.tasks.maximum</name>
+ <value>20</value>
+ </property>
+ <property>
+ <name>mapred.tasktracker.reduce.tasks.maximum</name>
+ <value>20</value>
+ </property>
+ <property>
+ <name>mapred.max.split.size</name>
+ <value>128000</value>
+ </property>
+
+</configuration>
diff --git a/genomix/genomix-pregelix/src/test/resources/hyracks-deployment.properties b/genomix/genomix-pregelix/src/test/resources/hyracks-deployment.properties
new file mode 100644
index 0000000..9c42b89
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/hyracks-deployment.properties
@@ -0,0 +1,2 @@
+#cc.bootstrap.class=edu.uci.ics.asterix.hyracks.bootstrap.CCBootstrapImpl
+nc.bootstrap.class=edu.uci.ics.pregelix.runtime.bootstrap.NCBootstrapImpl
diff --git a/genomix/genomix-pregelix/src/test/resources/ignore.txt b/genomix/genomix-pregelix/src/test/resources/ignore.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/ignore.txt
diff --git a/genomix/genomix-pregelix/src/test/resources/log4j.properties b/genomix/genomix-pregelix/src/test/resources/log4j.properties
new file mode 100755
index 0000000..d5e6004
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/log4j.properties
@@ -0,0 +1,94 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=FATAL,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=FATAL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
diff --git a/genomix/genomix-pregelix/src/test/resources/logging.properties b/genomix/genomix-pregelix/src/test/resources/logging.properties
new file mode 100644
index 0000000..0ed3dfc
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/logging.properties
@@ -0,0 +1,67 @@
+############################################################
+# Default Logging Configuration File
+#
+# You can use a different file by specifying a filename
+# with the java.util.logging.config.file system property.
+# For example java -Djava.util.logging.config.file=myfile
+############################################################
+
+############################################################
+# Global properties
+############################################################
+
+# "handlers" specifies a comma separated list of log Handler
+# classes. These handlers will be installed during VM startup.
+# Note that these classes must be on the system classpath.
+# By default we only configure a ConsoleHandler, which will only
+# show messages at the INFO and above levels.
+
+handlers= java.util.logging.ConsoleHandler
+
+# To also add the FileHandler, use the following line instead.
+
+# handlers= java.util.logging.FileHandler, java.util.logging.ConsoleHandler
+
+# Default global logging level.
+# This specifies which kinds of events are logged across
+# all loggers. For any given facility this global level
+# can be overridden by a facility specific level
+# Note that the ConsoleHandler also has a separate level
+# setting to limit messages printed to the console.
+
+.level= SEVERE
+# .level= INFO
+# .level= FINE
+# .level = FINEST
+
+############################################################
+# Handler specific properties.
+# Describes specific configuration info for Handlers.
+############################################################
+
+# default file output is in user's home directory.
+
+# java.util.logging.FileHandler.pattern = %h/java%u.log
+# java.util.logging.FileHandler.limit = 50000
+# java.util.logging.FileHandler.count = 1
+# java.util.logging.FileHandler.formatter = java.util.logging.XMLFormatter
+
+# Limit the messages that are printed on the console to FINEST and above.
+
+java.util.logging.ConsoleHandler.level = FINEST
+java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter
+
+
+############################################################
+# Facility specific properties.
+# Provides extra control for each logger.
+############################################################
+
+# For example, set the com.xyz.foo logger to only log SEVERE
+# messages (a logger's level is set through its "<logger name>.level" key):
+
+edu.uci.ics.genomix.pregelix.level = INFO
+#edu.uci.ics.asterix.level = FINE
+#edu.uci.ics.algebricks.level = FINE
+edu.uci.ics.hyracks.level = SEVERE
+#edu.uci.ics.hyracks.control.nc.net.level = FINE
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/src/test/resources/only.txt b/genomix/genomix-pregelix/src/test/resources/only.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/only.txt
diff --git a/genomix/genomix-pregelix/src/test/resources/only_bridgeadd.txt b/genomix/genomix-pregelix/src/test/resources/only_bridgeadd.txt
new file mode 100644
index 0000000..f172e3c
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/only_bridgeadd.txt
@@ -0,0 +1 @@
+BridgeAddGraph.xml
diff --git a/genomix/genomix-pregelix/src/test/resources/only_bridgeremove.txt b/genomix/genomix-pregelix/src/test/resources/only_bridgeremove.txt
new file mode 100644
index 0000000..53537cb
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/only_bridgeremove.txt
@@ -0,0 +1 @@
+BridgeRemoveGraph.xml
diff --git a/genomix/genomix-pregelix/src/test/resources/only_bubbleadd.txt b/genomix/genomix-pregelix/src/test/resources/only_bubbleadd.txt
new file mode 100644
index 0000000..a4c7fc8
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/only_bubbleadd.txt
@@ -0,0 +1 @@
+BubbleAddGraph.xml
diff --git a/genomix/genomix-pregelix/src/test/resources/only_bubblemerge.txt b/genomix/genomix-pregelix/src/test/resources/only_bubblemerge.txt
new file mode 100644
index 0000000..417161c
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/only_bubblemerge.txt
@@ -0,0 +1 @@
+BubbleMergeGraph.xml
diff --git a/genomix/genomix-pregelix/src/test/resources/only_pathmerge.txt b/genomix/genomix-pregelix/src/test/resources/only_pathmerge.txt
new file mode 100644
index 0000000..3d007d2
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/only_pathmerge.txt
@@ -0,0 +1 @@
+P4ForMergeGraph.xml
diff --git a/genomix/genomix-pregelix/src/test/resources/only_removelowcoverage.txt b/genomix/genomix-pregelix/src/test/resources/only_removelowcoverage.txt
new file mode 100644
index 0000000..77ff8f0
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/only_removelowcoverage.txt
@@ -0,0 +1 @@
+RemoveLowCoverageGraph.xml
diff --git a/genomix/genomix-pregelix/src/test/resources/only_scaffolding.txt b/genomix/genomix-pregelix/src/test/resources/only_scaffolding.txt
new file mode 100644
index 0000000..fed8efd
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/only_scaffolding.txt
@@ -0,0 +1 @@
+ScaffoldingGraph.xml
diff --git a/genomix/genomix-pregelix/src/test/resources/only_splitrepeat.txt b/genomix/genomix-pregelix/src/test/resources/only_splitrepeat.txt
new file mode 100644
index 0000000..41cba34
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/only_splitrepeat.txt
@@ -0,0 +1 @@
+SplitRepeatGraph.xml
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/src/test/resources/only_tipadd.txt b/genomix/genomix-pregelix/src/test/resources/only_tipadd.txt
new file mode 100644
index 0000000..53eecdd
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/only_tipadd.txt
@@ -0,0 +1 @@
+TipAddGraph.xml
diff --git a/genomix/genomix-pregelix/src/test/resources/only_tipremove.txt b/genomix/genomix-pregelix/src/test/resources/only_tipremove.txt
new file mode 100644
index 0000000..7392b4e
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/only_tipremove.txt
@@ -0,0 +1 @@
+TipRemoveGraph.xml
diff --git a/genomix/genomix-pregelix/src/test/resources/topology.xml b/genomix/genomix-pregelix/src/test/resources/topology.xml
new file mode 100755
index 0000000..2a6c380
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/topology.xml
@@ -0,0 +1,7 @@
+<cluster-topology>
+ <network-switch name="Global">
+ <network-switch name="local">
+ <terminal name="127.1.0.1"/>
+ </network-switch>
+ </network-switch>
+</cluster-topology>
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/text/BridgePath/log_BridgePath b/genomix/genomix-pregelix/text/BridgePath/log_BridgePath
new file mode 100644
index 0000000..cab1281
--- /dev/null
+++ b/genomix/genomix-pregelix/text/BridgePath/log_BridgePath
@@ -0,0 +1,6 @@
+TTCCA T|C
+CCGTG CT|
+TCCAC T|CT
+CCACC T|G 9 CCACCCCGT 5
+TTTCC |A
+CCACT T|G 9 CCACTCCGT 5
diff --git a/genomix/genomix-pregelix/text/BridgePath/naive_BridgePath b/genomix/genomix-pregelix/text/BridgePath/naive_BridgePath
new file mode 100644
index 0000000..c669dba
--- /dev/null
+++ b/genomix/genomix-pregelix/text/BridgePath/naive_BridgePath
@@ -0,0 +1,6 @@
+TTCCA T|C 5 TTCCA 1
+CCGTG CT|
+TCCAC T|CT
+CCACC T|G 9 CCACCCCGT 1
+TTTCC |A
+CCACT T|C
diff --git a/genomix/genomix-pregelix/text/CyclePath/log_CyclePath b/genomix/genomix-pregelix/text/CyclePath/log_CyclePath
new file mode 100644
index 0000000..d0ee84f
--- /dev/null
+++ b/genomix/genomix-pregelix/text/CyclePath/log_CyclePath
@@ -0,0 +1,3 @@
+GCAAC |T
+AACTT C|T 12 AACTTCATCAAC 5
+CAACT GT|T
diff --git a/genomix/genomix-pregelix/text/CyclePath/naive_CyclePath b/genomix/genomix-pregelix/text/CyclePath/naive_CyclePath
new file mode 100644
index 0000000..5cdb7c2
--- /dev/null
+++ b/genomix/genomix-pregelix/text/CyclePath/naive_CyclePath
@@ -0,0 +1,3 @@
+GCAAC |T
+AACTT C|T 12 AACTTCATCAAC 1
+CAACT GT|T
diff --git a/genomix/genomix-pregelix/text/LongPath/log_LongPath b/genomix/genomix-pregelix/text/LongPath/log_LongPath
new file mode 100644
index 0000000..98cb21e
--- /dev/null
+++ b/genomix/genomix-pregelix/text/LongPath/log_LongPath
@@ -0,0 +1,3 @@
+GCCTC G|G 15 GCCTCAGTACGCCCG 5
+CCCGG G|
+GGCCT |C
diff --git a/genomix/genomix-pregelix/text/LongPath/naive_LongPath b/genomix/genomix-pregelix/text/LongPath/naive_LongPath
new file mode 100644
index 0000000..c1a472e
--- /dev/null
+++ b/genomix/genomix-pregelix/text/LongPath/naive_LongPath
@@ -0,0 +1,3 @@
+GCCTC G|G 15 GCCTCAGTACGCCCG 1
+CCCGG G|
+GGCCT |C
diff --git a/genomix/genomix-pregelix/text/Path/log_Path b/genomix/genomix-pregelix/text/Path/log_Path
new file mode 100644
index 0000000..3a46528
--- /dev/null
+++ b/genomix/genomix-pregelix/text/Path/log_Path
@@ -0,0 +1,3 @@
+GCCTC G|G 10 GCCTCAGTAC 5
+GGCCT |C
+GTACG A|
diff --git a/genomix/genomix-pregelix/text/Path/naive_Path b/genomix/genomix-pregelix/text/Path/naive_Path
new file mode 100644
index 0000000..b8d2aeb
--- /dev/null
+++ b/genomix/genomix-pregelix/text/Path/naive_Path
@@ -0,0 +1,3 @@
+GCCTC G|G 10 GCCTCAGTAC 1
+GGCCT |C
+GTACG A|
diff --git a/genomix/genomix-pregelix/text/SimplePath/log_SimplePath b/genomix/genomix-pregelix/text/SimplePath/log_SimplePath
new file mode 100644
index 0000000..5c149ac
--- /dev/null
+++ b/genomix/genomix-pregelix/text/SimplePath/log_SimplePath
@@ -0,0 +1,9 @@
+CGGCA G|A 8 CGGCAAGA 5
+AGCAC C|
+AAGAC |A
+GCGGC |A
+GCATC C|
+ATATC |G
+TATCG A|C 8 TATCGCAT 5
+AAGAA C|
+AGACA A|C 8 AGACAGCA 5
diff --git a/genomix/genomix-pregelix/text/SimplePath/naive_SimplePath b/genomix/genomix-pregelix/text/SimplePath/naive_SimplePath
new file mode 100644
index 0000000..cf53cc8
--- /dev/null
+++ b/genomix/genomix-pregelix/text/SimplePath/naive_SimplePath
@@ -0,0 +1,9 @@
+CGGCA G|A 8 CGGCAAGA 1
+AGCAC C|
+AAGAC |A
+GCGGC |A
+GCATC C|
+ATATC |G
+TATCG A|C 8 TATCGCAT 1
+AAGAA C|
+AGACA A|C 8 AGACAGCA 1
diff --git a/genomix/genomix-pregelix/text/SinglePath/log_SinglePath b/genomix/genomix-pregelix/text/SinglePath/log_SinglePath
new file mode 100644
index 0000000..f1371ec
--- /dev/null
+++ b/genomix/genomix-pregelix/text/SinglePath/log_SinglePath
@@ -0,0 +1,3 @@
+ACAGT A|
+GACAA A|T 8 GACAACAG 5
+AGACA |A
diff --git a/genomix/genomix-pregelix/text/SinglePath/naive_SinglePath b/genomix/genomix-pregelix/text/SinglePath/naive_SinglePath
new file mode 100644
index 0000000..b736667
--- /dev/null
+++ b/genomix/genomix-pregelix/text/SinglePath/naive_SinglePath
@@ -0,0 +1,3 @@
+ACAGT A|
+GACAA A|T 8 GACAACAG 1
+AGACA |A
diff --git a/genomix/genomix-pregelix/text/TreePath/log_TreePath b/genomix/genomix-pregelix/text/TreePath/log_TreePath
new file mode 100644
index 0000000..0b9f198
--- /dev/null
+++ b/genomix/genomix-pregelix/text/TreePath/log_TreePath
@@ -0,0 +1,9 @@
+CAGTA T|AC
+AGTAC C|G 10 AGTACGCCCG 5
+ATCCC T|
+GCCTC G|A 8 GCCTCAGT 5
+CCCGG G|
+GGCCT |CG
+AGTAA C|C 10 AGTAACTAAA 5
+TAAAC C|
+GCCTG G|C 12 GCCTGGCTATCC 5
diff --git a/genomix/genomix-pregelix/text/TreePath/naive_TreePath b/genomix/genomix-pregelix/text/TreePath/naive_TreePath
new file mode 100644
index 0000000..39dcbaa
--- /dev/null
+++ b/genomix/genomix-pregelix/text/TreePath/naive_TreePath
@@ -0,0 +1,9 @@
+CAGTA T|AC
+AGTAC C|G 10 AGTACGCCCG 1
+ATCCC T|
+GCCTC G|A 8 GCCTCAGT 1
+CCCGG G|
+GGCCT |CG
+AGTAA C|C 10 AGTAACTAAA 1
+TAAAC C|
+GCCTG G|C 12 GCCTGGCTATCC 1
diff --git a/genomix/pom.xml b/genomix/pom.xml
new file mode 100644
index 0000000..7876c8e
--- /dev/null
+++ b/genomix/pom.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>genomix</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <packaging>pom</packaging>
+ <name>genomix</name>
+
+ <distributionManagement>
+ <repository>
+ <id>hyracks-releases</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-releases/</url>
+ </repository>
+ <snapshotRepository>
+ <id>hyracks-snapshots</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-snapshots/</url>
+ </snapshotRepository>
+ </distributionManagement>
+
+ <repositories>
+ <repository>
+ <id>hyracks-public</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/groups/hyracks-public/</url>
+ </repository>
+ <repository>
+ <id>jboss-public</id>
+ <url>https://repository.jboss.org/nexus/content/groups/public/</url>
+ </repository>
+ </repositories>
+
+ <pluginRepositories>
+ <pluginRepository>
+ <id>hyracks-public</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/groups/hyracks-public/</url>
+ <releases>
+ <updatePolicy>always</updatePolicy>
+ </releases>
+ </pluginRepository>
+ </pluginRepositories>
+
+ <modules>
+ <module>genomix-data</module>
+ <module>genomix-hyracks</module>
+ <module>genomix-hadoop</module>
+ <module>genomix-pregelix</module>
+ </modules>
+</project>
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/MaterializingPipelinedPartition.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/MaterializingPipelinedPartition.java
index 15db1fe..7c16be0 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/MaterializingPipelinedPartition.java
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/MaterializingPipelinedPartition.java
@@ -139,7 +139,7 @@
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("open(" + pid + " by " + taId);
}
- fRef = manager.getFileFactory().createUnmanagedWorkspaceFile(pid.toString());
+ fRef = manager.getFileFactory().createUnmanagedWorkspaceFile(pid.toString().replace(':', '_'));
handle = ctx.getIOManager().open(fRef, IIOManager.FileReadWriteMode.READ_WRITE,
IIOManager.FileSyncMode.METADATA_ASYNC_DATA_ASYNC);
size = 0;
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/GroupRunMergingFrameReader.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/GroupRunMergingFrameReader.java
new file mode 100644
index 0000000..d63609e
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/GroupRunMergingFrameReader.java
@@ -0,0 +1,377 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import edu.uci.ics.hyracks.api.comm.IFrameReader;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+
+public class GroupRunMergingFrameReader implements IFrameReader {
+
+ private static final int INT_SIZE = 4;
+
+ private final IHyracksTaskContext ctx;
+ private final IFrameReader[] runCursors;
+ private final List<ByteBuffer> inFrames;
+ private final int[] keyFields;
+ private final int framesLimit;
+ private final int tableSize;
+ private final IBinaryComparator[] comparators;
+ private final RecordDescriptor recordDesc;
+ private final FrameTupleAppender outFrameAppender;
+ private final ITuplePartitionComputer tpc;
+ private ReferencedPriorityQueue topTuples;
+ private int[] tupleIndexes;
+ private int[] currentFrameIndexForRuns, bufferedFramesForRuns;
+ private FrameTupleAccessor[] tupleAccessors;
+ private int framesBuffered;
+
+ private final IAggregatorDescriptor grouper;
+ private final AggregateState groupState;
+
+ private final boolean isLoadBuffered;
+
+ private final boolean isFinalPhase;
+
+ private final ArrayTupleBuilder groupTupleBuilder, outputTupleBuilder;
+
+ private byte[] groupResultCache;
+ private ByteBuffer groupResultCacheBuffer;
+ private IFrameTupleAccessor groupResultCacheAccessor;
+ private FrameTupleAppender groupResultCacheAppender;
+
+ // FIXME
+ long queueCompCounter = 0, mergeCompCounter = 0;
+ // Convenience constructor: same as the full constructor with isLoadBuffered set to false.
+ public GroupRunMergingFrameReader(IHyracksTaskContext ctx, IFrameReader[] runCursors, int framesLimit,
+ int tableSize, int[] keyFields, ITuplePartitionComputer tpc, IBinaryComparator[] comparators,
+ IAggregatorDescriptor grouper, RecordDescriptor recordDesc, boolean isFinalPhase) {
+ this(ctx, runCursors, framesLimit, tableSize, keyFields, tpc, comparators, grouper, recordDesc, isFinalPhase,
+ false);
+ }
+ // Stores the merge configuration and creates the aggregate state, the output appender and the tuple builders.
+ public GroupRunMergingFrameReader(IHyracksTaskContext ctx, IFrameReader[] runCursors, int framesLimit,
+ int tableSize, int[] keyFields, ITuplePartitionComputer tpc, IBinaryComparator[] comparators,
+ IAggregatorDescriptor grouper, RecordDescriptor recordDesc, boolean isFinalPhase, boolean isLoadBuffered) {
+ this.ctx = ctx;
+ this.runCursors = runCursors;
+ this.inFrames = new ArrayList<ByteBuffer>();
+ this.keyFields = keyFields;
+ this.tableSize = tableSize;
+ this.comparators = comparators;
+ this.recordDesc = recordDesc;
+ this.grouper = grouper;
+ this.groupState = grouper.createAggregateStates();
+ this.outFrameAppender = new FrameTupleAppender(ctx.getFrameSize());
+ this.isLoadBuffered = isLoadBuffered;
+ this.isFinalPhase = isFinalPhase;
+ this.framesLimit = framesLimit;
+ this.tpc = tpc;
+ // Both builders are sized to the number of fields in recordDesc.
+ this.groupTupleBuilder = new ArrayTupleBuilder(recordDesc.getFieldCount());
+ this.outputTupleBuilder = new ArrayTupleBuilder(recordDesc.getFieldCount());
+ }
+
+ /*
+ * Allocates the input frames, builds the priority queue over the runs, then opens the run cursors and
+ * loads up to framesBuffered frames per run, handing each run's first tuple to the queue.
+ * @see edu.uci.ics.hyracks.api.comm.IFrameReader#open()
+ */
+ @Override
+ public void open() throws HyracksDataException {
+ if (isLoadBuffered) { // buffered mode: allocate framesLimit - 1 frames and split them evenly across the runs
+ while (inFrames.size() + 1 < framesLimit) {
+ inFrames.add(ctx.allocateFrame());
+ }
+ framesBuffered = inFrames.size() / runCursors.length;
+ } else { // unbuffered mode: one frame per run, never more than framesLimit - 1 frames in total
+ while (inFrames.size() < framesLimit - 1 && inFrames.size() < runCursors.length) {
+ inFrames.add(ctx.allocateFrame());
+ }
+ framesBuffered = 1;
+ }
+ tupleAccessors = new FrameTupleAccessor[runCursors.length];
+ currentFrameIndexForRuns = new int[runCursors.length];
+ bufferedFramesForRuns = new int[runCursors.length];
+ Comparator<ReferenceEntryWithBucketID> comparator = createEntryComparator(comparators);
+ topTuples = new ReferencedPriorityQueue(ctx.getFrameSize(), recordDesc, runCursors.length, comparator);
+ tupleIndexes = new int[runCursors.length];
+ // The run to initialise in each pass is the one reported by the current queue head, not the loop counter.
+ for (int i = 0; i < runCursors.length; i++) {
+ int runIndex = topTuples.peek().getRunid();
+ tupleIndexes[runIndex] = 0;
+ runCursors[runIndex].open();
+ for (int j = 0; j < framesBuffered; j++) {
+ // Frames of a run occupy inFrames[runIndex * framesBuffered + j] for j in [0, framesBuffered).
+ if (runCursors[runIndex].nextFrame(inFrames.get(runIndex * framesBuffered + j))) {
+
+ bufferedFramesForRuns[runIndex]++;
+ if (j == 0) { // the first frame of the run supplies the run's first queue entry
+ tupleAccessors[runIndex] = new FrameTupleAccessor(ctx.getFrameSize(), recordDesc);
+ tupleAccessors[runIndex].reset(inFrames.get(runIndex * framesBuffered + j));
+ setNextTopTuple(runIndex, tupleIndexes, runCursors, tupleAccessors, topTuples);
+ currentFrameIndexForRuns[runIndex] = runIndex * framesBuffered;
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ }
+
+ /*
+ * Merges the runs in queue order, folding consecutive tuples with equal keys into a one-tuple group cache and
+ * appending finished groups to buffer. Returns true when buffer is full or holds at least one tuple, else false.
+ * @see edu.uci.ics.hyracks.api.comm.IFrameReader#nextFrame(java.nio.ByteBuffer)
+ */
+ @Override
+ public boolean nextFrame(ByteBuffer buffer) throws HyracksDataException {
+ outFrameAppender.reset(buffer, true);
+
+ while (!topTuples.areRunsExhausted()) {
+ ReferenceEntryWithBucketID top = topTuples.peek();
+ int runIndex = top.getRunid();
+ FrameTupleAccessor fta = top.getAccessor();
+ int tupleIndex = top.getTupleIndex();
+
+ // the head tuple joins the cached group only if its keys equal the cached keys
+ boolean needInsert = true;
+ if (groupResultCache != null && groupResultCacheAccessor.getTupleCount() > 0) {
+ groupResultCacheAccessor.reset(ByteBuffer.wrap(groupResultCache));
+ if (compareFrameTuples(fta, tupleIndex, groupResultCacheAccessor, 0) == 0) {
+ needInsert = false;
+ }
+ }
+
+ if (needInsert) {
+
+ // a new group starts: emit the cached group, if there is one (same tuple-count guard as the other checks)
+ if (groupResultCacheAccessor != null && groupResultCacheAccessor.getTupleCount() > 0) {
+ outputTupleBuilder.reset();
+ for (int k = 0; k < keyFields.length; k++) {
+ outputTupleBuilder.addField(groupResultCacheAccessor, 0, k);
+ }
+ if (isFinalPhase) {
+ grouper.outputFinalResult(outputTupleBuilder, groupResultCacheAccessor, 0, groupState);
+ } else {
+ grouper.outputPartialResult(outputTupleBuilder, groupResultCacheAccessor, 0, groupState);
+ }
+
+ // buffer is full: return it; the head tuple was not consumed, so the next call retries this flush
+ if (!outFrameAppender.append(outputTupleBuilder.getFieldEndOffsets(),
+ outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
+ return true;
+ }
+ groupResultCacheBuffer.putInt(groupResultCache.length - 4, 0); // presumably the frame's tuple-count slot
+ }
+
+ groupTupleBuilder.reset();
+ for (int k : keyFields) {
+ groupTupleBuilder.addField(fta, tupleIndex, k);
+ }
+ grouper.init(groupTupleBuilder, fta, tupleIndex, groupState);
+
+ // enlarge the cache buffer if necessary
+ int requiredSize = groupTupleBuilder.getSize() + groupTupleBuilder.getFieldEndOffsets().length
+ * INT_SIZE + 2 * INT_SIZE;
+
+ if (groupResultCache == null || groupResultCache.length < requiredSize) {
+ groupResultCache = new byte[requiredSize];
+ groupResultCacheAppender = new FrameTupleAppender(groupResultCache.length);
+ groupResultCacheBuffer = ByteBuffer.wrap(groupResultCache);
+ groupResultCacheAccessor = new FrameTupleAccessor(groupResultCache.length, recordDesc);
+ }
+
+ // always reset the group cache
+ groupResultCacheAppender.reset(groupResultCacheBuffer, true);
+ if (!groupResultCacheAppender.append(groupTupleBuilder.getFieldEndOffsets(),
+ groupTupleBuilder.getByteArray(), 0, groupTupleBuilder.getSize())) {
+ throw new HyracksDataException("The partial result is too large to be initialized in a frame.");
+ }
+
+ groupResultCacheAccessor.reset(groupResultCacheBuffer);
+
+ } else {
+ grouper.aggregate(fta, tupleIndex, groupResultCacheAccessor, 0, groupState);
+ }
+
+ ++tupleIndexes[runIndex];
+ setNextTopTuple(runIndex, tupleIndexes, runCursors, tupleAccessors, topTuples);
+ }
+ // all runs are exhausted: emit the last cached group, if any
+ if (groupResultCacheAccessor != null && groupResultCacheAccessor.getTupleCount() > 0) {
+ outputTupleBuilder.reset();
+ for (int k = 0; k < keyFields.length; k++) {
+ outputTupleBuilder.addField(groupResultCacheAccessor, 0, k);
+ }
+ if (isFinalPhase) {
+ grouper.outputFinalResult(outputTupleBuilder, groupResultCacheAccessor, 0, groupState);
+ } else {
+ grouper.outputPartialResult(outputTupleBuilder, groupResultCacheAccessor, 0, groupState);
+ }
+
+ // buffer is full: return it and keep the cache so that the next call emits the group
+ if (!outFrameAppender.append(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(), 0,
+ outputTupleBuilder.getSize())) {
+ return true;
+ }
+
+ groupResultCacheAccessor = null;
+ groupResultCache = null;
+ groupResultCacheBuffer = null;
+ groupResultCacheAppender = null;
+ }
+
+ if (outFrameAppender.getTupleCount() > 0) {
+ return true;
+ }
+
+ return false;
+ }
+
+ /*
+ * Hands every run index to closeRun(), which closes the cursors that are still open and skips the rest.
+ *
+ * @see edu.uci.ics.hyracks.api.comm.IFrameReader#close()
+ */
+ @Override
+ public void close() throws HyracksDataException {
+ for (int run = 0; run < runCursors.length; run++) {
+ closeRun(run, runCursors, tupleAccessors);
+ }
+ }
+
+ private void setNextTopTuple(int runIndex, int[] tupleIndexes, IFrameReader[] runCursors,
+ FrameTupleAccessor[] tupleAccessors, ReferencedPriorityQueue topTuples) throws HyracksDataException {
+ // Run has no further tuple: drop its queue entry and close its cursor.
+ if (!hasNextTuple(runIndex, tupleIndexes, runCursors, tupleAccessors)) {
+ topTuples.pop();
+ closeRun(runIndex, runCursors, tupleAccessors);
+ return;
+ }
+ int bucket = tpc.partition(tupleAccessors[runIndex], tupleIndexes[runIndex], tableSize);
+ topTuples.popAndReplace(tupleAccessors[runIndex], tupleIndexes[runIndex], bucket);
+ }
+ // Positions run runIndex on its next unread tuple, stepping to the next buffered frame or refilling the run's frames.
+ private boolean hasNextTuple(int runIndex, int[] tupleIndexes, IFrameReader[] runCursors,
+ FrameTupleAccessor[] tupleAccessors) throws HyracksDataException {
+ if (tupleAccessors[runIndex] == null || runCursors[runIndex] == null) {
+ return false; // run was closed (closeRun nulls both slots) or never got an accessor
+ } else if (tupleIndexes[runIndex] >= tupleAccessors[runIndex].getTupleCount()) {
+ if (currentFrameIndexForRuns[runIndex] - runIndex * framesBuffered < bufferedFramesForRuns[runIndex] - 1) {
+ currentFrameIndexForRuns[runIndex]++; // another already-loaded frame of this run is available
+ } else {
+ bufferedFramesForRuns[runIndex] = 0; // all loaded frames consumed: reload up to framesBuffered frames
+ for (int j = 0; j < framesBuffered; j++) {
+ if (runCursors[runIndex].nextFrame(inFrames.get(runIndex * framesBuffered + j))) {
+ bufferedFramesForRuns[runIndex]++;
+ } else {
+ break;
+ }
+ }
+ currentFrameIndexForRuns[runIndex] = runIndex * framesBuffered;
+ }
+ if (bufferedFramesForRuns[runIndex] > 0) {
+ tupleAccessors[runIndex].reset(inFrames.get(currentFrameIndexForRuns[runIndex]));
+ tupleIndexes[runIndex] = 0; // restart at the first tuple of the newly selected frame
+ return true;
+ } else {
+ return false;
+ }
+ } else {
+ return true; // the current frame still has unread tuples
+ }
+ }
+ // Closes the cursor of run index if it is still set and clears both of the run's slots; otherwise does nothing.
+ private void closeRun(int index, IFrameReader[] runCursors, IFrameTupleAccessor[] tupleAccessors)
+ throws HyracksDataException {
+ if (runCursors[index] != null) {
+ runCursors[index].close();
+ runCursors[index] = null;
+ tupleAccessors[index] = null;
+ }
+ }
+ // Compares a run tuple (fta1/j1) with the cached group tuple (fta2/j2) on the grouping keys; 0 means same group.
+ private int compareFrameTuples(IFrameTupleAccessor fta1, int j1, IFrameTupleAccessor fta2, int j2) {
+ mergeCompCounter++;
+ byte[] b1 = fta1.getBuffer().array();
+ byte[] b2 = fta2.getBuffer().array();
+ for (int f = 0; f < keyFields.length; ++f) {
+ int fIdx = keyFields[f]; // run tuples are addressed via keyFields; the cache holds the keys in fields 0..n-1
+ int s1 = fta1.getTupleStartOffset(j1) + fta1.getFieldSlotsLength() + fta1.getFieldStartOffset(j1, fIdx);
+ int l1 = fta1.getFieldLength(j1, fIdx);
+ int s2 = fta2.getTupleStartOffset(j2) + fta2.getFieldSlotsLength() + fta2.getFieldStartOffset(j2, f);
+ int l2_start = fta2.getFieldStartOffset(j2, f);
+ int l2_end = fta2.getFieldEndOffset(j2, f);
+ int l2 = l2_end - l2_start;
+ int c = comparators[f].compare(b1, s1, l1, b2, s2, l2);
+ if (c != 0) {
+ return c;
+ }
+ }
+ return 0;
+ }
+ // Builds the queue ordering: ascending bucket id first, then the key fields compared with the given comparators.
+ private Comparator<ReferenceEntryWithBucketID> createEntryComparator(final IBinaryComparator[] comparators) {
+ return new Comparator<ReferenceEntryWithBucketID>() {
+ public int compare(ReferenceEntryWithBucketID tp1, ReferenceEntryWithBucketID tp2) {
+
+ queueCompCounter++;
+ // primary order: the bucket id stored with each queue entry
+ int cmp = tp1.getBucketID() - tp2.getBucketID();
+
+ if (cmp != 0) {
+ return cmp;
+ }
+ // same bucket: compare the key fields one by one, in keyFields order
+ FrameTupleAccessor fta1 = (FrameTupleAccessor) tp1.getAccessor();
+ FrameTupleAccessor fta2 = (FrameTupleAccessor) tp2.getAccessor();
+ int j1 = tp1.getTupleIndex();
+ int j2 = tp2.getTupleIndex();
+ byte[] b1 = fta1.getBuffer().array();
+ byte[] b2 = fta2.getBuffer().array();
+ for (int f = 0; f < keyFields.length; ++f) {
+ int fIdx = keyFields[f];
+ int s1 = fta1.getTupleStartOffset(j1) + fta1.getFieldSlotsLength()
+ + fta1.getFieldStartOffset(j1, fIdx);
+ int l1 = fta1.getFieldEndOffset(j1, fIdx) - fta1.getFieldStartOffset(j1, fIdx);
+ int s2 = fta2.getTupleStartOffset(j2) + fta2.getFieldSlotsLength()
+ + fta2.getFieldStartOffset(j2, fIdx);
+ int l2 = fta2.getFieldEndOffset(j2, fIdx) - fta2.getFieldStartOffset(j2, fIdx);
+ int c = comparators[f].compare(b1, s1, l1, b2, s2, l2);
+ if (c != 0) {
+ return c;
+ }
+ }
+ // every key field compared equal; cmp is 0 at this point
+ return cmp;
+ }
+ };
+ }
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGroupHashTable.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGroupHashTable.java
new file mode 100644
index 0000000..6e85cff
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGroupHashTable.java
@@ -0,0 +1,686 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.LinkedList;
+
+import edu.uci.ics.hyracks.api.comm.FrameHelper;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileReader;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileWriter;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.group.hybridhash.FrameTupleAccessorForGroupHashtable;
+import edu.uci.ics.hyracks.dataflow.std.group.hybridhash.FrameTupleAppenderForGroupHashtable;
+import edu.uci.ics.hyracks.dataflow.std.structures.TuplePointer;
+
+public class HybridHashSortGroupHashTable {
+
+ protected static final int INT_SIZE = 4;
+ protected static final int INIT_REF_COUNT = 8;
+ protected static final int PTR_SIZE = 3;
+
+ protected final int tableSize, framesLimit, frameSize;
+
+ protected final ByteBuffer[] headers;
+ protected final ByteBuffer[] contents;
+
+ protected final IHyracksTaskContext ctx;
+
+ protected int currentLargestFrameIndex;
+ protected int totalTupleCount;
+
+ protected final IAggregatorDescriptor aggregator;
+ protected final AggregateState aggState;
+
+ protected final int[] keys, internalKeys;
+
+ private final IBinaryComparator[] comparators;
+
+ protected final ITuplePartitionComputer tpc;
+
+ protected final INormalizedKeyComputer firstNormalizer;
+
+ private ByteBuffer outputBuffer;
+
+ private LinkedList<RunFileReader> runReaders;
+
+ protected TuplePointer matchPointer;
+
+ protected final FrameTupleAccessorForGroupHashtable hashtableRecordAccessor;
+
+ private final FrameTupleAccessorForGroupHashtable compFrameAccessor1, compFrameAccessor2;
+
+ protected final FrameTupleAppenderForGroupHashtable internalAppender;
+
+ private final FrameTupleAppender outputAppender;
+
+ /**
+ * Tuple builder for hash table insertion
+ */
+ protected final ArrayTupleBuilder internalTupleBuilder, outputTupleBuilder;
+
+ /**
+ * pointers for sort records in an entry
+ */
+ protected int[] tPointers;
+
+ protected int usedEntries = 0;
+
+ protected long hashedKeys = 0, hashedRawRec = 0;
+ // Sizes the header and content frame arrays from tableSize/frameLimits and creates the helper accessors and builders.
+ public HybridHashSortGroupHashTable(IHyracksTaskContext ctx, int frameLimits, int tableSize, int[] keys,
+ IBinaryComparator[] comparators, ITuplePartitionComputer tpc,
+ INormalizedKeyComputer firstNormalizerComputer, IAggregatorDescriptor aggregator,
+ RecordDescriptor inRecDesc, RecordDescriptor outRecDesc) {
+ this.ctx = ctx;
+ this.tableSize = tableSize;
+ this.framesLimit = frameLimits;
+ this.frameSize = ctx.getFrameSize();
+ // internalKeys is the identity mapping 0..keys.length-1
+ this.keys = keys;
+ this.internalKeys = new int[keys.length];
+ for (int i = 0; i < internalKeys.length; i++) {
+ internalKeys[i] = i;
+ }
+
+ this.aggregator = aggregator;
+ this.aggState = aggregator.createAggregateStates();
+
+ this.tpc = tpc;
+ this.comparators = comparators;
+ this.firstNormalizer = firstNormalizerComputer;
+
+ // initialize the hash table: headers gets ceil(tableSize * 2 * INT_SIZE / frameSize) slots, filled lazily
+ int residual = ((tableSize % frameSize) * INT_SIZE * 2) % frameSize == 0 ? 0 : 1;
+ this.headers = new ByteBuffer[tableSize / frameSize * INT_SIZE * 2 + tableSize % frameSize * 2 * INT_SIZE
+ / frameSize + residual];
+
+ this.outputBuffer = ctx.allocateFrame();
+ // content slots: the frame limit minus one (the output frame allocated above) minus the header slots
+ this.contents = new ByteBuffer[framesLimit - 1 - headers.length];
+ this.currentLargestFrameIndex = -1;
+ this.totalTupleCount = 0;
+
+ this.runReaders = new LinkedList<RunFileReader>();
+ this.hashtableRecordAccessor = new FrameTupleAccessorForGroupHashtable(frameSize, outRecDesc);
+ this.compFrameAccessor1 = new FrameTupleAccessorForGroupHashtable(frameSize, outRecDesc);
+ this.compFrameAccessor2 = new FrameTupleAccessorForGroupHashtable(frameSize, outRecDesc);
+
+ this.internalTupleBuilder = new ArrayTupleBuilder(outRecDesc.getFieldCount());
+ this.outputTupleBuilder = new ArrayTupleBuilder(outRecDesc.getFieldCount());
+ this.internalAppender = new FrameTupleAppenderForGroupHashtable(frameSize);
+ this.outputAppender = new FrameTupleAppender(frameSize);
+
+ this.matchPointer = new TuplePointer();
+
+ }
+
+ /**
+ * Overwrite every int slot of one header frame with -1, the value the lookup code treats as "no record".
+ *
+ * @param headerFrameIndex index into headers of the frame to clear
+ */
+ protected void resetHeader(int headerFrameIndex) {
+ for (int pos = 0; pos < frameSize; pos += INT_SIZE) {
+ headers[headerFrameIndex].putInt(pos, -1);
+ }
+ }
+
+ /**
+ * Get the index of the header frame that holds the slot of the given hash table entry.
+ *
+ * @param entry hash table entry (bucket) number
+ * @return floor(entry * 2 * INT_SIZE / frameSize), computed in two parts
+ */
+ protected int getHeaderFrameIndex(int entry) {
+ // whole multiples of frameSize first, then the remainder; each entry takes 2 * INT_SIZE header bytes
+ return entry / frameSize * 2 * INT_SIZE + entry % frameSize * 2 * INT_SIZE / frameSize;
+ }
+
+ /**
+ * Get the byte offset, inside its header frame, of the slot of the given hash table entry.
+ *
+ * @param entry hash table entry (bucket) number
+ * @return offset of the entry's (frame index, tuple index) int pair within the header frame
+ */
+ protected int getHeaderTupleIndex(int entry) {
+ // despite the method name this is a byte offset: callers pass it straight to ByteBuffer.getInt/putInt
+ return entry % frameSize * 2 * INT_SIZE % frameSize;
+ }
+ // Adds one input tuple: aggregates into the matching group if findMatch() finds one, else appends a new group record.
+ public void insert(FrameTupleAccessor accessor, int tupleIndex) throws HyracksDataException {
+
+ int entry = tpc.partition(accessor, tupleIndex, tableSize);
+
+ hashedRawRec++;
+
+ if (findMatch(entry, accessor, tupleIndex)) {
+ // find match; do aggregation
+ hashtableRecordAccessor.reset(contents[matchPointer.frameIndex]);
+ aggregator.aggregate(accessor, tupleIndex, hashtableRecordAccessor, matchPointer.tupleIndex, aggState);
+ } else {
+ // no match: build the new record from the key fields, then let aggregator.init() add its part
+ internalTupleBuilder.reset();
+ for (int k = 0; k < keys.length; k++) {
+ internalTupleBuilder.addField(accessor, tupleIndex, keys[k]);
+ }
+ aggregator.init(internalTupleBuilder, accessor, tupleIndex, aggState);
+ int insertFrameIndex = -1, insertTupleIndex = -1;
+ boolean inserted = false;
+
+ if (currentLargestFrameIndex < 0) {
+ currentLargestFrameIndex = 0;
+ }
+
+ if (contents[currentLargestFrameIndex] == null) {
+ contents[currentLargestFrameIndex] = ctx.allocateFrame();
+ }
+ // first attempt: append to the current content frame without clearing it
+ internalAppender.reset(contents[currentLargestFrameIndex], false);
+ if (internalAppender.append(internalTupleBuilder.getFieldEndOffsets(), internalTupleBuilder.getByteArray(),
+ 0, internalTupleBuilder.getSize())) {
+ inserted = true;
+ insertFrameIndex = currentLargestFrameIndex;
+ insertTupleIndex = internalAppender.getTupleCount() - 1;
+ }
+ // second attempt: the current frame is full, so move to the next content frame if one is left
+ if (!inserted && currentLargestFrameIndex < contents.length - 1) {
+ currentLargestFrameIndex++;
+ if (contents[currentLargestFrameIndex] == null) {
+ contents[currentLargestFrameIndex] = ctx.allocateFrame();
+ }
+ internalAppender.reset(contents[currentLargestFrameIndex], true);
+ if (!internalAppender.append(internalTupleBuilder.getFieldEndOffsets(),
+ internalTupleBuilder.getByteArray(), 0, internalTupleBuilder.getSize())) {
+ throw new HyracksDataException("Failed to insert an aggregation value.");
+ } else {
+ insertFrameIndex = currentLargestFrameIndex;
+ insertTupleIndex = internalAppender.getTupleCount() - 1;
+ inserted = true;
+ }
+ }
+
+ // memory is full
+ if (!inserted) {
+ // flush hash table and try to insert again
+ flush();
+
+ // update the match point to the header reference
+ matchPointer.frameIndex = -1;
+ matchPointer.tupleIndex = -1;
+ // re-insert; NOTE(review): relies on reset() (not shown here) rewinding currentLargestFrameIndex - confirm
+ currentLargestFrameIndex++;
+ if (contents[currentLargestFrameIndex] == null) {
+ contents[currentLargestFrameIndex] = ctx.allocateFrame();
+ }
+ internalAppender.reset(contents[currentLargestFrameIndex], true);
+ if (!internalAppender.append(internalTupleBuilder.getFieldEndOffsets(),
+ internalTupleBuilder.getByteArray(), 0, internalTupleBuilder.getSize())) {
+ throw new HyracksDataException("Failed to insert an aggregation value.");
+ } else {
+ insertFrameIndex = currentLargestFrameIndex;
+ insertTupleIndex = internalAppender.getTupleCount() - 1;
+ }
+ }
+
+ // no match; new insertion
+ if (matchPointer.frameIndex < 0) {
+ // first record for this entry; update the header references
+ int headerFrameIndex = getHeaderFrameIndex(entry);
+ int headerFrameOffset = getHeaderTupleIndex(entry);
+ if (headers[headerFrameIndex] == null) {
+ headers[headerFrameIndex] = ctx.allocateFrame();
+ resetHeader(headerFrameIndex);
+ }
+ headers[headerFrameIndex].putInt(headerFrameOffset, insertFrameIndex);
+ headers[headerFrameIndex].putInt(headerFrameOffset + INT_SIZE, insertTupleIndex);
+ usedEntries++;
+
+ } else {
+ // update the previous reference
+ hashtableRecordAccessor.reset(contents[matchPointer.frameIndex]);
+ int refOffset = hashtableRecordAccessor.getTupleHashReferenceOffset(matchPointer.tupleIndex);
+ contents[matchPointer.frameIndex].putInt(refOffset, insertFrameIndex);
+ contents[matchPointer.frameIndex].putInt(refOffset + INT_SIZE, insertTupleIndex);
+ }
+ hashedKeys++;
+ totalTupleCount++;
+ }
+ }
+
+ /**
+ * Write the final result of every record in the content frames, in storage order, to outputWriter.
+ */
+ public void flushHashtableToOutput(IFrameWriter outputWriter) throws HyracksDataException {
+
+ outputAppender.reset(outputBuffer, true);
+ for (int i = 0; i < contents.length; i++) {
+ if (contents[i] == null) {
+ continue;
+ }
+ hashtableRecordAccessor.reset(contents[i]);
+ int tupleCount = hashtableRecordAccessor.getTupleCount();
+ for (int j = 0; j < tupleCount; j++) {
+ outputTupleBuilder.reset();
+
+ int tupleOffset = hashtableRecordAccessor.getTupleStartOffset(j);
+ int fieldOffset = hashtableRecordAccessor.getFieldCount() * INT_SIZE;
+ // copy the key fields of the stored record (fields 0..internalKeys.length-1)
+ for (int k = 0; k < internalKeys.length; k++) {
+ outputTupleBuilder.addField(hashtableRecordAccessor.getBuffer().array(), tupleOffset + fieldOffset
+ + hashtableRecordAccessor.getFieldStartOffset(j, k),
+ hashtableRecordAccessor.getFieldLength(j, k));
+ }
+
+ aggregator.outputFinalResult(outputTupleBuilder, hashtableRecordAccessor, j, aggState);
+
+ if (!outputAppender.append(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(),
+ 0, outputTupleBuilder.getSize())) {
+ // output frame is full: ship it and retry once on the emptied frame
+ FrameUtils.flushFrame(outputBuffer, outputWriter);
+
+ outputAppender.reset(outputBuffer, true);
+ if (!outputAppender.append(outputTupleBuilder.getFieldEndOffsets(),
+ outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
+ throw new HyracksDataException("Failed to flush the hash table to the final output");
+ }
+ }
+ }
+ }
+
+ if (outputAppender.getTupleCount() > 0) {
+ // ship the last, partially filled output frame
+ FrameUtils.flushFrame(outputBuffer, outputWriter);
+
+ outputAppender.reset(outputBuffer, true);
+ }
+
+ totalTupleCount = 0;
+ usedEntries = 0;
+ }
+
+ /**
+ * Write all entries to a new run file, keep a reader for it in runReaders, then call reset().
+ *
+ * @throws HyracksDataException if the run file cannot be created or written
+ */
+ protected void flush() throws HyracksDataException {
+
+ long methodTimer = System.nanoTime();
+
+ FileReference runFile;
+ try {
+ runFile = ctx.getJobletContext().createManagedWorkspaceFile(
+ HybridHashSortGroupHashTable.class.getSimpleName());
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ RunFileWriter runWriter = new RunFileWriter(runFile, ctx.getIOManager());
+ runWriter.open();
+ flushEntries(runWriter);
+ runWriter.close();
+ runReaders.add(runWriter.createReader());
+ reset();
+ // add the elapsed nanoseconds of this flush to a job counter
+ ctx.getCounterContext()
+ .getCounter("optional." + HybridHashSortGroupHashTable.class.getSimpleName() + ".flush.time", true)
+ .update(System.nanoTime() - methodTimer);
+ }
+ // Writes the partial result of every record to writer, entry by entry, each entry in the order set by sortEntry().
+ private void flushEntries(IFrameWriter writer) throws HyracksDataException {
+
+ outputAppender.reset(outputBuffer, true);
+ for (int i = 0; i < tableSize; i++) {
+ int tupleInEntry = sortEntry(i);
+ // tPointers now holds tupleInEntry sorted (frame index, tuple index, normalized key) triples
+ for (int ptr = 0; ptr < tupleInEntry; ptr++) {
+ int frameIndex = tPointers[ptr * PTR_SIZE];
+ int tupleIndex = tPointers[ptr * PTR_SIZE + 1];
+
+ hashtableRecordAccessor.reset(contents[frameIndex]);
+ outputTupleBuilder.reset();
+
+ int tupleOffset = hashtableRecordAccessor.getTupleStartOffset(tupleIndex);
+ int fieldOffset = hashtableRecordAccessor.getFieldCount() * INT_SIZE;
+ // copy the key fields of the stored record (fields 0..internalKeys.length-1)
+ for (int k = 0; k < internalKeys.length; k++) {
+ outputTupleBuilder.addField(hashtableRecordAccessor.getBuffer().array(), tupleOffset + fieldOffset
+ + hashtableRecordAccessor.getFieldStartOffset(tupleIndex, k),
+ hashtableRecordAccessor.getFieldLength(tupleIndex, k));
+ }
+
+ aggregator.outputPartialResult(outputTupleBuilder, hashtableRecordAccessor, tupleIndex, aggState);
+
+ if (!outputAppender.append(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(),
+ 0, outputTupleBuilder.getSize())) {
+ // output frame is full: ship it and retry once on the emptied frame
+ FrameUtils.flushFrame(outputBuffer, writer);
+
+ outputAppender.reset(outputBuffer, true);
+ if (!outputAppender.append(outputTupleBuilder.getFieldEndOffsets(),
+ outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
+ throw new HyracksDataException("Failed to flush an aggregation result.");
+ }
+ }
+ totalTupleCount--;
+ }
+
+ if (tupleInEntry > 0) {
+ usedEntries--;
+ }
+ }
+
+ if (outputAppender.getTupleCount() > 0) {
+ // ship the last, partially filled output frame
+ FrameUtils.flushFrame(outputBuffer, writer);
+
+ outputAppender.reset(outputBuffer, true);
+ }
+ }
+ // Collects the records chained under entry entryID into tPointers, sorts them, and returns how many there are.
+ protected int sortEntry(int entryID) {
+
+ if (tPointers == null)
+ tPointers = new int[INIT_REF_COUNT * PTR_SIZE];
+ int ptr = 0;
+ // same arithmetic as getHeaderFrameIndex() / getHeaderTupleIndex()
+ int headerFrameIndex = entryID / frameSize * 2 * INT_SIZE + (entryID % frameSize) * 2 * INT_SIZE / frameSize;
+ int headerFrameOffset = (entryID % frameSize) * 2 * INT_SIZE % frameSize;
+
+ if (headers[headerFrameIndex] == null) {
+ return 0;
+ }
+
+ int entryFrameIndex = headers[headerFrameIndex].getInt(headerFrameOffset);
+ int entryTupleIndex = headers[headerFrameIndex].getInt(headerFrameOffset + INT_SIZE);
+ // walk the chain; a negative frame index ends it
+ do {
+ if (entryFrameIndex < 0) {
+ break;
+ }
+ hashtableRecordAccessor.reset(contents[entryFrameIndex]);
+ tPointers[ptr * PTR_SIZE] = entryFrameIndex;
+ tPointers[ptr * PTR_SIZE + 1] = entryTupleIndex;
+ int tStart = hashtableRecordAccessor.getTupleStartOffset(entryTupleIndex);
+ int f0StartRel = hashtableRecordAccessor.getFieldStartOffset(entryTupleIndex, internalKeys[0]);
+ int f0EndRel = hashtableRecordAccessor.getFieldEndOffset(entryTupleIndex, internalKeys[0]);
+ int f0Start = f0StartRel + tStart + hashtableRecordAccessor.getFieldSlotsLength();
+ tPointers[ptr * PTR_SIZE + 2] = firstNormalizer == null ? 0 : firstNormalizer.normalize(
+ hashtableRecordAccessor.getBuffer().array(), f0Start, f0EndRel - f0StartRel);
+
+ ptr++;
+ // double the pointer array as soon as the next triple would not fit
+ if (ptr * PTR_SIZE >= tPointers.length) {
+ int[] newTPointers = new int[tPointers.length * 2];
+ System.arraycopy(tPointers, 0, newTPointers, 0, tPointers.length);
+ tPointers = newTPointers;
+ }
+
+ // move to the next record
+ int refOffset = hashtableRecordAccessor.getTupleHashReferenceOffset(entryTupleIndex);
+ int prevFrameIndex = entryFrameIndex;
+ entryFrameIndex = contents[prevFrameIndex].getInt(refOffset);
+ entryTupleIndex = contents[prevFrameIndex].getInt(refOffset + INT_SIZE);
+
+ } while (true);
+
+ // sort records
+ if (ptr > 1) {
+ sort(0, ptr);
+ }
+
+ return ptr;
+ }
+ // Sorts the len triples of tPointers that start at offset, recursively, around the middle element as pivot.
+ protected void sort(int offset, int len) {
+ int m = offset + (len >> 1);
+ int mFrameIndex = tPointers[m * PTR_SIZE];
+ int mTupleIndex = tPointers[m * PTR_SIZE + 1];
+ int mNormKey = tPointers[m * PTR_SIZE + 2];
+ compFrameAccessor1.reset(contents[mFrameIndex]);
+ // a and d bound the pivot-equal elements parked at the two ends; b and c scan inwards
+ int a = offset;
+ int b = a;
+ int c = offset + len - 1;
+ int d = c;
+ while (true) {
+ while (b <= c) {
+ int bFrameIndex = tPointers[b * PTR_SIZE];
+ int bTupleIndex = tPointers[b * PTR_SIZE + 1];
+ int bNormKey = tPointers[b * PTR_SIZE + 2];
+ int cmp = 0;
+ if (bNormKey != mNormKey) { // normalized keys differ: compare them as unsigned 32-bit values
+ cmp = ((((long) bNormKey) & 0xffffffffL) < (((long) mNormKey) & 0xffffffffL)) ? -1 : 1;
+ } else {
+ compFrameAccessor2.reset(contents[bFrameIndex]);
+ cmp = compare(compFrameAccessor2, bTupleIndex, compFrameAccessor1, mTupleIndex);
+ }
+ if (cmp > 0) {
+ break;
+ }
+ if (cmp == 0) {
+ swap(a++, b);
+ }
+ ++b;
+ }
+ while (c >= b) {
+ int cFrameIndex = tPointers[c * PTR_SIZE];
+ int cTupleIndex = tPointers[c * PTR_SIZE + 1];
+ int cNormKey = tPointers[c * PTR_SIZE + 2];
+ int cmp = 0;
+ if (cNormKey != mNormKey) { // normalized keys differ: compare them as unsigned 32-bit values
+ cmp = ((((long) cNormKey) & 0xffffffffL) < (((long) mNormKey) & 0xffffffffL)) ? -1 : 1;
+ } else {
+ compFrameAccessor2.reset(contents[cFrameIndex]);
+ cmp = compare(compFrameAccessor2, cTupleIndex, compFrameAccessor1, mTupleIndex);
+ }
+ if (cmp < 0) {
+ break;
+ }
+ if (cmp == 0) {
+ swap(c, d--);
+ }
+ --c;
+ }
+ if (b > c)
+ break;
+ swap(b++, c--);
+ }
+ // move the pivot-equal elements from both ends into the middle
+ int s;
+ int n = offset + len;
+ s = Math.min(a - offset, b - a);
+ vecswap(offset, b - s, s);
+ s = Math.min(d - c, n - d - 1);
+ vecswap(b, n - s, s);
+ // recurse into the smaller-than-pivot and larger-than-pivot ranges
+ if ((s = b - a) > 1) {
+ sort(offset, s);
+ }
+ if ((s = d - c) > 1) {
+ sort(n - s, s);
+ }
+ }
+ // Exchanges the PTR_SIZE ints of triple a with those of triple b in tPointers.
+ private void swap(int a, int b) {
+ for (int slotA = a * PTR_SIZE, slotB = b * PTR_SIZE, end = slotA + PTR_SIZE; slotA < end; slotA++, slotB++) {
+ int saved = tPointers[slotA];
+ tPointers[slotA] = tPointers[slotB];
+ tPointers[slotB] = saved;
+ }
+ }
+
+ private void vecswap(int a, int b, int n) { // swaps the n records starting at a with the n records starting at b
+     for (int k = 0; k < n; k++) {
+         swap(a + k, b + k);
+     }
+ }
+
+ protected boolean findMatch(int entry, FrameTupleAccessor accessor, int tupleIndex) {
+     // Start from "no match"; the pointer is updated while the chain of this entry is walked.
+     matchPointer.frameIndex = -1;
+     matchPointer.tupleIndex = -1;
+
+     // Locate the header slot holding the reference to the first record of this entry.
+     int hdrFrame = getHeaderFrameIndex(entry);
+     int hdrOffset = getHeaderTupleIndex(entry);
+     if (headers[hdrFrame] == null) {
+         // No header frame at this index, so there is nothing to search.
+         return false;
+     }
+
+     int curFrame = headers[hdrFrame].getInt(hdrOffset);
+     int curTuple = headers[hdrFrame].getInt(hdrOffset + INT_SIZE);
+     // A negative frame index ends the chain.
+     while (curFrame >= 0) {
+         matchPointer.frameIndex = curFrame;
+         matchPointer.tupleIndex = curTuple;
+         hashtableRecordAccessor.reset(contents[curFrame]);
+         if (compare(accessor, tupleIndex, hashtableRecordAccessor, curTuple) == 0) {
+             return true;
+         }
+
+         // Follow the (frame index, tuple index) reference stored with the current record.
+         int linkOffset = hashtableRecordAccessor.getTupleHashReferenceOffset(curTuple);
+         int nextFrame = contents[curFrame].getInt(linkOffset);
+         int nextTuple = contents[curFrame].getInt(linkOffset + INT_SIZE);
+         curFrame = nextFrame;
+         curTuple = nextTuple;
+     }
+
+     // Chain exhausted: matchPointer keeps the last record visited (or -1/-1 if the chain was empty).
+     return false;
+ }
+
+ public LinkedList<RunFileReader> getRunFileReaders() { // returns the runReaders list itself, not a copy
+ return runReaders;
+ }
+
+ private int compare(FrameTupleAccessor accessor, int tupleIndex, FrameTupleAccessorForGroupHashtable hashAccessor,
+         int hashTupleIndex) {
+     // Compares the key fields (keys) of an input tuple against the key fields (internalKeys) of a hash-table
+     // record. Returns the first non-zero comparator result, or 0 when every key field compares equal.
+     int inputBase = accessor.getTupleStartOffset(tupleIndex) + accessor.getFieldSlotsLength();
+     int storedBase = hashAccessor.getTupleStartOffset(hashTupleIndex) + hashAccessor.getFieldSlotsLength();
+
+     for (int k = 0; k < keys.length; k++) {
+         int inputField = keys[k];
+         int inputStart = accessor.getFieldStartOffset(tupleIndex, inputField);
+         int inputLength = accessor.getFieldEndOffset(tupleIndex, inputField) - inputStart;
+
+         int storedField = internalKeys[k];
+         int storedStart = hashAccessor.getFieldStartOffset(hashTupleIndex, storedField);
+         int storedLength = hashAccessor.getFieldEndOffset(hashTupleIndex, storedField) - storedStart;
+
+         // Field offsets are relative to the end of the tuple's field-slot area, hence the base offsets.
+         int result = comparators[k].compare(accessor.getBuffer().array(), inputBase + inputStart, inputLength,
+                 hashAccessor.getBuffer().array(), storedBase + storedStart, storedLength);
+         if (result != 0) {
+             return result;
+         }
+     }
+     return 0;
+ }
+
+ private int compare(FrameTupleAccessorForGroupHashtable accessor1, int tupleIndex1,
+         FrameTupleAccessorForGroupHashtable accessor2, int tupleIndex2) {
+     // Compares two hash-table records on their key fields (internalKeys). Returns the first non-zero
+     // comparator result, or 0 when every key field compares equal.
+     int base1 = accessor1.getTupleStartOffset(tupleIndex1) + accessor1.getFieldSlotsLength();
+     int base2 = accessor2.getTupleStartOffset(tupleIndex2) + accessor2.getFieldSlotsLength();
+
+     for (int k = 0; k < internalKeys.length; k++) {
+         int field = internalKeys[k];
+         int start1 = accessor1.getFieldStartOffset(tupleIndex1, field);
+         int length1 = accessor1.getFieldEndOffset(tupleIndex1, field) - start1;
+
+         int start2 = accessor2.getFieldStartOffset(tupleIndex2, field);
+         int length2 = accessor2.getFieldEndOffset(tupleIndex2, field) - start2;
+
+         // Field offsets are relative to the end of the tuple's field-slot area, hence the base offsets.
+         // The same field index is looked up on both sides.
+         int result = comparators[k].compare(accessor1.getBuffer().array(), base1 + start1, length1,
+                 accessor2.getBuffer().array(), base2 + start2, length2);
+         if (result != 0) {
+             return result;
+         }
+     }
+     return 0;
+ }
+
+ public void reset() { // empties the table while keeping the frames that are already allocated
+     for (int h = 0; h < headers.length; h++) {
+         if (headers[h] == null) {
+             continue; // no header frame in this slot
+         }
+         resetHeader(h);
+     }
+     for (int c = 0; c < contents.length; c++) {
+         if (contents[c] != null) {
+             contents[c].putInt(FrameHelper.getTupleCountOffset(frameSize), 0); // tuple count back to zero
+         }
+     }
+     currentLargestFrameIndex = -1;
+     totalTupleCount = 0;
+     usedEntries = 0;
+ }
+
+ public void finishup() throws HyracksDataException {
+     // flush() is only called when at least one run reader already exists.
+     if (!runReaders.isEmpty()) {
+         flush();
+     }
+     hashedRawRec = 0;
+     hashedKeys = 0;
+ }
+
+ /**
+  * Closes the hash table by dropping its references to the header frames, the content frames, the
+  * output buffer and the pointer array. Aggregation states maintained in {@link #aggState} and run
+  * file readers in {@link #runReaders} are not touched here, so they should remain valid for later
+  * processing.
+  */
+ public void close() throws HyracksDataException {
+     // Null out every slot; the two arrays themselves are kept.
+     java.util.Arrays.fill(headers, null);
+     java.util.Arrays.fill(contents, null);
+
+     // These two references are dropped altogether.
+     tPointers = null;
+     outputBuffer = null;
+ }
+
+ public int getTupleCount() { // current value of totalTupleCount (set back to 0 by reset())
+ return totalTupleCount;
+ }
+
+ public int getFrameSize() { // NOTE(review): despite the name this is a count: header slots + content slots + 1 (what the extra 1 stands for is not visible here)
+ return headers.length + contents.length + 1;
+ }
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGroupOperatorDescriptor.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGroupOperatorDescriptor.java
new file mode 100644
index 0000000..5296c9f
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGroupOperatorDescriptor.java
@@ -0,0 +1,275 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.LinkedList;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.ActivityId;
+import edu.uci.ics.hyracks.api.dataflow.IActivityGraphBuilder;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.TaskId;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileReader;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractActivityNode;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractStateObject;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
+
+public class HybridHashSortGroupOperatorDescriptor extends AbstractOperatorDescriptor {
+
+ private static final int AGGREGATE_ACTIVITY_ID = 0;
+
+ private static final int MERGE_ACTIVITY_ID = 1;
+
+ private static final long serialVersionUID = 1L;
+ private final int[] keyFields, storedKeyFields;
+ private final INormalizedKeyComputerFactory firstNormalizerFactory;
+
+ private final IAggregatorDescriptorFactory aggregatorFactory;
+ private final IAggregatorDescriptorFactory mergerFactory;
+
+ private final ITuplePartitionComputerFactory aggTpcf, mergeTpcf;
+
+ private final int framesLimit;
+ private final IBinaryComparatorFactory[] comparatorFactories;
+
+ private final int tableSize;
+
+ private final boolean isLoadOptimized;
+
+ public HybridHashSortGroupOperatorDescriptor(JobSpecification spec, int[] keyFields, int framesLimit,
+ int tableSize, IBinaryComparatorFactory[] comparatorFactories, ITuplePartitionComputerFactory aggTpcf,
+ ITuplePartitionComputerFactory mergeTpcf, IAggregatorDescriptorFactory aggregatorFactory,
+ IAggregatorDescriptorFactory mergerFactory, RecordDescriptor recordDescriptor) {
+ this(spec, keyFields, framesLimit, tableSize, comparatorFactories, aggTpcf, mergeTpcf, null, aggregatorFactory,
+ mergerFactory, recordDescriptor, false); // null: no first-key normalizer factory; false: isLoadOpt off
+ }
+
+ public HybridHashSortGroupOperatorDescriptor(JobSpecification spec, int[] keyFields, int framesLimit,
+ int tableSize, IBinaryComparatorFactory[] comparatorFactories, ITuplePartitionComputerFactory aggTpcf,
+ ITuplePartitionComputerFactory mergeTpcf, INormalizedKeyComputerFactory firstNormalizerFactory,
+ IAggregatorDescriptorFactory aggregatorFactory, IAggregatorDescriptorFactory mergerFactory,
+ RecordDescriptor recordDescriptor) {
+ this(spec, keyFields, framesLimit, tableSize, comparatorFactories, aggTpcf, mergeTpcf, firstNormalizerFactory,
+ aggregatorFactory, mergerFactory, recordDescriptor, false); // same as the 12-argument constructor with isLoadOpt = false
+ }
+
+ public HybridHashSortGroupOperatorDescriptor(JobSpecification spec, int[] keyFields, int framesLimit,
+         int tableSize, IBinaryComparatorFactory[] comparatorFactories, ITuplePartitionComputerFactory aggTpcf,
+         ITuplePartitionComputerFactory mergeTpcf, INormalizedKeyComputerFactory firstNormalizerFactory,
+         IAggregatorDescriptorFactory aggregatorFactory, IAggregatorDescriptorFactory mergerFactory,
+         RecordDescriptor recordDescriptor, boolean isLoadOpt) {
+     super(spec, 1, 1);
+     // Reject an unusable frame budget up front.
+     if (framesLimit < 3) {
+         // Minimum of 3 frames: 2 for the in-memory hash table, and 1 for outputting the
+         // aggregation results.
+         throw new IllegalStateException("frame limit should at least be 3, but it is " + framesLimit + "!");
+     }
+     this.framesLimit = framesLimit;
+     this.tableSize = tableSize;
+     this.isLoadOptimized = isLoadOpt;
+
+     // storedKeyFields is the identity mapping 0 .. n - 1, one position per entry of keyFields.
+     this.keyFields = keyFields;
+     storedKeyFields = new int[keyFields.length];
+     for (int k = storedKeyFields.length - 1; k >= 0; k--) {
+         storedKeyFields[k] = k;
+     }
+
+     this.comparatorFactories = comparatorFactories;
+     this.firstNormalizerFactory = firstNormalizerFactory;
+
+     this.aggTpcf = aggTpcf;
+     this.mergeTpcf = mergeTpcf;
+
+     this.aggregatorFactory = aggregatorFactory;
+     this.mergerFactory = mergerFactory;
+
+     // Set the record descriptor. Since this operator is a unary operator, only the first
+     // record descriptor is used here.
+     recordDescriptors[0] = recordDescriptor;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor#contributeActivities(edu.uci.ics.hyracks.api.dataflow.
+ * IActivityGraphBuilder)
+ */
+ @Override
+ public void contributeActivities(IActivityGraphBuilder builder) {
+     ActivityId aggregateId = new ActivityId(getOperatorId(), AGGREGATE_ACTIVITY_ID);
+     AggregateActivity aggregate = new AggregateActivity(aggregateId);
+     ActivityId mergeId = new ActivityId(odId, MERGE_ACTIVITY_ID);
+     MergeActivity merge = new MergeActivity(mergeId);
+     // Source edge 0 goes to the aggregate activity, target edge 0 comes from the merge activity, and a blocking edge links the two.
+     builder.addActivity(this, aggregate);
+     builder.addSourceEdge(0, aggregate, 0);
+     builder.addActivity(this, merge);
+     builder.addTargetEdge(0, merge, 0);
+     builder.addBlockingEdge(aggregate, merge);
+ }
+
+ public static class AggregateActivityState extends AbstractStateObject { // carries the aggregate activity's hash table to the merge activity
+ // Set by the aggregate activity when it closes; read by the merge activity in initialize().
+ private HybridHashSortGroupHashTable gTable;
+
+ public AggregateActivityState() {
+ }
+
+ private AggregateActivityState(JobId jobId, TaskId tId) {
+ super(jobId, tId);
+ }
+ // Writing this state out is not supported.
+ @Override
+ public void toBytes(DataOutput out) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+ // Reading this state back is not supported.
+ @Override
+ public void fromBytes(DataInput in) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ private class AggregateActivity extends AbstractActivityNode {
+
+     private static final long serialVersionUID = 1L;
+
+     public AggregateActivity(ActivityId id) {
+         super(id);
+     }
+
+     // Builds the sink runtime: every input tuple is inserted into a HybridHashSortGroupHashTable, and on
+     // close the table is published as an AggregateActivityState for the merge activity to pick up.
+     @Override
+     public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+             final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
+             throws HyracksDataException {
+         return new AbstractUnaryInputSinkOperatorNodePushable() {
+             HybridHashSortGroupHashTable serializableGroupHashtable;
+             FrameTupleAccessor accessor;
+
+             @Override
+             public void open() throws HyracksDataException {
+                 RecordDescriptor inRecDesc = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);
+                 IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
+                 for (int i = 0; i < comparatorFactories.length; i++) {
+                     comparators[i] = comparatorFactories[i].createBinaryComparator();
+                 }
+
+                 // The normalizer factory is optional (the shortest descriptor constructor passes null),
+                 // so it must not be dereferenced unconditionally.
+                 serializableGroupHashtable = new HybridHashSortGroupHashTable(ctx, framesLimit, tableSize,
+                         keyFields, comparators, aggTpcf.createPartitioner(),
+                         firstNormalizerFactory == null ? null : firstNormalizerFactory.createNormalizedKeyComputer(),
+                         aggregatorFactory.createAggregator(ctx, inRecDesc, recordDescriptors[0], keyFields,
+                                 storedKeyFields), inRecDesc, recordDescriptors[0]);
+                 accessor = new FrameTupleAccessor(ctx.getFrameSize(), inRecDesc);
+             }
+
+             @Override
+             public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+                 accessor.reset(buffer);
+                 int tupleCount = accessor.getTupleCount();
+                 for (int i = 0; i < tupleCount; i++) {
+                     serializableGroupHashtable.insert(accessor, i);
+                 }
+             }
+
+             @Override
+             public void fail() throws HyracksDataException {
+             }
+
+             @Override
+             public void close() throws HyracksDataException {
+                 serializableGroupHashtable.finishup();
+                 AggregateActivityState state = new AggregateActivityState(ctx.getJobletContext().getJobId(),
+                         new TaskId(getActivityId(), partition));
+                 state.gTable = serializableGroupHashtable;
+                 ctx.setStateObject(state);
+             }
+         };
+     }
+ }
+
+ private class MergeActivity extends AbstractActivityNode {
+
+     private static final long serialVersionUID = 1L;
+
+     public MergeActivity(ActivityId id) {
+         super(id);
+     }
+
+     // Builds the source runtime that emits the groups collected by the aggregate activity: straight from
+     // the hash table when no run file was produced, otherwise by merging the run files.
+     @Override
+     public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+             IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
+             throws HyracksDataException {
+         return new AbstractUnaryOutputSourceOperatorNodePushable() {
+             @Override
+             public void initialize() throws HyracksDataException {
+                 AggregateActivityState aggState = (AggregateActivityState) ctx.getStateObject(new TaskId(
+                         new ActivityId(getOperatorId(), AGGREGATE_ACTIVITY_ID), partition));
+                 LinkedList<RunFileReader> runs = aggState.gTable.getRunFileReaders();
+                 writer.open();
+                 try {
+                     if (runs.size() <= 0) {
+                         aggState.gTable.flushHashtableToOutput(writer);
+                         aggState.gTable.close();
+                     } else {
+                         aggState.gTable.close();
+                         IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
+                         for (int i = 0; i < comparatorFactories.length; i++) {
+                             comparators[i] = comparatorFactories[i].createBinaryComparator();
+                         }
+                         HybridHashSortRunMerger merger = new HybridHashSortRunMerger(ctx, runs, storedKeyFields,
+                                 comparators, recordDescriptors[0], mergeTpcf.createPartitioner(),
+                                 mergerFactory.createAggregator(ctx, recordDescriptors[0], recordDescriptors[0],
+                                         storedKeyFields, storedKeyFields), framesLimit, tableSize, writer,
+                                 isLoadOptimized);
+                         merger.process();
+                     }
+                 } catch (Exception e) {
+                     // Signal the failure to the writer before propagating it.
+                     writer.fail();
+                     throw new HyracksDataException(e);
+                 } finally {
+                     // The writer was opened above, so it is closed on every path.
+                     writer.close();
+                 }
+             }
+         };
+     }
+ }
+
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGrouperBucketMerge.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGrouperBucketMerge.java
new file mode 100644
index 0000000..1de2237
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortGrouperBucketMerge.java
@@ -0,0 +1,488 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.LinkedList;
+import java.util.List;
+
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileReader;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileWriter;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.util.ReferenceEntry;
+
+public class HybridHashSortGrouperBucketMerge {
+
+ private final int[] keyFields;
+ private final IBinaryComparator[] comparators;
+
+ private final IAggregatorDescriptor merger;
+ private final AggregateState mergeState;
+
+ private final int framesLimit, tableSize;
+
+ private final RecordDescriptor inRecDesc;
+
+ private final IHyracksTaskContext ctx;
+
+ private final ArrayTupleBuilder tupleBuilder;
+
+ private final IFrameWriter outputWriter;
+
+ private final ITuplePartitionComputer tpc;
+
+ private final boolean isLoadOptimized;
+
+ List<ByteBuffer> inFrames;
+ ByteBuffer outFrame, writerFrame;
+ FrameTupleAppender outAppender, writerAppender;
+ LinkedList<RunFileReader> runs;
+ ArrayTupleBuilder finalTupleBuilder;
+ FrameTupleAccessor outFrameAccessor;
+ int[] currentFrameIndexInRun, currentRunFrames, currentBucketInRun;
+ int runFrameLimit = 1;
+
+ /**
+  * Creates a bucket merger with load optimization switched on. Delegates to the constructor that takes
+  * the {@code loadOptimized} flag so that the two constructors cannot drift apart.
+  *
+  * @param ctx task context, used for frame allocation, the frame size and run files
+  * @param keyFields key field indexes of the records being merged
+  * @param framesLimit number of frames this merger may use
+  * @param tableSize passed to the tuple partition computer and to the priority queue of the
+  *            merge passes
+  * @param tpc tuple partition computer, applied to the first tuple of each run and handed to
+  *            the priority queue
+  * @param comparators binary comparators, one per key field
+  * @param merger aggregator that initializes, merges and outputs the grouped records
+  * @param inRecDesc record descriptor of the records being merged
+  * @param outRecDesc accepted for the caller's convenience; this class does not read it
+  * @param outputWriter writer that receives the result of the final pass and is failed on
+  *            errors
+  * @throws HyracksDataException
+  *             declared by the delegate constructor
+  */
+ public HybridHashSortGrouperBucketMerge(IHyracksTaskContext ctx, int[] keyFields, int framesLimit, int tableSize,
+         ITuplePartitionComputer tpc, IBinaryComparator[] comparators, IAggregatorDescriptor merger,
+         RecordDescriptor inRecDesc, RecordDescriptor outRecDesc, IFrameWriter outputWriter)
+         throws HyracksDataException {
+     this(ctx, keyFields, framesLimit, tableSize, tpc, comparators, merger, inRecDesc, outRecDesc, outputWriter,
+             true);
+ }
+
+ public HybridHashSortGrouperBucketMerge(IHyracksTaskContext ctx, int[] keyFields, int framesLimit, int tableSize,
+         ITuplePartitionComputer tpc, IBinaryComparator[] comparators, IAggregatorDescriptor merger,
+         RecordDescriptor inRecDesc, RecordDescriptor outRecDesc, IFrameWriter outputWriter, boolean loadOptimized)
+         throws HyracksDataException {
+     // Plain configuration, stored as given. outRecDesc is not read by this constructor.
+     this.ctx = ctx;
+     this.tpc = tpc;
+     this.keyFields = keyFields;
+     this.comparators = comparators;
+
+     this.framesLimit = framesLimit;
+     this.tableSize = tableSize;
+     this.isLoadOptimized = loadOptimized;
+
+     this.inRecDesc = inRecDesc;
+     this.outputWriter = outputWriter;
+
+     // The merger and the aggregate state it creates.
+     this.merger = merger;
+     this.mergeState = merger.createAggregateStates();
+
+     // Working objects, sized from the field count of the input records and from the frame size.
+     this.tupleBuilder = new ArrayTupleBuilder(inRecDesc.getFieldCount());
+     this.outAppender = new FrameTupleAppender(ctx.getFrameSize());
+     this.outFrameAccessor = new FrameTupleAccessor(ctx.getFrameSize(), inRecDesc);
+
+ }
+
+ public void initialize(LinkedList<RunFileReader> runFiles) throws HyracksDataException {
+     // Merges the given run files pass after pass until the run list is empty. On any failure the
+     // output writer is failed before the exception is propagated; the merge state is closed on
+     // every path, including the early return for an empty run list.
+     runs = runFiles;
+     try {
+         if (runs.size() <= 0) {
+             return;
+         }
+         inFrames = new ArrayList<ByteBuffer>();
+         outFrame = ctx.allocateFrame();
+         outAppender.reset(outFrame, true);
+         outFrameAccessor.reset(outFrame);
+         int runProcOffset = 0;
+         while (runs.size() > 0) {
+             doPass(runs, runProcOffset);
+             if (runs.size() + 2 <= framesLimit) {
+                 // final phase
+                 runProcOffset = 0;
+             } else {
+                 // one more merge level
+                 runProcOffset++;
+             }
+         }
+         inFrames.clear();
+     } catch (HyracksDataException e) {
+         // Already the declared exception type: rethrow it as is instead of wrapping it again.
+         outputWriter.fail();
+         throw e;
+     } catch (Exception e) {
+         outputWriter.fail();
+         throw new HyracksDataException(e);
+     } finally {
+         mergeState.close();
+     }
+ }
+
+ private void doPass(LinkedList<RunFileReader> runs, int offset) throws HyracksDataException {
+     // Merges the runs starting at position offset of the list. When every run fits into the input frames
+     // the merge goes straight to the output writer (final pass); otherwise up to framesLimit - 2 runs are
+     // merged into one new run file, which replaces them in the list.
+     FileReference newRun = null;
+     IFrameWriter writer = outputWriter;
+     boolean finalPass = false;
+
+     // initialize() advances offset after every intermediate pass. Once fewer than two runs are left
+     // from there on, the next merge level starts again at the head of the list.
+     int firstRun = (runs.size() - offset >= 2) ? offset : 0;
+     int runNumber = runs.size() - firstRun;
+
+     while (inFrames.size() + 2 < framesLimit) {
+         inFrames.add(ctx.allocateFrame());
+     }
+
+     if (firstRun == 0 && runNumber + 2 <= framesLimit) {
+         // Only a pass that covers every run in the list may produce final results.
+         finalPass = true;
+         if (isLoadOptimized) {
+             runFrameLimit = (framesLimit - 2) / runNumber;
+         } else {
+             runFrameLimit = 1;
+         }
+     } else {
+         if (framesLimit < 4) {
+             // With fewer than two input frames an intermediate pass cannot reduce the number of runs.
+             throw new HyracksDataException("At least 4 frames are required to merge " + runs.size()
+                     + " runs, but the frame limit is " + framesLimit + ".");
+         }
+         runFrameLimit = 1;
+         runNumber = Math.min(runNumber, framesLimit - 2);
+         newRun = ctx.getJobletContext().createManagedWorkspaceFile(
+                 HybridHashSortGrouperBucketMerge.class.getSimpleName());
+         writer = new RunFileWriter(newRun, ctx.getIOManager());
+         writer.open();
+     }
+
+     try {
+         currentFrameIndexInRun = new int[runNumber];
+         currentRunFrames = new int[runNumber];
+         currentBucketInRun = new int[runNumber];
+         // Readers are created only for the runs taking part in this pass.
+         RunFileReader[] runFileReaders = new RunFileReader[runNumber];
+         FrameTupleAccessor[] tupleAccessors = new FrameTupleAccessor[inFrames.size()];
+         Comparator<ReferenceHashEntry> comparator = createEntryComparator(comparators);
+         ReferencedBucketBasedPriorityQueue topTuples = new ReferencedBucketBasedPriorityQueue(ctx.getFrameSize(),
+                 inRecDesc, runNumber, comparator, tpc, tableSize);
+         // current tuple index in each run
+         int[] tupleIndices = new int[runNumber];
+
+         for (int i = 0; i < runNumber; i++) {
+             tupleIndices[i] = 0;
+             // Load the run file. The per-pass arrays are indexed by i, the position within this pass;
+             // only the run list itself is addressed from firstRun.
+             runFileReaders[i] = runs.get(firstRun + i);
+             runFileReaders[i].open();
+
+             currentRunFrames[i] = 0;
+             currentFrameIndexInRun[i] = i * runFrameLimit;
+             for (int j = 0; j < runFrameLimit; j++) {
+                 int frameIndex = currentFrameIndexInRun[i] + j;
+                 boolean hasNextFrame = runFileReaders[i].nextFrame(inFrames.get(frameIndex));
+                 if (hasNextFrame) {
+                     tupleAccessors[frameIndex] = new FrameTupleAccessor(ctx.getFrameSize(), inRecDesc);
+                     tupleAccessors[frameIndex].reset(inFrames.get(frameIndex));
+                     currentRunFrames[i]++;
+                     if (j == 0) {
+                         currentBucketInRun[i] = tpc.partition(tupleAccessors[frameIndex], tupleIndices[i],
+                                 tableSize);
+                         setNextTopTuple(i, tupleIndices, runFileReaders, tupleAccessors, topTuples);
+                     }
+                 } else {
+                     break;
+                 }
+             }
+         }
+
+         // Start merging
+         while (!topTuples.areRunsExhausted()) {
+             // Get the top record
+             ReferenceEntry top = topTuples.peek();
+             int tupleIndex = top.getTupleIndex();
+             int runIndex = topTuples.peek().getRunid();
+
+             FrameTupleAccessor fta = top.getAccessor();
+
+             int currentTupleInOutFrame = outFrameAccessor.getTupleCount() - 1;
+             if (currentTupleInOutFrame < 0
+                     || compareFrameTuples(fta, tupleIndex, outFrameAccessor, currentTupleInOutFrame) != 0) {
+                 // A new group starts: build its initial partial result and append it to the output frame.
+                 tupleBuilder.reset();
+
+                 for (int k = 0; k < keyFields.length; k++) {
+                     tupleBuilder.addField(fta, tupleIndex, keyFields[k]);
+                 }
+
+                 merger.init(tupleBuilder, fta, tupleIndex, mergeState);
+
+                 if (!outAppender.appendSkipEmptyField(tupleBuilder.getFieldEndOffsets(),
+                         tupleBuilder.getByteArray(), 0, tupleBuilder.getSize())) {
+                     flushOutFrame(writer, finalPass);
+                     if (!outAppender.appendSkipEmptyField(tupleBuilder.getFieldEndOffsets(),
+                             tupleBuilder.getByteArray(), 0, tupleBuilder.getSize())) {
+                         throw new HyracksDataException(
+                                 "The partial result is too large to be initialized in a frame.");
+                     }
+                 }
+             } else {
+                 /**
+                  * if new tuple is in the same group of the
+                  * current aggregator do merge and output to the
+                  * outFrame
+                  */
+                 merger.aggregate(fta, tupleIndex, outFrameAccessor, currentTupleInOutFrame, mergeState);
+             }
+             tupleIndices[runIndex]++;
+             setNextTopTuple(runIndex, tupleIndices, runFileReaders, tupleAccessors, topTuples);
+         }
+
+         if (outAppender.getTupleCount() > 0) {
+             flushOutFrame(writer, finalPass);
+             outAppender.reset(outFrame, true);
+         }
+
+         merger.close();
+
+         // Drop exactly the runs merged by this pass ...
+         runs.subList(firstRun, firstRun + runNumber).clear();
+         // ... and, for an intermediate pass, put the merged run in their place.
+         if (!finalPass) {
+             runs.add(firstRun, ((RunFileWriter) writer).createReader());
+         }
+     } finally {
+         if (!finalPass) {
+             writer.close();
+         }
+         mergeState.reset();
+     }
+ }
+
+ private void flushOutFrame(IFrameWriter writer, boolean isFinal) throws HyracksDataException {
+     // Turns every tuple held in outFrame into an output tuple (final or partial result), pushes the
+     // output tuples to the given writer through writerFrame, and finally empties outFrame.
+     // The helper objects are created lazily on first use.
+     if (finalTupleBuilder == null) {
+         finalTupleBuilder = new ArrayTupleBuilder(inRecDesc.getFields().length);
+     }
+     if (writerFrame == null) {
+         writerFrame = ctx.allocateFrame();
+     }
+     if (writerAppender == null) {
+         writerAppender = new FrameTupleAppender(ctx.getFrameSize());
+     }
+
+     writerAppender.reset(writerFrame, true);
+     outFrameAccessor.reset(outFrame);
+
+     int groupIndex = 0;
+     while (groupIndex < outFrameAccessor.getTupleCount()) {
+         // Key fields first, then whatever the merger's output method adds to the builder.
+         finalTupleBuilder.reset();
+         for (int keyField : keyFields) {
+             finalTupleBuilder.addField(outFrameAccessor, groupIndex, keyField);
+         }
+         if (!isFinal) {
+             merger.outputPartialResult(finalTupleBuilder, outFrameAccessor, groupIndex, mergeState);
+         } else {
+             merger.outputFinalResult(finalTupleBuilder, outFrameAccessor, groupIndex, mergeState);
+         }
+
+         boolean appended = writerAppender.appendSkipEmptyField(finalTupleBuilder.getFieldEndOffsets(),
+                 finalTupleBuilder.getByteArray(), 0, finalTupleBuilder.getSize());
+         if (!appended) {
+             // The tuple did not fit: send writerFrame and retry on the emptied frame.
+             FrameUtils.flushFrame(writerFrame, writer);
+             writerAppender.reset(writerFrame, true);
+             appended = writerAppender.appendSkipEmptyField(finalTupleBuilder.getFieldEndOffsets(),
+                     finalTupleBuilder.getByteArray(), 0, finalTupleBuilder.getSize());
+             if (!appended) {
+                 throw new HyracksDataException("Aggregation output is too large to be fit into a frame.");
+             }
+         }
+         groupIndex++;
+     }
+
+     if (writerAppender.getTupleCount() > 0) {
+         FrameUtils.flushFrame(writerFrame, writer);
+         writerAppender.reset(writerFrame, true);
+     }
+     outAppender.reset(outFrame, true);
+ }
+
+ private void setNextTopTuple(int runIndex, int[] tupleIndices, RunFileReader[] runCursors,
+ FrameTupleAccessor[] tupleAccessors, ReferencedBucketBasedPriorityQueue topTuples)
+ throws HyracksDataException { // replaces the queue's top entry with the next tuple of run runIndex, or pops it and closes the run when none is left
+ int runStart = runIndex * runFrameLimit; // first input-frame slot assigned to this run
+ boolean existNext = false;
+ if (tupleAccessors[currentFrameIndexInRun[runIndex]] == null || runCursors[runIndex] == null) {
+ /**
+ * run already closed
+ */
+ existNext = false;
+ } else if (currentFrameIndexInRun[runIndex] - runStart < currentRunFrames[runIndex] - 1) {
+ /**
+ * not the last loaded frame of this run; step to the next loaded frame once this one is used up
+ */
+ existNext = true;
+ if (tupleIndices[runIndex] >= tupleAccessors[currentFrameIndexInRun[runIndex]].getTupleCount()) {
+ tupleIndices[runIndex] = 0;
+ currentFrameIndexInRun[runIndex]++;
+ }
+ } else if (tupleIndices[runIndex] < tupleAccessors[currentFrameIndexInRun[runIndex]].getTupleCount()) {
+ /**
+ * in the last loaded frame of this run, which still has unread tuples
+ */
+ existNext = true;
+ } else {
+ /**
+ * All tuples of the loaded frames have been consumed:
+ * restart at the run's first frame slot and refill.
+ */
+ tupleIndices[runIndex] = 0;
+ currentFrameIndexInRun[runIndex] = runStart;
+ /**
+ * read in batch: up to runFrameLimit frames, stopping at the first frame the reader cannot supply
+ */
+ currentRunFrames[runIndex] = 0;
+ for (int j = 0; j < runFrameLimit; j++) {
+ int frameIndex = currentFrameIndexInRun[runIndex] + j;
+ if (runCursors[runIndex].nextFrame(inFrames.get(frameIndex))) {
+ tupleAccessors[frameIndex].reset(inFrames.get(frameIndex));
+ existNext = true;
+ currentRunFrames[runIndex]++;
+ } else {
+ break;
+ }
+ }
+ }
+
+ if (existNext) {
+ topTuples.popAndReplace(tupleAccessors[currentFrameIndexInRun[runIndex]], tupleIndices[runIndex]);
+ } else {
+ topTuples.pop(); // nothing left in this run: remove its entry and release the run
+ closeRun(runIndex, runCursors, tupleAccessors);
+ }
+ }
+
+ /**
+  * Closes the reader of one run and clears the tuple accessor slots of the input frames assigned
+  * to that run. Does nothing when the reader slot is already null.
+  * @param index position of the run within the current pass
+  * @param runCursors run readers of the current pass; the closed slot is set to null
+  * @param tupleAccessor accessors of the input frames; the slots of this run are set to null
+  * @throws HyracksDataException declared for the reader's close call
+  */
+ private void closeRun(int index, RunFileReader[] runCursors, IFrameTupleAccessor[] tupleAccessor)
+         throws HyracksDataException {
+     RunFileReader cursor = runCursors[index];
+     if (cursor == null) {
+         return;
+     }
+     cursor.close();
+     runCursors[index] = null;
+     for (int slot = index * runFrameLimit, end = slot + runFrameLimit; slot < end; slot++) {
+         tupleAccessor[slot] = null;
+     }
+ }
+
+ private int compareFrameTuples(IFrameTupleAccessor fta1, int j1, IFrameTupleAccessor fta2, int j2) {
+     // Compares the two tuples on fields 0 .. keyFields.length - 1; first non-zero comparator result, else 0.
+     byte[] bytes1 = fta1.getBuffer().array();
+     byte[] bytes2 = fta2.getBuffer().array();
+     for (int field = 0; field < keyFields.length; field++) {
+         int start1 = fta1.getTupleStartOffset(j1) + fta1.getFieldSlotsLength()
+                 + fta1.getFieldStartOffset(j1, field);
+         int length1 = fta1.getFieldLength(j1, field);
+         int start2 = fta2.getTupleStartOffset(j2) + fta2.getFieldSlotsLength()
+                 + fta2.getFieldStartOffset(j2, field);
+         int length2 = fta2.getFieldEndOffset(j2, field) - fta2.getFieldStartOffset(j2, field);
+         int result = comparators[field].compare(bytes1, start1, length1, bytes2, start2, length2);
+         if (result != 0) {
+             return result;
+         }
+     }
+     return 0;
+ }
+
+ private Comparator<ReferenceHashEntry> createEntryComparator(final IBinaryComparator[] comparators) {
+     // Orders queue entries by hash value first and, for equal hash values, by the key fields
+     // (fields 0 .. keyFields.length - 1) using the given binary comparators.
+     return new Comparator<ReferenceHashEntry>() {
+         @Override
+         public int compare(ReferenceHashEntry o1, ReferenceHashEntry o2) {
+             // Compare explicitly: the difference of two ints can overflow and flip the sign.
+             int hash1 = o1.getHashValue();
+             int hash2 = o2.getHashValue();
+             if (hash1 != hash2) {
+                 return hash1 < hash2 ? -1 : 1;
+             }
+             FrameTupleAccessor fta1 = (FrameTupleAccessor) o1.getAccessor();
+             FrameTupleAccessor fta2 = (FrameTupleAccessor) o2.getAccessor();
+             int j1 = o1.getTupleIndex();
+             int j2 = o2.getTupleIndex();
+             byte[] b1 = fta1.getBuffer().array();
+             byte[] b2 = fta2.getBuffer().array();
+             for (int f = 0; f < keyFields.length; ++f) {
+                 int s1 = fta1.getTupleStartOffset(j1) + fta1.getFieldSlotsLength()
+                         + fta1.getFieldStartOffset(j1, f);
+                 int l1 = fta1.getFieldEndOffset(j1, f) - fta1.getFieldStartOffset(j1, f);
+                 int s2 = fta2.getTupleStartOffset(j2) + fta2.getFieldSlotsLength()
+                         + fta2.getFieldStartOffset(j2, f);
+                 int l2 = fta2.getFieldEndOffset(j2, f) - fta2.getFieldStartOffset(j2, f);
+                 int c = comparators[f].compare(b1, s1, l1, b2, s2, l2);
+                 if (c != 0) {
+                     return c;
+                 }
+             }
+             return 0;
+         }
+     };
+ }
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortRunMerger.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortRunMerger.java
new file mode 100644
index 0000000..a846e4a
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/HybridHashSortRunMerger.java
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.nio.ByteBuffer;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.comm.IFrameReader;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileReader;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileWriter;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+
+ public class HybridHashSortRunMerger {
+ // Merges the run files in "runs" through GroupRunMergingFrameReader: intermediate passes write new run files, the last pass writes to "writer".
+ private final IHyracksTaskContext ctx;
+ private final List<RunFileReader> runs;
+ private final int[] keyFields;
+ private final IBinaryComparator[] comparators;
+ private final RecordDescriptor recordDesc;
+ private final int framesLimit; // process() merges at most framesLimit - 1 runs at a time
+ private final int tableSize;
+ private final IFrameWriter writer; // receives the output of the final merge
+ private final IAggregatorDescriptor grouper;
+ private final ITuplePartitionComputer tpc;
+ private ByteBuffer outFrame; // frame filled by the merging reader and flushed to the current writer
+ private FrameTupleAppender outFrameAppender; // NOTE(review): only created and reset in process(); no other use is visible in this class
+ private final boolean isLoadBuffered;
+
+ public HybridHashSortRunMerger(IHyracksTaskContext ctx, LinkedList<RunFileReader> runs, int[] keyFields,
+ IBinaryComparator[] comparators, RecordDescriptor recordDesc, ITuplePartitionComputer tpc,
+ IAggregatorDescriptor grouper, int framesLimit, int tableSize, IFrameWriter writer, boolean isLoadBuffered) {
+ this.ctx = ctx;
+ this.runs = runs; // kept by reference: process() removes from and adds to this list
+ this.keyFields = keyFields;
+ this.comparators = comparators;
+ this.recordDesc = recordDesc;
+ this.framesLimit = framesLimit;
+ this.writer = writer;
+ this.isLoadBuffered = isLoadBuffered;
+ this.tableSize = tableSize;
+ this.tpc = tpc;
+ this.grouper = grouper;
+ }
+
+ public void process() throws HyracksDataException {
+ // Merges groups of runs into new run files until at most (framesLimit - 1) runs remain, then merges the remaining runs into "writer".
+ // FIXME (original marker): these two totals are only published as "optional." counters in the finally block
+ int mergeLevels = 0, mergeRunCount = 0;
+ try {
+
+ outFrame = ctx.allocateFrame();
+ outFrameAppender = new FrameTupleAppender(ctx.getFrameSize());
+ outFrameAppender.reset(outFrame, true);
+
+ int maxMergeWidth = framesLimit - 1;
+ while (runs.size() > maxMergeWidth) { // each outer iteration is counted as one merge level
+ int generationSeparator = 0;
+ // FIXME (original marker): number of merges performed within this level
+ int mergeRounds = 0;
+ while (generationSeparator < runs.size() && runs.size() > maxMergeWidth) {
+ int mergeWidth = Math.min(Math.min(runs.size() - generationSeparator, maxMergeWidth), runs.size()
+ - maxMergeWidth + 1); // never merge more runs than needed to get down to maxMergeWidth
+ FileReference newRun = null; // overwritten two lines below
+ IFrameWriter mergeResultWriter = this.writer; // overwritten two lines below; intermediate merges go to a run file
+ newRun = ctx.createManagedWorkspaceFile(HybridHashSortRunMerger.class.getSimpleName());
+ mergeResultWriter = new RunFileWriter(newRun, ctx.getIOManager());
+ mergeResultWriter.open(); // NOTE(review): no matching close() for this run writer is visible in this class - confirm whether one is needed before createReader()
+ IFrameReader[] runCursors = new RunFileReader[mergeWidth];
+ for (int i = 0; i < mergeWidth; i++) {
+ runCursors[i] = runs.get(generationSeparator + i);
+ }
+ merge(mergeResultWriter, runCursors, false);
+ runs.subList(generationSeparator, generationSeparator + mergeWidth).clear(); // drop the runs that were just merged ...
+ runs.add(generationSeparator++, ((RunFileWriter) mergeResultWriter).createReader()); // ... and put a reader over the new run in their place
+ mergeRounds++;
+ }
+ mergeLevels++;
+ mergeRunCount += mergeRounds;
+ }
+ if (!runs.isEmpty()) {
+ IFrameReader[] runCursors = new RunFileReader[runs.size()];
+ for (int i = 0; i < runCursors.length; i++) {
+ runCursors[i] = runs.get(i);
+ }
+ merge(writer, runCursors, true); // last pass: isFinal = true, output goes to the writer given to the constructor
+ }
+ } catch (Exception e) {
+ writer.fail(); // signal the failure to the output writer before rethrowing
+ throw new HyracksDataException(e);
+ } finally { // the counters are set whether or not the merge succeeded
+
+ ctx.getCounterContext()
+ .getCounter("optional." + HybridHashSortRunMerger.class.getSimpleName() + ".merge.runs.count", true)
+ .set(mergeRunCount);
+
+ ctx.getCounterContext()
+ .getCounter("optional." + HybridHashSortRunMerger.class.getSimpleName() + ".merge.levels", true)
+ .set(mergeLevels);
+ }
+ }
+
+ private void merge(IFrameWriter mergeResultWriter, IFrameReader[] runCursors, boolean isFinal)
+ throws HyracksDataException {
+ // FIXME (original marker): start time of this merge; the elapsed time is added to the ".merge.time" counter at the end
+ long methodTimer = System.nanoTime();
+ // Every frame produced by the merging reader is flushed to mergeResultWriter; the reader is closed even if that loop throws.
+ IFrameReader merger = new GroupRunMergingFrameReader(ctx, runCursors, framesLimit, tableSize, keyFields, tpc,
+ comparators, grouper, recordDesc, isFinal, isLoadBuffered);
+ merger.open();
+ try {
+ while (merger.nextFrame(outFrame)) {
+ FrameUtils.flushFrame(outFrame, mergeResultWriter);
+ }
+ } finally {
+ merger.close();
+ }
+ ctx.getCounterContext()
+ .getCounter("optional." + HybridHashSortRunMerger.class.getSimpleName() + ".merge.time", true)
+ .update(System.nanoTime() - methodTimer); // not reached when the merge throws
+ }
+ }
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferenceEntryWithBucketID.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferenceEntryWithBucketID.java
new file mode 100644
index 0000000..3c91fea
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferenceEntryWithBucketID.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.std.util.ReferenceEntry;
+
+ public class ReferenceEntryWithBucketID extends ReferenceEntry {
+ // A ReferenceEntry that additionally carries a mutable bucket id.
+ private int bucketID;
+
+ public ReferenceEntryWithBucketID(int runid, FrameTupleAccessor fta, int tupleIndex, int bucketID) {
+ super(runid, fta, tupleIndex); // run id, accessor and tuple index are passed on to ReferenceEntry
+ this.bucketID = bucketID;
+ }
+
+ public int getBucketID() {
+ return bucketID;
+ }
+
+ public void setBucketID(int bucketID) {
+ this.bucketID = bucketID;
+ }
+
+ }
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferenceHashEntry.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferenceHashEntry.java
new file mode 100644
index 0000000..394f0a8
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferenceHashEntry.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.std.util.ReferenceEntry;
+
+ public class ReferenceHashEntry extends ReferenceEntry {
+ // A ReferenceEntry that additionally carries a mutable hash value.
+ private int hashValue;
+
+ public ReferenceHashEntry(int runid, FrameTupleAccessor fta, int tupleIndex, int hashVal) {
+ super(runid, fta, tupleIndex); // run id, accessor and tuple index are passed on to ReferenceEntry
+ this.hashValue = hashVal;
+ }
+
+ public int getHashValue() {
+ return hashValue;
+ }
+
+ public void setHashValue(int hashVal) {
+ this.hashValue = hashVal;
+ }
+
+ }
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferencedBucketBasedPriorityQueue.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferencedBucketBasedPriorityQueue.java
new file mode 100644
index 0000000..adfbe81
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferencedBucketBasedPriorityQueue.java
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.io.IOException;
+import java.util.BitSet;
+import java.util.Comparator;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.std.util.ReferenceEntry;
+
+ public class ReferencedBucketBasedPriorityQueue {
+ // Priority queue holding one entry slot per run id; entries carry a hash value computed with the partition computer and are ordered by the supplied comparator.
+ private final int frameSize;
+ private final RecordDescriptor recordDescriptor;
+ private final ReferenceHashEntry entries[];
+ private final int size; // number of entry slots (initSize rounded up to an even number)
+ private final BitSet runAvail; // bit i is set while run i is still available
+ private int nItems; // starts at initSize, decremented by pop()
+ private final int tableSize;
+
+ private final Comparator<ReferenceHashEntry> comparator;
+
+ private final ITuplePartitionComputer tpc;
+
+ public ReferencedBucketBasedPriorityQueue(int frameSize, RecordDescriptor recordDescriptor, int initSize,
+ Comparator<ReferenceHashEntry> comparator, ITuplePartitionComputer tpc, int tableSize) {
+ this.frameSize = frameSize;
+ this.recordDescriptor = recordDescriptor;
+ if (initSize < 1)
+ throw new IllegalArgumentException();
+ this.comparator = comparator;
+ nItems = initSize;
+ size = (initSize + 1) & 0xfffffffe; // round initSize up to an even number
+ entries = new ReferenceHashEntry[size];
+ runAvail = new BitSet(size);
+ runAvail.set(0, initSize, true); // runs 0 .. initSize-1 start out as available
+ for (int i = 0; i < size; i++) {
+ entries[i] = new ReferenceHashEntry(i, null, -1, -1); // placeholder: run id i, no accessor, tuple index and hash value -1
+ }
+ this.tpc = tpc;
+ this.tableSize = tableSize;
+ }
+
+ /**
+ * Retrieve the top entry without removing it
+ *
+ * @return the top entry
+ */
+ public ReferenceEntry peek() {
+ return entries[0];
+ }
+
+ /**
+ * Re-point the top entry at the given tuple, recompute its hash value and
+ * push it back into the queue to find its new spot
+ *
+ * @param fta accessor whose buffer contains the tuple
+ * @param tIndex index of the tuple within that buffer
+ * @return runid of the entry that was reused
+ * @throws HyracksDataException
+ */
+ public int popAndReplace(FrameTupleAccessor fta, int tIndex) throws HyracksDataException {
+ ReferenceHashEntry entry = entries[0];
+ if (entry.getAccessor() == null) {
+ entry.setAccessor(new FrameTupleAccessor(frameSize, recordDescriptor)); // accessor is created lazily the first time the entry is used
+ }
+ entry.getAccessor().reset(fta.getBuffer());
+ entry.setTupleIndex(tIndex);
+ entry.setHashValue(tpc.partition(fta, tIndex, tableSize)); // hash value = partition of the tuple over tableSize
+
+ add(entry);
+ return entry.getRunid();
+ }
+
+ /**
+ * Push entry into priority queue
+ *
+ * @param e
+ * the new Entry
+ * @throws HyracksDataException
+ */
+ private void add(ReferenceHashEntry e) throws HyracksDataException {
+ ReferenceHashEntry min = entries[0];
+ int slot = (size >> 1) + (min.getRunid() >> 1); // starting slot is derived from the run id of the current top entry; the loop halves it until it reaches 0
+
+ ReferenceHashEntry curr = e;
+ while (!runAvail.isEmpty() && slot > 0) {
+ int c = 0;
+ if (!runAvail.get(entries[slot].getRunid())) {
+ // run of entries[slot] is exhausted, i.e. not available, curr
+ // wins
+ c = 1;
+ } else if (entries[slot].getAccessor() != null /*
+ * entries[slot] is
+ * not MIN value
+ */
+ && runAvail.get(curr.getRunid() /* curr run is available */)) {
+
+ if (curr.getAccessor() != null) {
+ c = comparator.compare(entries[slot], curr);
+ } else {
+ // curr is MIN value, wins
+ c = 1;
+ }
+ }
+
+ if (c <= 0) { // curr lost
+ // entries[slot] swaps up
+ ReferenceHashEntry tmp = entries[slot];
+ entries[slot] = curr;
+ curr = tmp;// winner to pass up
+ }// else curr wins
+ slot >>= 1;
+ }
+ // set new entries[0]
+ entries[0] = curr;
+ }
+
+ /**
+ * Pop is called only when a run is exhausted
+ *
+ * @return the entry that was on top; its run is marked as unavailable
+ * @throws HyracksDataException
+ */
+ public ReferenceHashEntry pop() throws HyracksDataException {
+ ReferenceHashEntry min = entries[0];
+ runAvail.clear(min.getRunid()); // the run of the top entry is no longer available
+ add(min);
+ nItems--;
+ return min;
+ }
+
+ public boolean areRunsExhausted() {
+ return runAvail.isEmpty(); // true once every run has been cleared by pop()
+ }
+
+ public int size() {
+ return nItems;
+ }
+ }
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferencedPriorityQueue.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferencedPriorityQueue.java
new file mode 100644
index 0000000..d9d5118
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hashsort/ReferencedPriorityQueue.java
@@ -0,0 +1,133 @@
+package edu.uci.ics.hyracks.dataflow.std.group.hashsort;
+
+import java.io.IOException;
+import java.util.BitSet;
+import java.util.Comparator;
+
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+
+/**
+ * TODO: needs to be merged with the ReferencedPriorityQueue in the util package
+ */
+ public class ReferencedPriorityQueue {
+ private final int frameSize;
+ private final RecordDescriptor recordDescriptor;
+ private final ReferenceEntryWithBucketID entries[];
+ private final int size; // number of entry slots (initSize rounded up to an even number)
+ private final BitSet runAvail; // bit i is set while run i is still available
+ private int nItems; // starts at initSize, decremented by pop()
+
+ private final Comparator<ReferenceEntryWithBucketID> comparator;
+
+ public ReferencedPriorityQueue(int frameSize, RecordDescriptor recordDescriptor, int initSize,
+ Comparator<ReferenceEntryWithBucketID> comparator) {
+ this.frameSize = frameSize;
+ this.recordDescriptor = recordDescriptor;
+ if (initSize < 1)
+ throw new IllegalArgumentException();
+ this.comparator = comparator;
+ nItems = initSize;
+ size = (initSize + 1) & 0xfffffffe; // round initSize up to an even number
+ entries = new ReferenceEntryWithBucketID[size];
+ runAvail = new BitSet(size);
+ runAvail.set(0, initSize, true); // runs 0 .. initSize-1 start out as available
+ for (int i = 0; i < size; i++) {
+ entries[i] = new ReferenceEntryWithBucketID(i, null, -1, -1); // placeholder: run id i, no accessor, tuple index and bucket id -1
+ }
+ }
+
+ /**
+ * Retrieve the top entry without removing it
+ *
+ * @return the top entry
+ */
+ public ReferenceEntryWithBucketID peek() {
+ return entries[0];
+ }
+
+ /**
+ * Re-point the top entry at the given tuple and bucket id, then push it
+ * back into the queue to find its new spot
+ * @param fta accessor whose buffer contains the tuple
+ * @param tIndex index of the tuple within that buffer
+ * @param bucketID bucket id to store on the entry
+ * @return runid of the entry that was reused
+ */
+ public int popAndReplace(FrameTupleAccessor fta, int tIndex, int bucketID) {
+ ReferenceEntryWithBucketID entry = entries[0];
+ if (entry.getAccessor() == null) {
+ entry.setAccessor(new FrameTupleAccessor(frameSize, recordDescriptor)); // accessor is created lazily the first time the entry is used
+ }
+ entry.getAccessor().reset(fta.getBuffer());
+ entry.setTupleIndex(tIndex);
+ entry.setBucketID(bucketID);
+
+ add(entry);
+ return entry.getRunid();
+ }
+
+ /**
+ * Push entry into priority queue
+ *
+ * @param e
+ * the new Entry
+ */
+ private void add(ReferenceEntryWithBucketID e) {
+ ReferenceEntryWithBucketID min = entries[0];
+ int slot = (size >> 1) + (min.getRunid() >> 1); // starting slot is derived from the run id of the current top entry; the loop halves it until it reaches 0
+
+ ReferenceEntryWithBucketID curr = e;
+ while (!runAvail.isEmpty() && slot > 0) {
+ int c = 0;
+ if (!runAvail.get(entries[slot].getRunid())) {
+ // run of entries[slot] is exhausted, i.e. not available, curr
+ // wins
+ c = 1;
+ } else if (entries[slot].getAccessor() != null /*
+ * entries[slot] is
+ * not MIN value
+ */
+ && runAvail.get(curr.getRunid() /* curr run is available */)) {
+
+ if (curr.getAccessor() != null) {
+ c = comparator.compare(entries[slot], curr);
+ } else {
+ // curr is MIN value, wins
+ c = 1;
+ }
+ }
+
+ if (c <= 0) { // curr lost
+ // entries[slot] swaps up
+ ReferenceEntryWithBucketID tmp = entries[slot];
+ entries[slot] = curr;
+ curr = tmp;// winner to pass up
+ }// else curr wins
+ slot >>= 1;
+ }
+ // set new entries[0]
+ entries[0] = curr;
+ }
+
+ /**
+ * Pop is called only when a run is exhausted
+ *
+ * @return the entry that was on top; its run is marked as unavailable
+ */
+ public ReferenceEntryWithBucketID pop() {
+ ReferenceEntryWithBucketID min = entries[0];
+ runAvail.clear(min.getRunid()); // the run of the top entry is no longer available
+ add(min);
+ nItems--;
+ return min;
+ }
+
+ public boolean areRunsExhausted() {
+ return runAvail.isEmpty(); // true once every run has been cleared by pop()
+ }
+
+ public int size() {
+ return nItems;
+ }
+ }
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/FrameTupleAccessorForGroupHashtable.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/FrameTupleAccessorForGroupHashtable.java
new file mode 100644
index 0000000..72bae76
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/FrameTupleAccessorForGroupHashtable.java
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hybridhash;
+
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.comm.FrameHelper;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+
+ public class FrameTupleAccessorForGroupHashtable implements IFrameTupleAccessor {
+ private final int frameSize;
+ private final RecordDescriptor recordDescriptor;
+
+ private final static int INT_SIZE = 4; // bytes per int stored in the frame
+
+ private ByteBuffer buffer; // frame currently being read; set by reset()
+
+ public FrameTupleAccessorForGroupHashtable(int frameSize, RecordDescriptor recordDescriptor) {
+ this.frameSize = frameSize;
+ this.recordDescriptor = recordDescriptor;
+ }
+
+ /*
+ * (non-Javadoc)
+ * Number of fields, as reported by the record descriptor.
+ * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getFieldCount()
+ */
+ @Override
+ public int getFieldCount() {
+ return recordDescriptor.getFieldCount();
+ }
+
+ /*
+ * (non-Javadoc)
+ * Size in bytes of the field slots of one tuple: 4 bytes per field.
+ * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getFieldSlotsLength()
+ */
+ @Override
+ public int getFieldSlotsLength() {
+ return getFieldCount() * 4;
+ }
+
+ /*
+ * (non-Javadoc)
+ * Value stored in the fIdx-th field slot of the tuple.
+ * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getFieldEndOffset(int, int)
+ */
+ @Override
+ public int getFieldEndOffset(int tupleIndex, int fIdx) {
+ return buffer.getInt(getTupleStartOffset(tupleIndex) + fIdx * 4);
+ }
+
+ /*
+ * (non-Javadoc)
+ * 0 for the first field, otherwise the value stored in the previous field's slot.
+ * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getFieldStartOffset(int, int)
+ */
+ @Override
+ public int getFieldStartOffset(int tupleIndex, int fIdx) {
+ return fIdx == 0 ? 0 : buffer.getInt(getTupleStartOffset(tupleIndex) + (fIdx - 1) * 4);
+ }
+
+ /*
+ * (non-Javadoc)
+ * Field end offset minus field start offset.
+ * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getFieldLength(int, int)
+ */
+ @Override
+ public int getFieldLength(int tupleIndex, int fIdx) {
+ return getFieldEndOffset(tupleIndex, fIdx) - getFieldStartOffset(tupleIndex, fIdx);
+ }
+
+ /*
+ * (non-Javadoc)
+ * Stored end offset of the tuple minus the two trailing ints of its hash reference.
+ * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getTupleEndOffset(int)
+ */
+ @Override
+ public int getTupleEndOffset(int tupleIndex) {
+ return buffer.getInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleIndex + 1)) - 2 * INT_SIZE;
+ }
+
+ /*
+ * (non-Javadoc)
+ * 0 for the first tuple, otherwise the end offset stored for the previous tuple.
+ * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getTupleStartOffset(int)
+ */
+ @Override
+ public int getTupleStartOffset(int tupleIndex) {
+ return tupleIndex == 0 ? 0 : buffer.getInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * tupleIndex);
+ }
+
+ /*
+ * (non-Javadoc)
+ * Tuple count read from the frame at FrameHelper.getTupleCountOffset(frameSize).
+ * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getTupleCount()
+ */
+ @Override
+ public int getTupleCount() {
+ return buffer.getInt(FrameHelper.getTupleCountOffset(frameSize));
+ }
+
+ /*
+ * (non-Javadoc)
+ * The buffer passed to the last reset() call (null before the first call).
+ * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#getBuffer()
+ */
+ @Override
+ public ByteBuffer getBuffer() {
+ return buffer;
+ }
+
+ /*
+ * (non-Javadoc)
+ * Point this accessor at the given buffer; the buffer is referenced, not copied.
+ * @see edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor#reset(java.nio.ByteBuffer)
+ */
+ @Override
+ public void reset(ByteBuffer buffer) {
+ this.buffer = buffer;
+ }
+
+ public int getTupleHashReferenceOffset(int tupleIndex) {
+ return getTupleEndOffset(tupleIndex); // the hash reference starts where getTupleEndOffset() says the tuple ends
+ }
+
+ public int getTupleEndOffsetWithHashReference(int tupleIndex) {
+ return buffer.getInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleIndex + 1)); // stored end offset, without subtracting the two hash-reference ints
+ }
+
+ public int getHashReferenceNextFrameIndex(int tupleIndex) {
+ return buffer.getInt(getTupleHashReferenceOffset(tupleIndex)); // first int of the hash reference
+ }
+
+ public int getHashReferenceNextTupleIndex(int tupleIndex) {
+ return buffer.getInt(getTupleHashReferenceOffset(tupleIndex) + INT_SIZE); // second int of the hash reference
+ }
+
+ }
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/FrameTupleAppenderForGroupHashtable.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/FrameTupleAppenderForGroupHashtable.java
new file mode 100644
index 0000000..c5668f5
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/FrameTupleAppenderForGroupHashtable.java
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hybridhash;
+
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.comm.FrameHelper;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+
+ public class FrameTupleAppenderForGroupHashtable {
+ private final int frameSize;
+ // Appends tuples to a frame buffer; every tuple is followed by two ints initialised to -1, and its end offset is recorded in a slot below the tuple count.
+ private ByteBuffer buffer;
+
+ private int tupleCount; // number of tuples currently in the frame
+
+ private int tupleDataEndOffset; // offset just past the last appended tuple, including its two trailing ints
+
+ public FrameTupleAppenderForGroupHashtable(int frameSize) {
+ this.frameSize = frameSize;
+ }
+ // Attach to a buffer: clear == true resets the frame to zero tuples, otherwise the count and data end are read back from the frame.
+ public void reset(ByteBuffer buffer, boolean clear) {
+ this.buffer = buffer;
+ if (clear) {
+ buffer.putInt(FrameHelper.getTupleCountOffset(frameSize), 0);
+ tupleCount = 0;
+ tupleDataEndOffset = 0;
+ } else {
+ tupleCount = buffer.getInt(FrameHelper.getTupleCountOffset(frameSize));
+ tupleDataEndOffset = tupleCount == 0 ? 0 : buffer.getInt(FrameHelper.getTupleCountOffset(frameSize)
+ - tupleCount * 4); // end-offset slot of the last tuple
+ }
+ }
+ // Append field slots followed by the field data; returns false (nothing written) when the tuple does not fit.
+ public boolean append(int[] fieldSlots, byte[] bytes, int offset, int length) {
+ if (tupleDataEndOffset + fieldSlots.length * 4 + length + 2 * 4 + 4 + (tupleCount + 1) * 4 <= frameSize) { // slots + data + two trailing ints + 4 (presumably the tuple count) + end-offset slots
+ for (int i = 0; i < fieldSlots.length; ++i) {
+ buffer.putInt(tupleDataEndOffset + i * 4, fieldSlots[i]);
+ }
+ System.arraycopy(bytes, offset, buffer.array(), tupleDataEndOffset + fieldSlots.length * 4, length);
+ buffer.putInt(tupleDataEndOffset + fieldSlots.length * 4 + length, -1); // first trailing int
+ buffer.putInt(tupleDataEndOffset + fieldSlots.length * 4 + length + 4, -1); // second trailing int
+ tupleDataEndOffset += fieldSlots.length * 4 + length + 2 * 4;
+ buffer.putInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleCount + 1), tupleDataEndOffset);
+ ++tupleCount;
+ buffer.putInt(FrameHelper.getTupleCountOffset(frameSize), tupleCount);
+ return true;
+ }
+ return false;
+ }
+ // Append an already laid-out tuple given as raw bytes; returns false (nothing written) when it does not fit.
+ public boolean append(byte[] bytes, int offset, int length) {
+ if (tupleDataEndOffset + length + 2 * 4 + 4 + (tupleCount + 1) * 4 <= frameSize) {
+ System.arraycopy(bytes, offset, buffer.array(), tupleDataEndOffset, length);
+ buffer.putInt(tupleDataEndOffset + length, -1);
+ buffer.putInt(tupleDataEndOffset + length + 4, -1);
+ tupleDataEndOffset += length + 2 * 4;
+ buffer.putInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleCount + 1), tupleDataEndOffset);
+ ++tupleCount;
+ buffer.putInt(FrameHelper.getTupleCountOffset(frameSize), tupleCount);
+ return true;
+ }
+ return false;
+ }
+ // Like append(int[], ...) but writes only the slots whose value is > 0; returns false (nothing written) when the tuple does not fit.
+ public boolean appendSkipEmptyField(int[] fieldSlots, byte[] bytes, int offset, int length) {
+ if (tupleDataEndOffset + fieldSlots.length * 4 + length + 2 * 4 + 4 + (tupleCount + 1) * 4 <= frameSize) { // space check still counts every slot, skipped or not
+ int effectiveSlots = 0;
+ for (int i = 0; i < fieldSlots.length; ++i) {
+ if (fieldSlots[i] > 0) {
+ buffer.putInt(tupleDataEndOffset + effectiveSlots * 4, fieldSlots[i]); // pack kept slots contiguously so the data copied after them cannot overwrite one
+ effectiveSlots++;
+ }
+ }
+ System.arraycopy(bytes, offset, buffer.array(), tupleDataEndOffset + effectiveSlots * 4, length);
+ buffer.putInt(tupleDataEndOffset + effectiveSlots * 4 + length, -1);
+ buffer.putInt(tupleDataEndOffset + effectiveSlots * 4 + length + 4, -1);
+ tupleDataEndOffset += effectiveSlots * 4 + length + 2 * 4;
+ buffer.putInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleCount + 1), tupleDataEndOffset);
+ ++tupleCount;
+ buffer.putInt(FrameHelper.getTupleCountOffset(frameSize), tupleCount);
+ return true;
+ }
+ return false;
+ }
+ // Copy the bytes [tStartOffset, tEndOffset) of the accessor's buffer as one tuple; returns false (nothing written) when it does not fit.
+ public boolean append(IFrameTupleAccessor tupleAccessor, int tStartOffset, int tEndOffset) {
+ int length = tEndOffset - tStartOffset;
+ if (tupleDataEndOffset + length + 2 * 4 + 4 + (tupleCount + 1) * 4 <= frameSize) {
+ ByteBuffer src = tupleAccessor.getBuffer();
+ System.arraycopy(src.array(), tStartOffset, buffer.array(), tupleDataEndOffset, length);
+ buffer.putInt(tupleDataEndOffset + length, -1);
+ buffer.putInt(tupleDataEndOffset + length + 4, -1);
+ tupleDataEndOffset += length + 2 * 4;
+ buffer.putInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleCount + 1), tupleDataEndOffset);
+ ++tupleCount;
+ buffer.putInt(FrameHelper.getTupleCountOffset(frameSize), tupleCount);
+ return true;
+ }
+ return false;
+ }
+ // Copy tuple tIndex of the accessor, using the start and end offsets the accessor reports for it.
+ public boolean append(IFrameTupleAccessor tupleAccessor, int tIndex) {
+ int tStartOffset = tupleAccessor.getTupleStartOffset(tIndex);
+ int tEndOffset = tupleAccessor.getTupleEndOffset(tIndex);
+ return append(tupleAccessor, tStartOffset, tEndOffset);
+ }
+
+ public int getTupleCount() {
+ return tupleCount;
+ }
+
+ public ByteBuffer getBuffer() {
+ return buffer;
+ }
+ }
+
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashGroupHashTable.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashGroupHashTable.java
new file mode 100644
index 0000000..b325b83
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashGroupHashTable.java
@@ -0,0 +1,609 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hybridhash;
+
+import java.nio.ByteBuffer;
+import java.util.LinkedList;
+import java.util.List;
+
+import edu.uci.ics.hyracks.api.comm.IFrameReader;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFamily;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileWriter;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.structures.TuplePointer;
+
+public class HybridHashGroupHashTable implements IFrameWriter {
+
+ private final static int HEADER_REF_EMPTY = -1;
+
+ private static final int INT_SIZE = 4;
+
+ private IHyracksTaskContext ctx;
+
+ private final int frameSize;
+
+ private final int framesLimit;
+
+ private final int tableSize;
+
+ private final int numOfPartitions;
+
+ private final IFrameWriter outputWriter;
+
+ private final IBinaryComparator[] comparators;
+
+ /**
+ * index for keys
+ */
+ private final int[] inputKeys, internalKeys;
+
+ private final RecordDescriptor inputRecordDescriptor, outputRecordDescriptor;
+
+ /**
+ * hash partitioner for hashing
+ */
+ private final ITuplePartitionComputer hashComputer;
+
+ /**
+ * hash partitioner for partitioning
+ */
+ private final ITuplePartitionComputer partitionComputer;
+
+ /**
+ * Hashtable headers
+ */
+ private ByteBuffer[] headers;
+
+ /**
+ * buffers for hash table
+ */
+ private ByteBuffer[] contents;
+
+ /**
+ * output buffers for spilled partitions
+ */
+ private ByteBuffer[] spilledPartOutputBuffers;
+
+ /**
+ * run writers for spilled partitions
+ */
+ private RunFileWriter[] spilledPartRunWriters;
+
+ private int[] spilledPartRunSizeArrayInFrames;
+ private int[] spilledPartRunSizeArrayInTuples;
+
+ private List<IFrameReader> spilledPartRunReaders = null;
+ private List<Integer> spilledRunAggregatedPages = null;
+ private List<Integer> spilledPartRunSizesInFrames = null;
+ private List<Integer> spilledPartRunSizesInTuples = null;
+
+ /**
+ * index of the current working buffer in hash table
+ */
+ private int currentHashTableFrame;
+
+ /**
+ * Aggregation state
+ */
+ private AggregateState htAggregateState;
+
+ /**
+ * the aggregator
+ */
+ private final IAggregatorDescriptor aggregator;
+
+ /**
+ * records inserted into the in-memory hash table (for hashing and aggregation)
+ */
+ private int hashedRawRecords = 0;
+
+ /**
+ * in-memory part size in tuples
+ */
+ private int hashedKeys = 0;
+
+ /**
+ * Hash table tuple pointer
+ */
+ private TuplePointer matchPointer;
+
+ /**
+ * Frame tuple accessor for input data frames
+ */
+ private FrameTupleAccessor inputFrameTupleAccessor;
+
+ /**
+ * flag for whether the hash table is full
+ */
+ private boolean isHashtableFull;
+
+ /**
+ * flag for only partition (no aggregation and hashing)
+ */
+ private boolean isPartitionOnly;
+
+ /**
+ * Tuple accessor for hash table contents
+ */
+ private FrameTupleAccessorForGroupHashtable hashtableRecordAccessor;
+
+ private ArrayTupleBuilder internalTupleBuilder;
+
+ private FrameTupleAppender spilledPartInsertAppender;
+
+ private FrameTupleAppenderForGroupHashtable htInsertAppender;
+
+ public HybridHashGroupHashTable(IHyracksTaskContext ctx, int framesLimit, int tableSize, int numOfPartitions,
+ int[] keys, int hashSeedOffset, IBinaryComparator[] comparators, ITuplePartitionComputerFamily tpcFamily,
+ IAggregatorDescriptor aggregator, RecordDescriptor inputRecordDescriptor,
+ RecordDescriptor outputRecordDescriptor, IFrameWriter outputWriter) throws HyracksDataException {
+ this.ctx = ctx;
+ this.frameSize = ctx.getFrameSize();
+ this.tableSize = tableSize;
+ this.framesLimit = framesLimit;
+ this.numOfPartitions = numOfPartitions;
+ this.inputKeys = keys;
+ this.internalKeys = new int[keys.length];
+ for (int i = 0; i < internalKeys.length; i++) {
+ internalKeys[i] = i; // stored tuples carry the key fields first, at positions 0..keys.length-1
+ }
+
+ this.comparators = comparators;
+
+ this.inputRecordDescriptor = inputRecordDescriptor;
+ this.outputRecordDescriptor = outputRecordDescriptor;
+
+ this.outputWriter = outputWriter;
+ // two partitioners built from different arguments: one picks hash table slots, the other spill partitions
+ this.hashComputer = tpcFamily.createPartitioner(hashSeedOffset * 2);
+ this.partitionComputer = tpcFamily.createPartitioner(hashSeedOffset * 2 + 1);
+
+ this.aggregator = aggregator;
+
+ }
+
+ public static double getHashtableOverheadRatio(int tableSize, int frameSize, int framesLimit, int recordSizeInByte) {
+ // every stored record is followed by two extra ints (its hash-chain reference)
+ int recordsPerPage = frameSize / (recordSizeInByte + 2 * INT_SIZE);
+ int recordPages = framesLimit - getHeaderPages(tableSize, frameSize) - 1;
+ return (double) framesLimit * frameSize / (recordPages * recordsPerPage) / recordSizeInByte;
+ }
+
+ public static int getHeaderPages(int tableSize, int frameSize) {
+ return (int) Math.ceil((double)tableSize * INT_SIZE * 2 / frameSize); // two ints per slot, rounded up to whole frames
+ }
+
+ @Override
+ public void open() throws HyracksDataException {
+ // initialize hash headers
+ int htHeaderCount = getHeaderPages(tableSize, frameSize);
+ // when the spill partitions leave no frame for table contents, skip hashing and only partition
+ isPartitionOnly = false;
+ if (numOfPartitions >= framesLimit - htHeaderCount) {
+ isPartitionOnly = true;
+ }
+
+ if (isPartitionOnly) {
+ htHeaderCount = 0; // no header frames are needed in partition-only mode
+ }
+
+ headers = new ByteBuffer[htHeaderCount]; // only the array: frames are allocated on first use in insert()
+
+ // initialize hash table contents
+ // NOTE(review): the size below is negative if numOfPartitions > framesLimit in partition-only mode — confirm callers cap it
+ contents = new ByteBuffer[framesLimit - htHeaderCount - numOfPartitions];
+ currentHashTableFrame = 0;
+ isHashtableFull = false;
+
+ // initialize hash table aggregate state
+ htAggregateState = aggregator.createAggregateStates();
+
+ // initialize partition information
+ spilledPartOutputBuffers = new ByteBuffer[numOfPartitions];
+ spilledPartRunWriters = new RunFileWriter[numOfPartitions];
+ spilledPartRunSizeArrayInFrames = new int[numOfPartitions];
+ spilledPartRunSizeArrayInTuples = new int[numOfPartitions];
+
+ // initialize other helper classes
+ inputFrameTupleAccessor = new FrameTupleAccessor(frameSize, inputRecordDescriptor);
+ internalTupleBuilder = new ArrayTupleBuilder(outputRecordDescriptor.getFieldCount());
+ spilledPartInsertAppender = new FrameTupleAppender(frameSize);
+
+ htInsertAppender = new FrameTupleAppenderForGroupHashtable(frameSize);
+ matchPointer = new TuplePointer();
+ hashtableRecordAccessor = new FrameTupleAccessorForGroupHashtable(frameSize, outputRecordDescriptor);
+ }
+
+ @Override
+ public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+ // run every tuple of the incoming frame through insert()
+ inputFrameTupleAccessor.reset(buffer);
+ for (int tIndex = 0, total = inputFrameTupleAccessor.getTupleCount(); tIndex < total; tIndex++) {
+ insert(inputFrameTupleAccessor, tIndex);
+ }
+ }
+
+ @Override
+ public void fail() throws HyracksDataException {
+ // TODO: no failure handling is implemented; this is currently a no-op, and the spilled run
+ // writers are only closed by finishup() / close()
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ for (int i = 0; i < numOfPartitions; i++) {
+ if (spilledPartRunWriters[i] != null) {
+ spilledPartRunWriters[i].close(); // NOTE(review): finishup() closes these too — confirm a second close() is harmless
+ }
+ }
+ htAggregateState.close(); // closes the aggregate state created in open()
+ }
+
+ private void insert(FrameTupleAccessor accessor, int tupleIndex) throws HyracksDataException {
+ // route one input tuple: aggregate it into a matching entry, add a new entry, or spill it
+ if (isPartitionOnly) {
+ // for partition only
+ int pid = partitionComputer.partition(accessor, tupleIndex, tableSize) % numOfPartitions;
+ insertSpilledPartition(accessor, tupleIndex, pid);
+ spilledPartRunSizeArrayInTuples[pid]++;
+ return;
+ }
+
+ int hid = hashComputer.partition(accessor, tupleIndex, tableSize);
+
+ if (findMatch(hid, accessor, tupleIndex)) { // on a miss, matchPointer is left at the last chain entry (or -1/-1)
+ // found a matching: do aggregation
+ hashtableRecordAccessor.reset(contents[matchPointer.frameIndex]);
+ aggregator.aggregate(accessor, tupleIndex, hashtableRecordAccessor, matchPointer.tupleIndex,
+ htAggregateState);
+ hashedRawRecords++;
+ } else {
+ if (isHashtableFull) {
+ // when hash table is full: spill the record
+ int pid = partitionComputer.partition(accessor, tupleIndex, tableSize) % numOfPartitions;
+ insertSpilledPartition(accessor, tupleIndex, pid);
+ spilledPartRunSizeArrayInTuples[pid]++;
+ } else {
+ // insert a new entry into the hash table
+ internalTupleBuilder.reset();
+ for (int k = 0; k < inputKeys.length; k++) {
+ internalTupleBuilder.addField(accessor, tupleIndex, inputKeys[k]);
+ }
+
+ aggregator.init(internalTupleBuilder, accessor, tupleIndex, htAggregateState);
+
+ if (contents[currentHashTableFrame] == null) {
+ contents[currentHashTableFrame] = ctx.allocateFrame();
+ }
+ // presumably 'false' keeps the tuples already stored in this frame — verify against reset()
+ htInsertAppender.reset(contents[currentHashTableFrame], false);
+ if (!htInsertAppender.append(internalTupleBuilder.getFieldEndOffsets(),
+ internalTupleBuilder.getByteArray(), 0, internalTupleBuilder.getSize())) {
+ // current frame is full: try to move on to the next content frame
+ currentHashTableFrame++;
+ if (currentHashTableFrame >= contents.length) {
+ // no more frame to allocate: stop expanding the hash table
+ isHashtableFull = true;
+
+ // reinsert the record: with the table marked full, the recursive call spills it
+ insert(accessor, tupleIndex);
+
+ return;
+ } else {
+ if (contents[currentHashTableFrame] == null) {
+ contents[currentHashTableFrame] = ctx.allocateFrame();
+ }
+
+ htInsertAppender.reset(contents[currentHashTableFrame], true);
+
+ if (!htInsertAppender.append(internalTupleBuilder.getFieldEndOffsets(),
+ internalTupleBuilder.getByteArray(), 0, internalTupleBuilder.getSize())) {
+ throw new HyracksDataException(
+ "Failed to insert an aggregation partial result into the in-memory hash table: it has the length of "
+ + internalTupleBuilder.getSize() + " and fields "
+ + internalTupleBuilder.getFieldEndOffsets().length);
+ }
+
+ }
+ }
+
+ // update hash table reference
+ if (matchPointer.frameIndex < 0) { // findMatch() visited no entry: this slot has no chain yet
+ // need to initialize the hash table header
+ int headerFrameIndex = getHeaderFrameIndex(hid);
+ int headerFrameOffset = getHeaderTupleIndex(hid);
+
+ if (headers[headerFrameIndex] == null) {
+ headers[headerFrameIndex] = ctx.allocateFrame();
+ resetHeader(headerFrameIndex);
+ }
+
+ headers[headerFrameIndex].putInt(headerFrameOffset, currentHashTableFrame);
+ headers[headerFrameIndex]
+ .putInt(headerFrameOffset + INT_SIZE, htInsertAppender.getTupleCount() - 1);
+ } else {
+ // extend the chain: point the last visited entry at the tuple just appended
+ hashtableRecordAccessor.reset(contents[matchPointer.frameIndex]);
+ int refOffset = hashtableRecordAccessor.getTupleHashReferenceOffset(matchPointer.tupleIndex);
+ contents[matchPointer.frameIndex].putInt(refOffset, currentHashTableFrame);
+ contents[matchPointer.frameIndex]
+ .putInt(refOffset + INT_SIZE, htInsertAppender.getTupleCount() - 1);
+ }
+
+ hashedKeys++;
+ hashedRawRecords++;
+ }
+ }
+ }
+
+ /**
+ * Insert record into a spilled partition, by directly copying the tuple into the output buffer.
+ *
+ * @param accessor accessor over the input frame that holds the tuple
+ * @param tupleIndex index of the tuple within that frame
+ * @param pid index of the target spill partition
+ */
+ private void insertSpilledPartition(FrameTupleAccessor accessor, int tupleIndex, int pid)
+ throws HyracksDataException {
+ // the partition's output buffer is allocated the first time a tuple is routed to it
+ if (spilledPartOutputBuffers[pid] == null) {
+ spilledPartOutputBuffers[pid] = ctx.allocateFrame();
+ }
+
+ spilledPartInsertAppender.reset(spilledPartOutputBuffers[pid], false);
+
+ if (!spilledPartInsertAppender.append(accessor, tupleIndex)) {
+ // the output buffer is full: flush
+ flushSpilledPartitionOutputBuffer(pid);
+ // reset the output buffer
+ spilledPartInsertAppender.reset(spilledPartOutputBuffers[pid], true);
+
+ if (!spilledPartInsertAppender.append(accessor, tupleIndex)) { // still failing on a just-reset buffer
+ throw new HyracksDataException("Failed to insert a record into a spilled partition!");
+ }
+ }
+
+ }
+
+ /**
+ * Flush a spilled partition's output buffer to its run file, creating the run writer on first use.
+ *
+ * @param pid index of the spill partition to flush
+ * @throws HyracksDataException
+ */
+ private void flushSpilledPartitionOutputBuffer(int pid) throws HyracksDataException {
+ if (spilledPartRunWriters[pid] == null) {
+ spilledPartRunWriters[pid] = new RunFileWriter(
+ ctx.createManagedWorkspaceFile("HashHashPrePartitionHashTable"), ctx.getIOManager());
+ spilledPartRunWriters[pid].open();
+ }
+
+ FrameUtils.flushFrame(spilledPartOutputBuffers[pid], spilledPartRunWriters[pid]);
+ // one more frame written to this partition's run
+ spilledPartRunSizeArrayInFrames[pid]++;
+ }
+
+ /**
+ * Hash table lookup: walks the chain of a slot; matchPointer ends on the match or on the last entry visited.
+ *
+ * @param hid hash table slot of the input tuple
+ * @param accessor accessor over the input frame
+ * @param tupleIndex index of the input tuple within that frame
+ * @return true if an entry with equal keys was found; false otherwise (matchPointer stays -1/-1 if no entry was visited)
+ */
+ private boolean findMatch(int hid, FrameTupleAccessor accessor, int tupleIndex) {
+
+ matchPointer.frameIndex = -1;
+ matchPointer.tupleIndex = -1;
+
+ // get reference in the header
+ int headerFrameIndex = getHeaderFrameIndex(hid);
+ int headerFrameOffset = getHeaderTupleIndex(hid);
+
+ if (headers[headerFrameIndex] == null) { // header frame never allocated: no slot in it is in use
+ return false;
+ }
+
+ // initialize the pointer to the first record
+ int entryFrameIndex = headers[headerFrameIndex].getInt(headerFrameOffset);
+ int entryTupleIndex = headers[headerFrameIndex].getInt(headerFrameOffset + INT_SIZE);
+
+ while (entryFrameIndex >= 0) { // a negative frame index ends the chain
+ matchPointer.frameIndex = entryFrameIndex;
+ matchPointer.tupleIndex = entryTupleIndex;
+ hashtableRecordAccessor.reset(contents[entryFrameIndex]);
+ if (compare(accessor, tupleIndex, hashtableRecordAccessor, entryTupleIndex) == 0) {
+ return true;
+ }
+ // Move to the next record in this entry following the linked list
+ int refOffset = hashtableRecordAccessor.getTupleHashReferenceOffset(entryTupleIndex);
+ int prevFrameIndex = entryFrameIndex;
+ entryFrameIndex = contents[prevFrameIndex].getInt(refOffset);
+ entryTupleIndex = contents[prevFrameIndex].getInt(refOffset + INT_SIZE);
+ }
+
+ return false;
+ }
+
+ public void finishup() throws HyracksDataException {
+ // spill all output buffers
+ for (int i = 0; i < numOfPartitions; i++) {
+ if (spilledPartOutputBuffers[i] != null) {
+ flushSpilledPartitionOutputBuffer(i);
+ }
+ }
+ spilledPartOutputBuffers = null; // insert() can no longer spill after this: finishup() must follow the last frame
+
+ // flush in-memory aggregation results: no more frame cost here as all output buffers are recycled
+ ByteBuffer outputBuffer = ctx.allocateFrame();
+ FrameTupleAppender outputBufferAppender = new FrameTupleAppender(frameSize);
+ outputBufferAppender.reset(outputBuffer, true);
+
+ ArrayTupleBuilder outFlushTupleBuilder = new ArrayTupleBuilder(outputRecordDescriptor.getFieldCount());
+
+ for (ByteBuffer htFrame : contents) {
+ if (htFrame == null) {
+ continue;
+ }
+ hashtableRecordAccessor.reset(htFrame);
+ int tupleCount = hashtableRecordAccessor.getTupleCount();
+ for (int i = 0; i < tupleCount; i++) {
+ outFlushTupleBuilder.reset();
+ // output tuple = key fields copied from the entry, then whatever the aggregator appends
+ for (int k = 0; k < internalKeys.length; k++) {
+ outFlushTupleBuilder.addField(hashtableRecordAccessor, i, internalKeys[k]);
+ }
+
+ aggregator.outputFinalResult(outFlushTupleBuilder, hashtableRecordAccessor, i, htAggregateState);
+
+ if (!outputBufferAppender.append(outFlushTupleBuilder.getFieldEndOffsets(),
+ outFlushTupleBuilder.getByteArray(), 0, outFlushTupleBuilder.getSize())) {
+ FrameUtils.flushFrame(outputBuffer, outputWriter);
+ outputBufferAppender.reset(outputBuffer, true);
+
+ if (!outputBufferAppender.append(outFlushTupleBuilder.getFieldEndOffsets(),
+ outFlushTupleBuilder.getByteArray(), 0, outFlushTupleBuilder.getSize())) {
+ throw new HyracksDataException(
+ "Failed to flush a record from in-memory hash table: record has length of "
+ + outFlushTupleBuilder.getSize() + " and fields "
+ + outFlushTupleBuilder.getFieldEndOffsets().length);
+ }
+ }
+ }
+ }
+
+ if (outputBufferAppender.getTupleCount() > 0) {
+ FrameUtils.flushFrame(outputBuffer, outputWriter);
+ }
+
+ // create run readers and statistic information for spilled runs
+ spilledPartRunReaders = new LinkedList<IFrameReader>();
+ spilledRunAggregatedPages = new LinkedList<Integer>();
+ spilledPartRunSizesInFrames = new LinkedList<Integer>();
+ spilledPartRunSizesInTuples = new LinkedList<Integer>();
+ for (int i = 0; i < numOfPartitions; i++) {
+ if (spilledPartRunWriters[i] != null) { // only partitions that actually wrote a run file
+ spilledPartRunReaders.add(spilledPartRunWriters[i].createReader());
+ spilledRunAggregatedPages.add(0);
+ spilledPartRunWriters[i].close();
+ spilledPartRunSizesInFrames.add(spilledPartRunSizeArrayInFrames[i]);
+ spilledPartRunSizesInTuples.add(spilledPartRunSizeArrayInTuples[i]);
+ }
+ }
+ }
+
+ /**
+ * Compare an input record with a hash table entry, key field by key field.
+ *
+ * @param accessor accessor over the input frame
+ * @param tupleIndex index of the input tuple within that frame
+ * @param hashAccessor accessor over the hash table frame that holds the entry
+ * @param hashTupleIndex index of the entry within that frame
+ * @return 0 if all key fields compare equal, otherwise the first non-zero comparator result
+ */
+ private int compare(FrameTupleAccessor accessor, int tupleIndex, FrameTupleAccessorForGroupHashtable hashAccessor,
+ int hashTupleIndex) {
+ int tStart0 = accessor.getTupleStartOffset(tupleIndex);
+ int fStartOffset0 = accessor.getFieldSlotsLength() + tStart0; // added to each field start below to address the bytes
+
+ int tStart1 = hashAccessor.getTupleStartOffset(hashTupleIndex);
+ int fStartOffset1 = hashAccessor.getFieldSlotsLength() + tStart1;
+
+ for (int i = 0; i < internalKeys.length; ++i) {
+ int fStart0 = accessor.getFieldStartOffset(tupleIndex, inputKeys[i]);
+ int fEnd0 = accessor.getFieldEndOffset(tupleIndex, inputKeys[i]);
+ int fLen0 = fEnd0 - fStart0;
+ // the i-th input key field is matched against the i-th stored key field
+ int fStart1 = hashAccessor.getFieldStartOffset(hashTupleIndex, internalKeys[i]);
+ int fEnd1 = hashAccessor.getFieldEndOffset(hashTupleIndex, internalKeys[i]);
+ int fLen1 = fEnd1 - fStart1;
+
+ int c = comparators[i].compare(accessor.getBuffer().array(), fStart0 + fStartOffset0, fLen0, hashAccessor
+ .getBuffer().array(), fStart1 + fStartOffset1, fLen1);
+ if (c != 0) {
+ return c;
+ }
+ }
+ return 0;
+ }
+
+ /**
+ * Locate the header frame that holds the slot of the given hash table entry.
+ *
+ * @param entry hash table entry (slot) number
+ * @return index into headers; for non-negative entries this equals entry * 2 * INT_SIZE / frameSize
+ */
+ private int getHeaderFrameIndex(int entry) {
+ final int slotBytes = 2 * INT_SIZE; // whole-frame part and remainder part are computed separately
+ return (entry / frameSize * slotBytes) + (entry % frameSize * slotBytes / frameSize);
+ }
+
+ /**
+ * Locate the slot of the given hash table entry inside its header frame.
+ *
+ * @param entry hash table entry (slot) number
+ * @return byte offset of the slot within the header frame (callers use it as an offset, not a tuple index)
+ */
+ private int getHeaderTupleIndex(int entry) {
+ final int slotBytes = 2 * INT_SIZE;
+ return (entry % frameSize) * slotBytes % frameSize;
+ }
+
+ /**
+ * reset the header page: every int of the frame is set to HEADER_REF_EMPTY, so all of its slots read as unused.
+ *
+ * @param headerFrameIndex index of the header frame to reset
+ */
+ private void resetHeader(int headerFrameIndex) {
+ for (int i = 0; i < frameSize; i += INT_SIZE) {
+ headers[headerFrameIndex].putInt(i, HEADER_REF_EMPTY);
+ }
+ }
+
+ public List<Integer> getSpilledRunsSizeInRawTuples() throws HyracksDataException {
+ return spilledPartRunSizesInTuples; // null until finishup() has run; one entry per spilled run
+ }
+
+ public int getHashedUniqueKeys() throws HyracksDataException {
+ return hashedKeys; // number of new entries inserted into the in-memory hash table
+ }
+
+ public int getHashedRawRecords() throws HyracksDataException {
+ return hashedRawRecords; // input records absorbed in memory, either as a new entry or by aggregation
+ }
+
+ public List<Integer> getSpilledRunsAggregatedPages() throws HyracksDataException {
+ return spilledRunAggregatedPages; // null until finishup() has run, which records 0 for every spilled run
+ }
+
+ public List<IFrameReader> getSpilledRuns() throws HyracksDataException {
+ return spilledPartRunReaders; // null until finishup() has run; one reader per partition that wrote a run file
+ }
+
+ public List<Integer> getSpilledRunsSizeInPages() throws HyracksDataException {
+ return spilledPartRunSizesInFrames; // null until finishup() has run; frames flushed per spilled run
+ }
+
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashGroupOperatorDescriptor.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashGroupOperatorDescriptor.java
new file mode 100644
index 0000000..118ca75
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashGroupOperatorDescriptor.java
@@ -0,0 +1,399 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hybridhash;
+
+import java.nio.ByteBuffer;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.comm.IFrameReader;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFamily;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFamily;
+import edu.uci.ics.hyracks.dataflow.common.io.RunFileReader;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
+import edu.uci.ics.hyracks.dataflow.std.group.hashsort.HybridHashSortGroupHashTable;
+import edu.uci.ics.hyracks.dataflow.std.group.hashsort.HybridHashSortRunMerger;
+
+public class HybridHashGroupOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
+
+ private static final long serialVersionUID = 1L;
+
+ private static final double HYBRID_FALLBACK_THRESHOLD = 0.8;
+
+ // merge with fudge factor
+ private static final double ESTIMATOR_MAGNIFIER = 1.2;
+
+ // input key fields
+ private final int[] keyFields;
+
+ // intermediate and final key fields
+ private final int[] storedKeyFields;
+
+ /**
+ * Input sizes as the count of the raw records.
+ */
+ private final long inputSizeInRawRecords;
+
+ /**
+ * Input size as the count of the unique keys.
+ */
+ private final long inputSizeInUniqueKeys;
+
+ // hash table size
+ private final int tableSize;
+
+ // estimated record size: used for compute the fudge factor
+ private final int userProvidedRecordSizeInBytes;
+
+ // aggregator
+ private final IAggregatorDescriptorFactory aggregatorFactory;
+
+ // merger, in case of falling back to the hash-sort algorithm for hash skewness
+ private final IAggregatorDescriptorFactory mergerFactory;
+
+ // for the sort fall-back algorithm
+ private final INormalizedKeyComputerFactory firstNormalizerFactory;
+
+ // total memory in pages
+ private final int framesLimit;
+
+ // comparator factories for key fields.
+ private final IBinaryComparatorFactory[] comparatorFactories;
+
+ /**
+ * hash families for each field: a hash function family is need as we may have
+ * more than one levels of hashing
+ */
+ private final IBinaryHashFunctionFamily[] hashFamilies;
+
+ /**
+ * Flag for input adjustment
+ */
+ private final boolean doInputAdjustment;
+
+ private final static double FUDGE_FACTOR_ESTIMATION = 1.2;
+
+ public HybridHashGroupOperatorDescriptor(JobSpecification spec, int[] keyFields, int framesLimit,
+ long inputSizeInRawRecords, long inputSizeInUniqueKeys, int recordSizeInBytes, int tableSize,
+ IBinaryComparatorFactory[] comparatorFactories, IBinaryHashFunctionFamily[] hashFamilies,
+ int hashFuncStartLevel, INormalizedKeyComputerFactory firstNormalizerFactory,
+ IAggregatorDescriptorFactory aggregatorFactory, IAggregatorDescriptorFactory mergerFactory,
+ RecordDescriptor outRecDesc) throws HyracksDataException {
+ this(spec, keyFields, framesLimit, inputSizeInRawRecords, inputSizeInUniqueKeys, recordSizeInBytes, tableSize,
+ comparatorFactories, hashFamilies, hashFuncStartLevel, firstNormalizerFactory, aggregatorFactory,
+ mergerFactory, outRecDesc, true);
+ }
+
+ public HybridHashGroupOperatorDescriptor(JobSpecification spec, int[] keyFields, int framesLimit,
+ long inputSizeInRawRecords, long inputSizeInUniqueKeys, int recordSizeInBytes, int tableSize,
+ IBinaryComparatorFactory[] comparatorFactories, IBinaryHashFunctionFamily[] hashFamilies,
+ int hashFuncStartLevel, INormalizedKeyComputerFactory firstNormalizerFactory,
+ IAggregatorDescriptorFactory aggregatorFactory, IAggregatorDescriptorFactory mergerFactory,
+ RecordDescriptor outRecDesc, boolean doInputAdjustment) throws HyracksDataException {
+ super(spec, 1, 1);
+ this.framesLimit = framesLimit;
+ this.tableSize = tableSize;
+ this.userProvidedRecordSizeInBytes = recordSizeInBytes;
+
+ this.inputSizeInRawRecords = inputSizeInRawRecords;
+ this.inputSizeInUniqueKeys = inputSizeInUniqueKeys;
+ // NOTE(review): this check also rejects framesLimit == 3, although the comment and message say 3 frames suffice — confirm
+ if (framesLimit <= 3) {
+ // at least 3 frames: 2 for in-memory hash table, and 1 for output buffer
+ throw new HyracksDataException(
+ "Not enough memory for Hash-Hash Aggregation algorithm: at least 3 frames are necessary, but only "
+ + framesLimit + " available.");
+ }
+
+ this.keyFields = keyFields;
+ storedKeyFields = new int[keyFields.length];
+ for (int i = 0; i < storedKeyFields.length; i++) {
+ storedKeyFields[i] = i; // stored key fields are numbered 0..keyFields.length-1
+ }
+
+ this.aggregatorFactory = aggregatorFactory;
+
+ this.mergerFactory = mergerFactory;
+ this.firstNormalizerFactory = firstNormalizerFactory;
+
+ this.comparatorFactories = comparatorFactories;
+
+ this.hashFamilies = hashFamilies;
+
+ recordDescriptors[0] = outRecDesc;
+ // NOTE(review): hashFuncStartLevel is accepted but never stored or used by this constructor
+ this.doInputAdjustment = doInputAdjustment;
+ }
+
+ @Override
+ public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+ IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
+
+ final IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
+ for (int i = 0; i < comparators.length; i++) {
+ comparators[i] = comparatorFactories[i].createBinaryComparator();
+ }
+
+ final RecordDescriptor inRecDesc = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);
+
+ final int frameSize = ctx.getFrameSize();
+
+ final double fudgeFactor = HybridHashGroupHashTable.getHashtableOverheadRatio(tableSize, frameSize,
+ framesLimit, userProvidedRecordSizeInBytes) * FUDGE_FACTOR_ESTIMATION;
+
+ return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
+
+ HybridHashGroupHashTable topProcessor;
+
+ int observedInputSizeInRawTuples;
+ int observedInputSizeInFrames, maxRecursiveLevels;
+
+ int userProvidedInputSizeInFrames;
+
+ boolean topLevelFallbackCheck = true;
+
+ ITuplePartitionComputerFamily tpcf = new FieldHashPartitionComputerFamily(keyFields, hashFamilies);
+
+ ITuplePartitionComputerFamily tpcfMerge = new FieldHashPartitionComputerFamily(storedKeyFields,
+ hashFamilies);
+
+ ByteBuffer readAheadBuf;
+
+ /**
+ * Compute the number of spill partitions using the hybrid-hash formula; the result is at least 1.
+ * Switches to pure partitioning when the partitions would leave no frame for hash table contents.
+ *
+ * @param tableSize number of hash table slots, used to size the header pages
+ * @param framesLimit memory budget in frames
+ * @param inputKeySize estimated input size handed to the formula (callers pass a size in frames)
+ * @param factor fudge factor applied to the input size
+ * @return the partition count; framesLimit in the pure-partitioning case
+ */
+ private int getNumberOfPartitions(int tableSize, int framesLimit, long inputKeySize, double factor) {
+
+ int hashtableHeaderPages = HybridHashGroupHashTable.getHeaderPages(tableSize, frameSize);
+ // inputKeySize is a long, so Math.ceil changes nothing here; the cast narrows it to int
+ int numberOfPartitions = HybridHashUtil.hybridHashPartitionComputer((int) Math.ceil(inputKeySize),
+ framesLimit, factor);
+
+ // if the partition number is more than the available hash table contents, do pure partition.
+ if (numberOfPartitions >= framesLimit - hashtableHeaderPages) {
+ numberOfPartitions = framesLimit;
+ }
+
+ if (numberOfPartitions <= 0) {
+ numberOfPartitions = 1;
+ }
+
+ return numberOfPartitions;
+ }
+
+ @Override
+ public void open() throws HyracksDataException {
+ observedInputSizeInRawTuples = 0;
+ observedInputSizeInFrames = 0;
+
+ // estimate the number of unique keys for this partition, given the total raw record count and unique record count
+ long estimatedNumberOfUniqueKeys = HybridHashUtil.getEstimatedPartitionSizeOfUniqueKeys(
+ inputSizeInRawRecords, inputSizeInUniqueKeys, 1);
+ // divide in floating point: an integer division would truncate before Math.ceil could round up
+ userProvidedInputSizeInFrames = (int) Math.ceil((double) estimatedNumberOfUniqueKeys
+ * userProvidedRecordSizeInBytes / frameSize);
+ // the estimate is enlarged by ESTIMATOR_MAGNIFIER before the partition count is derived from it
+ int topPartitions = getNumberOfPartitions(tableSize, framesLimit,
+ (int) Math.ceil(userProvidedInputSizeInFrames * ESTIMATOR_MAGNIFIER), fudgeFactor);
+
+ topProcessor = new HybridHashGroupHashTable(ctx, framesLimit, tableSize, topPartitions, keyFields, 0,
+ comparators, tpcf, aggregatorFactory.createAggregator(ctx, inRecDesc, recordDescriptors[0],
+ keyFields, storedKeyFields), inRecDesc, recordDescriptors[0], writer);
+
+ writer.open();
+ topProcessor.open();
+ }
+
+ @Override
+ public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+ observedInputSizeInRawTuples += buffer.getInt(buffer.capacity() - 4); // last int of the frame; presumably its tuple count — verify
+ observedInputSizeInFrames++;
+ topProcessor.nextFrame(buffer);
+ }
+
+ @Override
+ public void fail() throws HyracksDataException {
+ // pass the failure on to the downstream writer instead of silently dropping it
+ writer.fail();
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ // estimate the maximum recursive levels: ceil(log base framesLimit of (observed frames * fudgeFactor)) + 1, at least 1
+ maxRecursiveLevels = (int) Math.max(
+ Math.ceil(Math.log(observedInputSizeInFrames * fudgeFactor) / Math.log(framesLimit)) + 1, 1);
+ // flush the top-level table, then process its spilled runs recursively (level 0)
+ finishAndRecursion(topProcessor, observedInputSizeInRawTuples, inputSizeInUniqueKeys, 0,
+ topLevelFallbackCheck);
+ // NOTE(review): not reached if finishAndRecursion throws — confirm the caller handles that case
+ writer.close();
+
+ }
+
+ private void processRunFiles(IFrameReader runReader, int inputCardinality, int runLevel)
+ throws HyracksDataException {
+ // re-aggregate one spilled run with a fresh hash table, then recurse on whatever that table spills
+ boolean checkFallback = true; // always true here
+ // estimated size of the run's unique keys in frames (the integer division truncates)
+ int numOfPartitions = getNumberOfPartitions(tableSize, framesLimit, (long)inputCardinality
+ * userProvidedRecordSizeInBytes / frameSize, fudgeFactor);
+ // runLevel is passed as the hash seed offset, so each level builds its partitioners from different arguments
+ HybridHashGroupHashTable processor = new HybridHashGroupHashTable(ctx, framesLimit, tableSize,
+ numOfPartitions, keyFields, runLevel, comparators, tpcf, aggregatorFactory.createAggregator(
+ ctx, inRecDesc, recordDescriptors[0], keyFields, storedKeyFields), inRecDesc,
+ recordDescriptors[0], writer);
+
+ processor.open();
+
+ runReader.open();
+
+ int inputRunRawSizeInTuples = 0;
+
+ if (readAheadBuf == null) {
+ readAheadBuf = ctx.allocateFrame();
+ }
+ while (runReader.nextFrame(readAheadBuf)) {
+ inputRunRawSizeInTuples += readAheadBuf.getInt(readAheadBuf.capacity() - 4);
+ processor.nextFrame(readAheadBuf);
+ }
+
+ runReader.close();
+
+ finishAndRecursion(processor, inputRunRawSizeInTuples, inputCardinality, runLevel, checkFallback);
+ }
+
+ /**
+ * Finish the hash table processing and start recursive processing on run files.
+ *
+ * @param ht hash table whose input has been fully consumed
+ * @param inputRawRecordCount number of raw records that were fed into ht
+ * @param inputCardinality estimated number of unique keys in that input
+ * @param level recursion level of ht (0 for the top-level table)
+ * @param checkFallback whether a run holding most of the keys may trigger the hash-sort fallback
+ * @throws HyracksDataException
+ */
+ private void finishAndRecursion(HybridHashGroupHashTable ht, long inputRawRecordCount,
+ long inputCardinality, int level, boolean checkFallback) throws HyracksDataException {
+
+ ht.finishup();
+ // collect the results before ht is closed
+ List<IFrameReader> generatedRunReaders = ht.getSpilledRuns();
+ List<Integer> partitionRawRecords = ht.getSpilledRunsSizeInRawTuples();
+
+ int directFlushKeysInTuples = ht.getHashedUniqueKeys();
+ int directFlushRawRecordsInTuples = ht.getHashedRawRecords();
+
+ ht.close();
+ ht = null;
+
+ ctx.getCounterContext().getCounter("optional.levels." + level + ".estiInputKeyCardinality", true)
+ .update(inputCardinality);
+
+ // do adjustment: rescale by the keys-per-raw-record ratio observed in the in-memory part (narrowed to int)
+ if (doInputAdjustment && directFlushRawRecordsInTuples > 0) {
+ inputCardinality = (int) Math.ceil((double) directFlushKeysInTuples / directFlushRawRecordsInTuples
+ * inputRawRecordCount);
+ }
+
+ ctx.getCounterContext()
+ .getCounter("optional.levels." + level + ".estiInputKeyCardinalityAdjusted", true)
+ .update(inputCardinality);
+
+ IFrameReader recurRunReader;
+ int subPartitionRawRecords;
+
+ while (!generatedRunReaders.isEmpty()) {
+ recurRunReader = generatedRunReaders.remove(0);
+ subPartitionRawRecords = partitionRawRecords.remove(0);
+ // the run's share of the keys is assumed proportional to its share of the raw records
+ int runKeyCardinality = (int) Math.ceil((double) inputCardinality * subPartitionRawRecords
+ / inputRawRecordCount);
+ // fall back when one run holds more than the threshold share of the keys, or the recursion is too deep
+ if ((checkFallback && runKeyCardinality > HYBRID_FALLBACK_THRESHOLD * inputCardinality)
+ || level > maxRecursiveLevels) {
+ Logger.getLogger(HybridHashGroupOperatorDescriptor.class.getSimpleName()).warning(
+ "Hybrid-hash falls back to hash-sort algorithm! (" + level + ":" + maxRecursiveLevels
+ + ")");
+ fallback(recurRunReader, level);
+ } else {
+ processRunFiles(recurRunReader, runKeyCardinality, level + 1);
+ }
+
+ }
+ }
+
+ private void fallback(IFrameReader recurRunReader, int runLevel) throws HyracksDataException {
+ // fall back: feed the whole run into a hash-sort group table instead of recursing further
+ FrameTupleAccessor runFrameTupleAccessor = new FrameTupleAccessor(frameSize, inRecDesc);
+ HybridHashSortGroupHashTable hhsTable = new HybridHashSortGroupHashTable(ctx, framesLimit, tableSize,
+ keyFields, comparators, tpcf.createPartitioner(runLevel + 1),
+ firstNormalizerFactory.createNormalizedKeyComputer(), aggregatorFactory.createAggregator(ctx,
+ inRecDesc, recordDescriptors[0], keyFields, storedKeyFields), inRecDesc,
+ recordDescriptors[0]);
+
+ recurRunReader.open();
+ if (readAheadBuf == null) {
+ readAheadBuf = ctx.allocateFrame();
+ }
+ while (recurRunReader.nextFrame(readAheadBuf)) {
+ runFrameTupleAccessor.reset(readAheadBuf);
+ int tupleCount = runFrameTupleAccessor.getTupleCount();
+ for (int j = 0; j < tupleCount; j++) {
+ hhsTable.insert(runFrameTupleAccessor, j);
+ }
+ }
+
+ recurRunReader.close();
+ hhsTable.finishup();
+
+ LinkedList<RunFileReader> hhsRuns = hhsTable.getRunFileReaders();
+ // no run files: write the table straight to the output; otherwise merge the runs with the merger aggregator
+ if (hhsRuns.isEmpty()) {
+ hhsTable.flushHashtableToOutput(writer);
+ hhsTable.close();
+ } else {
+ hhsTable.close();
+ HybridHashSortRunMerger hhsMerger = new HybridHashSortRunMerger(ctx, hhsRuns, storedKeyFields,
+ comparators, recordDescriptors[0], tpcfMerge.createPartitioner(runLevel + 1),
+ mergerFactory.createAggregator(ctx, recordDescriptors[0], recordDescriptors[0],
+ storedKeyFields, storedKeyFields), framesLimit, tableSize, writer, false);
+ hhsMerger.process();
+ }
+ }
+
+ };
+ }
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashUtil.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashUtil.java
new file mode 100644
index 0000000..5323887
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/hybridhash/HybridHashUtil.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.group.hybridhash;
+
+public class HybridHashUtil {
+
+ /**
+ * Compute the expected number of spilling partitions (in-memory partition is not included), using the hybrid-hash
+ * algorithm from [Shapiro86]. Note that 0 means that there is no need to have spilling partitions.
+ *
+ * @param inputSizeOfUniqueKeysInFrames size of the input's unique keys, in frames
+ * @param memorySizeInFrames memory budget, in frames; NOTE: a value of 1 makes the divisor (memorySizeInFrames - 1) zero
+ * @param fudgeFactor multiplier applied to the input size before the memory budget is subtracted
+ * @return ceil((input * fudgeFactor - memory) / (memory - 1)), clamped below at 0
+ */
+ public static int hybridHashPartitionComputer(int inputSizeOfUniqueKeysInFrames, int memorySizeInFrames,
+ double fudgeFactor) {
+ return Math.max(
+ (int) Math.ceil((inputSizeOfUniqueKeysInFrames * fudgeFactor - memorySizeInFrames)
+ / (memorySizeInFrames - 1)), 0);
+ }
+
+ /**
+ * Compute the estimated number of unique keys in a partition of a dataset, using Yao's formula.
+ *
+ * @param inputSizeInRawRecords number of raw records in the input
+ * @param inputSizeInUniqueKeys number of unique keys in the input; returned unchanged when numOfPartitions is 1
+ * @param numOfPartitions number of partitions the input is divided into
+ * @return ceil(uniqueKeys * (1 - (1 - (rawRecords / numOfPartitions) / rawRecords) ^ (rawRecords / uniqueKeys)))
+ */
+ public static long getEstimatedPartitionSizeOfUniqueKeys(long inputSizeInRawRecords, long inputSizeInUniqueKeys,
+ int numOfPartitions) {
+ if (numOfPartitions == 1) {
+ return inputSizeInUniqueKeys;
+ }
+ return (long) Math.ceil(inputSizeInUniqueKeys
+ * (1 - Math.pow(1 - ((double) inputSizeInRawRecords / (double) numOfPartitions)
+ / (double) inputSizeInRawRecords, (double) inputSizeInRawRecords
+ / (double) inputSizeInUniqueKeys)));
+ }
+}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/preclustered/PreclusteredGroupWriter.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/preclustered/PreclusteredGroupWriter.java
index 7773765..4ae43eb 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/preclustered/PreclusteredGroupWriter.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/preclustered/PreclusteredGroupWriter.java
@@ -29,20 +29,26 @@
import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
public class PreclusteredGroupWriter implements IFrameWriter {
+
+ private final static int INT_SIZE = 4;
+
private final int[] groupFields;
private final IBinaryComparator[] comparators;
private final IAggregatorDescriptor aggregator;
private final AggregateState aggregateState;
private final IFrameWriter writer;
- private final ByteBuffer copyFrame;
private final FrameTupleAccessor inFrameAccessor;
- private final FrameTupleAccessor copyFrameAccessor;
private final ByteBuffer outFrame;
private final FrameTupleAppender appender;
private final ArrayTupleBuilder tupleBuilder;
- private boolean first;
+ private final RecordDescriptor outRecordDesc;
+
+ private byte[] groupResultCache;
+ private ByteBuffer groupResultCacheBuffer;
+ private FrameTupleAccessor groupResultCacheAccessor;
+ private FrameTupleAppender groupResultCacheAppender;
public PreclusteredGroupWriter(IHyracksTaskContext ctx, int[] groupFields, IBinaryComparator[] comparators,
IAggregatorDescriptor aggregator, RecordDescriptor inRecordDesc, RecordDescriptor outRecordDesc,
@@ -52,10 +58,9 @@
this.aggregator = aggregator;
this.aggregateState = aggregator.createAggregateStates();
this.writer = writer;
- copyFrame = ctx.allocateFrame();
+ this.outRecordDesc = outRecordDesc;
+
inFrameAccessor = new FrameTupleAccessor(ctx.getFrameSize(), inRecordDesc);
- copyFrameAccessor = new FrameTupleAccessor(ctx.getFrameSize(), inRecordDesc);
- copyFrameAccessor.reset(copyFrame);
outFrame = ctx.allocateFrame();
appender = new FrameTupleAppender(ctx.getFrameSize());
@@ -67,7 +72,6 @@
@Override
public void open() throws HyracksDataException {
writer.open();
- first = true;
}
@Override
@@ -75,40 +79,45 @@
inFrameAccessor.reset(buffer);
int nTuples = inFrameAccessor.getTupleCount();
for (int i = 0; i < nTuples; ++i) {
- if (first) {
- tupleBuilder.reset();
- for (int j = 0; j < groupFields.length; j++) {
- tupleBuilder.addField(inFrameAccessor, i, groupFields[j]);
- }
- aggregator.init(tupleBuilder, inFrameAccessor, i, aggregateState);
-
- first = false;
-
- } else {
- if (i == 0) {
- switchGroupIfRequired(copyFrameAccessor, copyFrameAccessor.getTupleCount() - 1, inFrameAccessor, i);
+ if (groupResultCache != null && groupResultCacheAccessor.getTupleCount() > 0) {
+ groupResultCacheAccessor.reset(ByteBuffer.wrap(groupResultCache));
+ if (sameGroup(inFrameAccessor, i, groupResultCacheAccessor, 0)) {
+ // find match: do aggregation
+ aggregator.aggregate(inFrameAccessor, i, groupResultCacheAccessor, 0, aggregateState);
+ continue;
} else {
- switchGroupIfRequired(inFrameAccessor, i - 1, inFrameAccessor, i);
+ // write the cached group into the final output
+ writeOutput(groupResultCacheAccessor, 0);
}
-
}
- }
- FrameUtils.copy(buffer, copyFrame);
- }
-
- private void switchGroupIfRequired(FrameTupleAccessor prevTupleAccessor, int prevTupleIndex,
- FrameTupleAccessor currTupleAccessor, int currTupleIndex) throws HyracksDataException {
- if (!sameGroup(prevTupleAccessor, prevTupleIndex, currTupleAccessor, currTupleIndex)) {
- writeOutput(prevTupleAccessor, prevTupleIndex);
tupleBuilder.reset();
+
for (int j = 0; j < groupFields.length; j++) {
- tupleBuilder.addField(currTupleAccessor, currTupleIndex, groupFields[j]);
+ tupleBuilder.addField(inFrameAccessor, i, groupFields[j]);
}
- aggregator.init(tupleBuilder, currTupleAccessor, currTupleIndex, aggregateState);
- } else {
- aggregator.aggregate(currTupleAccessor, currTupleIndex, null, 0, aggregateState);
+
+ aggregator.init(tupleBuilder, inFrameAccessor, i, aggregateState);
+
+ // enlarge the cache buffer if necessary
+ int requiredSize = tupleBuilder.getSize() + tupleBuilder.getFieldEndOffsets().length * INT_SIZE + 2
+ * INT_SIZE;
+
+ if (groupResultCache == null || groupResultCache.length < requiredSize) {
+ groupResultCache = new byte[requiredSize];
+ groupResultCacheAppender = new FrameTupleAppender(groupResultCache.length);
+ groupResultCacheBuffer = ByteBuffer.wrap(groupResultCache);
+ groupResultCacheAccessor = new FrameTupleAccessor(groupResultCache.length, outRecordDesc);
+ }
+
+ groupResultCacheAppender.reset(groupResultCacheBuffer, true);
+ if (!groupResultCacheAppender.append(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0,
+ tupleBuilder.getSize())) {
+ throw new HyracksDataException("The partial result is too large to be initialized in a frame.");
+ }
+
+ groupResultCacheAccessor.reset(groupResultCacheBuffer);
}
}
@@ -117,7 +126,7 @@
tupleBuilder.reset();
for (int j = 0; j < groupFields.length; j++) {
- tupleBuilder.addField(lastTupleAccessor, lastTupleIndex, groupFields[j]);
+ tupleBuilder.addField(lastTupleAccessor, lastTupleIndex, j);
}
aggregator.outputFinalResult(tupleBuilder, lastTupleAccessor, lastTupleIndex, aggregateState);
@@ -138,8 +147,8 @@
int fIdx = groupFields[i];
int s1 = a1.getTupleStartOffset(t1Idx) + a1.getFieldSlotsLength() + a1.getFieldStartOffset(t1Idx, fIdx);
int l1 = a1.getFieldLength(t1Idx, fIdx);
- int s2 = a2.getTupleStartOffset(t2Idx) + a2.getFieldSlotsLength() + a2.getFieldStartOffset(t2Idx, fIdx);
- int l2 = a2.getFieldLength(t2Idx, fIdx);
+ int s2 = a2.getTupleStartOffset(t2Idx) + a2.getFieldSlotsLength() + a2.getFieldStartOffset(t2Idx, i);
+ int l2 = a2.getFieldLength(t2Idx, i);
if (comparators[i].compare(a1.getBuffer().array(), s1, l1, a2.getBuffer().array(), s2, l2) != 0) {
return false;
}
@@ -154,8 +163,8 @@
@Override
public void close() throws HyracksDataException {
- if (!first) {
- writeOutput(copyFrameAccessor, copyFrameAccessor.getTupleCount() - 1);
+ if (groupResultCache != null && groupResultCacheAccessor.getTupleCount() > 0) {
+ writeOutput(groupResultCacheAccessor, 0);
if (appender.getTupleCount() > 0) {
FrameUtils.flushFrame(outFrame, writer);
}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/misc/PrinterOperatorDescriptor.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/misc/PrinterOperatorDescriptor.java
index 9569a7b..088c5ee 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/misc/PrinterOperatorDescriptor.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/misc/PrinterOperatorDescriptor.java
@@ -48,10 +48,10 @@
@Override
public void writeData(Object[] data) throws HyracksDataException {
for (int i = 0; i < data.length; ++i) {
- System.err.print(StringSerializationUtils.toString(data[i]));
- System.err.print(", ");
+ // System.err.print(StringSerializationUtils.toString(data[i]));
+ // System.err.print(", ");
}
- System.err.println();
+ //System.err.println();
}
@Override
diff --git a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatserver/pom.xml b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatserver/pom.xml
index ba30424..d166e45 100644
--- a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatserver/pom.xml
+++ b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatserver/pom.xml
@@ -125,8 +125,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/hyracks/hyracks-examples/text-example/textserver/pom.xml b/hyracks/hyracks-examples/text-example/textserver/pom.xml
index b12f823..f67d444 100644
--- a/hyracks/hyracks-examples/text-example/textserver/pom.xml
+++ b/hyracks/hyracks-examples/text-example/textserver/pom.xml
@@ -124,8 +124,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml b/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml
index 032d50d..e97f26f 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml
@@ -66,6 +66,18 @@
</filesets>
</configuration>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>2.2</version>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
diff --git a/patch.diff b/patch.diff
new file mode 100644
index 0000000..a333970
--- /dev/null
+++ b/patch.diff
@@ -0,0 +1,245 @@
+From 9e006501f9e33467a8428199bd94b71dbff063ef Mon Sep 17 00:00:00 2001
+From: Anbang Xu <anbangx@gmail.com>
+Date: Fri, 26 Jul 2013 14:10:33 -0700
+Subject: [PATCH] p2 pass all the tests except 9
+
+---
+ .../genomix/data/test/KmerBytesWritableTest.java | 76 +++++++++++++++++++++-
+ .../genomix/pregelix/io/VertexValueWritable.java | 2 +-
+ .../operator/pathmerge/BasicPathMergeVertex.java | 35 +++++-----
+ .../pathmerge/LogAlgorithmForPathMergeVertex.java | 8 +--
+ .../pregelix/JobRun/PathMergeSmallTestSuite.java | 2 +-
+ 5 files changed, 98 insertions(+), 25 deletions(-)
+
+diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
+index bda73e5..fbfbeeb 100644
+--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
++++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
+@@ -229,14 +229,34 @@ public class KmerBytesWritableTest {
+ merge.mergeWithRFKmer(i, kmer2);
+ Assert.assertEquals("GGCACAACAACCC", merge.toString());
+
+- String test1 = "CTA";
+- String test2 = "AGA";
++ String test1;
++ String test2;
++ test1 = "CTA";
++ test2 = "AGA";
+ KmerBytesWritable k1 = new KmerBytesWritable(3);
+ KmerBytesWritable k2 = new KmerBytesWritable(3);
+ k1.setByRead(test1.getBytes(), 0);
+ k2.setByRead(test2.getBytes(), 0);
+ k1.mergeWithRFKmer(3, k2);
+ Assert.assertEquals("TCTA", k1.toString());
++
++ test1 = "CTA";
++ test2 = "ATA"; //TAT
++ k1 = new KmerBytesWritable(3);
++ k2 = new KmerBytesWritable(3);
++ k1.setByRead(test1.getBytes(), 0);
++ k2.setByRead(test2.getBytes(), 0);
++ k1.mergeWithFRKmer(3, k2);
++ Assert.assertEquals("CTAT", k1.toString());
++
++ test1 = "ATA";
++ test2 = "CTA"; //TAT
++ k1 = new KmerBytesWritable(3);
++ k2 = new KmerBytesWritable(3);
++ k1.setByRead(test1.getBytes(), 0);
++ k2.setByRead(test2.getBytes(), 0);
++ k1.mergeWithFRKmer(3, k2);
++ Assert.assertEquals("ATAG", k1.toString());
+ }
+
+
+@@ -281,5 +301,55 @@ public class KmerBytesWritableTest {
+ }
+ }
+ }
+-
++
++ @Test
++ public void TestFinalMerge() {
++ String selfString;
++ String match;
++ String msgString;
++ int index;
++ KmerBytesWritable kmer = new KmerBytesWritable();
++ int kmerSize = 3;
++
++ String F1 = "AATAG";
++ String F2 = "TAGAA";
++ String R1 = "CTATT";
++ String R2 = "TTCTA";
++
++ //FF test: self = F1, message = F2; match is self's last (kmerSize - 1) chars (cases only print, nothing is asserted)
++ selfString = F1;
++ match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
++ msgString = F2;
++ index = msgString.indexOf(match);
++ kmer.reset(msgString.length() - index);
++ kmer.setByRead(msgString.substring(index).getBytes(), 0);
++ System.out.println(kmer.toString());
++
++ //FR test: self = F1, message = reverse complement of R2; keeps the message from its first match onward
++ selfString = F1;
++ match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
++ msgString = GeneCode.reverseComplement(R2);
++ index = msgString.indexOf(match);
++ kmer.reset(msgString.length() - index);
++ kmer.setByRead(msgString.substring(index).getBytes(), 0);
++ System.out.println(kmer.toString());
++
++ //RF test: self = R1, message = reverse complement of F2; match is self's first (kmerSize - 1) chars, read in reverse
++ selfString = R1;
++ match = selfString.substring(0,kmerSize - 1);
++ msgString = GeneCode.reverseComplement(F2);
++ index = msgString.lastIndexOf(match) + kmerSize - 2;
++ kmer.reset(index + 1);
++ kmer.setByReadReverse(msgString.substring(0, index + 1).getBytes(), 0);
++ System.out.println(kmer.toString());
++
++ //RR test: self = R1, message = R2; keeps the message up to the end of its last match of self's prefix
++ selfString = R1;
++ match = selfString.substring(0,kmerSize - 1);
++ msgString = R2;
++ index = msgString.lastIndexOf(match) + kmerSize - 2;
++ kmer.reset(index + 1);
++ kmer.setByRead(msgString.substring(0, index + 1).getBytes(), 0);
++ System.out.println(kmer.toString());
++ }
+ }
+diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java
+index 6d4f683..065bfd5 100644
+--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java
++++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java
+@@ -32,7 +32,7 @@ public class VertexValueWritable implements WritableComparable<VertexValueWritab
+ public static final byte SHOULD_MERGEWITHNEXT = 0b01 << 3;
+ public static final byte SHOULD_MERGEWITHPREV = 0b10 << 3;
+ public static final byte SHOULD_MERGE_MASK = 0b11 << 3;
+- public static final byte SHOULD_MERGE_CLEAR = 0b1110011;
++ public static final byte SHOULD_MERGE_CLEAR = 0b1100111;
+ }
+
+ private PositionListWritable nodeIdList;
+diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicPathMergeVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicPathMergeVertex.java
+index b7b0814..ec608c5 100644
+--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicPathMergeVertex.java
++++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicPathMergeVertex.java
+@@ -495,6 +495,7 @@ public class BasicPathMergeVertex extends
+
+ public void setStateAsMergeWithNext(){
+ byte state = getVertexValue().getState();
++ state &= State.SHOULD_MERGE_CLEAR;
+ state |= State.SHOULD_MERGEWITHNEXT;
+ getVertexValue().setState(state);
+ }
+@@ -512,6 +513,7 @@ public class BasicPathMergeVertex extends
+
+ public void setStateAsMergeWithPrev(){
+ byte state = getVertexValue().getState();
++ state &= State.SHOULD_MERGE_CLEAR;
+ state |= State.SHOULD_MERGEWITHPREV;
+ getVertexValue().setState(state);
+ }
+@@ -638,7 +640,7 @@ public class BasicPathMergeVertex extends
+ String match;
+ String msgString;
+ int index;
+- switch(neighborToMergeDir){
++ switch(neighborToMeDir){
+ case MessageFlag.DIR_FF:
+ selfString = getVertexValue().getKmer().toString();
+ match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
+@@ -648,28 +650,29 @@ public class BasicPathMergeVertex extends
+ kmer.setByRead(msgString.substring(index).getBytes(), 0);
+ break;
+ case MessageFlag.DIR_FR:
+- selfString = getVertexId().toString();
+- match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
++ selfString = getVertexValue().getKmer().toString();
++ match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
+ msgString = GeneCode.reverseComplement(msg.getKmer().toString());
+ index = msgString.indexOf(match);
+ kmer.reset(msgString.length() - index);
+- kmer.setByRead(msgString.substring(index).getBytes(), 0);
++ kmer.setByReadReverse(msgString.substring(index).getBytes(), 0);
+ break;
+ case MessageFlag.DIR_RF:
+- selfString = GeneCode.reverseComplement(getVertexValue().getKmer().toString());
+- match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
+- msgString = msg.getKmer().toString();
+- index = msgString.indexOf(match);
+- kmer.reset(msgString.length() - index);
+- kmer.setByRead(msgString.substring(index).getBytes(), 0);
++ selfString = getVertexValue().getKmer().toString();
++ match = selfString.substring(0,kmerSize - 1);
++ msgString = GeneCode.reverseComplement(msg.getKmer().toString());
++ index = msgString.lastIndexOf(match) + kmerSize - 2;
++ kmer.reset(index + 1);
++ kmer.setByReadReverse(msgString.substring(0, index + 1).getBytes(), 0);
+ break;
+ case MessageFlag.DIR_RR:
+- selfString = GeneCode.reverseComplement(getVertexValue().getKmer().toString());
+- match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
+- msgString = GeneCode.reverseComplement(msg.getKmer().toString());
+- index = msgString.indexOf(match);
+- kmer.reset(msgString.length() - index);
+- kmer.setByRead(msgString.substring(index).getBytes(), 0);
++ selfString = getVertexValue().getKmer().toString();
++ match = selfString.substring(0,kmerSize - 1);
++ msgString = msg.getKmer().toString();
++ index = msgString.lastIndexOf(match) + kmerSize - 2;
++ kmer.reset(index + 1);
++ kmer.setByRead(msgString.substring(0, index + 1).getBytes(), 0);
++ System.out.println(kmer.toString());
+ break;
+ }
+
+diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/LogAlgorithmForPathMergeVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/LogAlgorithmForPathMergeVertex.java
+index a68b646..3b5a782 100644
+--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/LogAlgorithmForPathMergeVertex.java
++++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/LogAlgorithmForPathMergeVertex.java
+@@ -170,22 +170,22 @@ public class LogAlgorithmForPathMergeVertex extends
+ case MessageFromHead.BothMsgsFromHead:
+ case MessageFromHead.OneMsgFromOldHeadAndOneFromHead:
+ for(int i = 0; i < 2; i++)
+- processMerge(receivedMsgList.get(i));
++ processFinalMerge(receivedMsgList.get(i)); //processMerge()
+ getVertexValue().setState(State.IS_FINAL);
+ voteToHalt();
+ break;
+ case MessageFromHead.OneMsgFromHeadAndOneFromNonHead:
+ for(int i = 0; i < 2; i++)
+- processMerge(receivedMsgList.get(i));
++ processFinalMerge(receivedMsgList.get(i));
+ getVertexValue().setState(State .IS_HEAD);
+ break;
+ case MessageFromHead.BothMsgsFromNonHead:
+ for(int i = 0; i < 2; i++)
+- processMerge(receivedMsgList.get(i));
++ processFinalMerge(receivedMsgList.get(i));
+ break;
+ case MessageFromHead.NO_MSG:
+ //halt
+- deleteVertex(getVertexId());
++ voteToHalt(); //deleteVertex(getVertexId());
+ break;
+ }
+ }
+diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
+index 9f96b5a..1578dfc 100644
+--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
++++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
+@@ -52,7 +52,7 @@ public class PathMergeSmallTestSuite extends TestSuite {
+ // + "6", PreFix + File.separator
+ // + "7", PreFix + File.separator
+ // + "8", PreFix + File.separator
+- + "5"};
++ + "9"};
+ private static final String ACTUAL_RESULT_DIR = "data/actual/pathmerge";
+ private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+ private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+--
+1.7.11.1
+
diff --git a/pom.xml b/pom.xml
index a851758..810152e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -82,5 +82,6 @@
<module>algebricks</module>
<module>pregelix</module>
<module>hivesterix</module>
+ <module>genomix</module>
</modules>
</project>
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/driver/Driver.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/driver/Driver.java
index 2d4064b..300dce4 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/driver/Driver.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/driver/Driver.java
@@ -136,6 +136,7 @@
terminate = IterationUtils.readTerminationState(job.getConfiguration(), jobGen.getJobId())
|| IterationUtils.readForceTerminationState(job.getConfiguration(), jobGen.getJobId());
i++;
+
} while (!terminate);
start = System.currentTimeMillis();
diff --git a/pregelix/pregelix-core/src/test/java/edu/uci/ics/pregelix/core/join/JoinTest.java b/pregelix/pregelix-core/src/test/java/edu/uci/ics/pregelix/core/join/JoinTest.java
index 4c7f91d..e8ab2ed 100644
--- a/pregelix/pregelix-core/src/test/java/edu/uci/ics/pregelix/core/join/JoinTest.java
+++ b/pregelix/pregelix-core/src/test/java/edu/uci/ics/pregelix/core/join/JoinTest.java
@@ -102,7 +102,7 @@
ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
cleanupStores();
- PregelixHyracksIntegrationUtil.init();
+ PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
FileUtils.forceMkdir(new File(EXPECT_RESULT_DIR));
FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/dataload/DataLoadTest.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/dataload/DataLoadTest.java
index b4e17b6..91aa0d7 100644
--- a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/dataload/DataLoadTest.java
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/dataload/DataLoadTest.java
@@ -76,7 +76,7 @@
ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
cleanupStores();
- PregelixHyracksIntegrationUtil.init();
+ PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
LOGGER.info("Hyracks mini-cluster started");
startHDFS();
FileUtils.forceMkdir(new File(EXPECT_RESULT_DIR));
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestSuite.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestSuite.java
index ea89bb9..7757f86 100644
--- a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestSuite.java
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestSuite.java
@@ -77,7 +77,7 @@
ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
cleanupStores();
- PregelixHyracksIntegrationUtil.init();
+ PregelixHyracksIntegrationUtil.init("src/test/resources/topology.xml");
LOGGER.info("Hyracks mini-cluster started");
FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));