Adapt SplitRepeatVertex to new graph structure. TODO: test AdjSplitRepeat
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2/2 b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/2/2
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2/2
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/2/2
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3/3 b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/3/3
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3/3
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/3/3
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4/4 b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/4/4
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4/4
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/4/4
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5/5 b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/5/5
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5/5
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/5/5
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6/6 b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/6/6
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6/6
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/6/6
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7/7 b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/7/7
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7/7
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/7/7
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8/8 b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/8/8
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8/8
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/8/8
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9/9 b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/9/9
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9/9
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/9/9
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/BridgePath/BridgePath b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/BridgePath/BridgePath
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/BridgePath/BridgePath
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/BridgePath/BridgePath
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/CyclePath/CyclePath b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/CyclePath/CyclePath
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/CyclePath/CyclePath
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/CyclePath/CyclePath
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/PairedEndTest/2_1 b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/PairedEndTest/2_1
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/PairedEndTest/2_1
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/PairedEndTest/2_1
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/PairedEndTest/2_2 b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/PairedEndTest/2_2
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/PairedEndTest/2_2
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/PairedEndTest/2_2
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/RingPath/RingPath b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/RingPath/RingPath
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/RingPath/RingPath
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/RingPath/RingPath
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/SelfPath/SelfPath.txt b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/SelfPath/SelfPath.txt
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/SelfPath/SelfPath.txt
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/SelfPath/SelfPath.txt
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/SimplePath/SimplePath b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/SimplePath/SimplePath
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/SimplePath/SimplePath
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/SimplePath/SimplePath
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/ThreeDuplicate/ThreeDuplicate.txt b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/ThreeDuplicate/ThreeDuplicate.txt
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/ThreeDuplicate/ThreeDuplicate.txt
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/ThreeDuplicate/ThreeDuplicate.txt
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/TreePath/TreePath b/genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/TreePath/TreePath
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/TreePath/TreePath
rename to genomix/genomix-hadoop/data/webmap/PathMerge_TestSet/TreePath/TreePath
diff --git a/genomix/genomix-hadoop/data/webmap/AdjSplitRepeat.txt b/genomix/genomix-hadoop/data/webmap/SplitRepeat_TestSet/AdjSplitRepeat/AdjSplitRepeat.txt
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/AdjSplitRepeat.txt
rename to genomix/genomix-hadoop/data/webmap/SplitRepeat_TestSet/AdjSplitRepeat/AdjSplitRepeat.txt
diff --git a/genomix/genomix-hadoop/data/webmap/SplitOnce.txt b/genomix/genomix-hadoop/data/webmap/SplitRepeat_TestSet/SplitOnce/SplitOnce.txt
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/SplitOnce.txt
rename to genomix/genomix-hadoop/data/webmap/SplitRepeat_TestSet/SplitOnce/SplitOnce.txt
diff --git a/genomix/genomix-hadoop/data/webmap/SplitTwice.txt b/genomix/genomix-hadoop/data/webmap/SplitRepeat_TestSet/SplitTwice/SplitTwice.txt
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/SplitTwice.txt
rename to genomix/genomix-hadoop/data/webmap/SplitRepeat_TestSet/SplitTwice/SplitTwice.txt
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java
index 318afb3..5535693 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java
@@ -22,9 +22,11 @@
public class GraphBuildingTestSuite extends TestSuite{
private static int SIZE_KMER = 3;
- public static final String PreFix = "data/webmap/BridgeAdd_TestSet";
+ public static final String PreFix = "data/webmap/SplitRepeat_TestSet";
public static final String[] TestDir = { PreFix + File.separator
- + "TwoLines"};
+ + "SplitOnce", PreFix + File.separator
+ + "SplitTwice", PreFix + File.separator
+ + "AdjSplitRepeat"};
// + "2", PreFix + File.separator
// + "3", PreFix + File.separator
// + "4", PreFix + File.separator
diff --git a/genomix/genomix-pregelix/data/TestSet/SplitRepeat/AdjSplitRepeat/bin/.part-00000.crc b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/AdjSplitRepeat/bin/.part-00000.crc
new file mode 100644
index 0000000..22a96a2
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/AdjSplitRepeat/bin/.part-00000.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/TestSet/SplitRepeat/AdjSplitRepeat/bin/part-00000 b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/AdjSplitRepeat/bin/part-00000
new file mode 100755
index 0000000..7f99bf1
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/AdjSplitRepeat/bin/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/TestSet/SplitRepeat/AdjSplitRepeat/graphviz/result.ps b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/AdjSplitRepeat/graphviz/result.ps
new file mode 100644
index 0000000..e52e930
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/AdjSplitRepeat/graphviz/result.ps
@@ -0,0 +1,690 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 666 270
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 630 234 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% GCA
+gsave
+0 0 0.75294 nodecolor
+newpath 0 61 moveto
+0 165 lineto
+56 165 lineto
+56 61 lineto
+closepath fill
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 0 61 moveto
+0 165 lineto
+56 165 lineto
+56 61 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+12.5 148.4 moveto 31 (GCA) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 0 139 moveto
+56 139 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+9.5 122.4 moveto 37 (5':[2]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 0 113 moveto
+56 113 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+8 96.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 0 87 moveto
+56 87 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+16.5 70.4 moveto 23 (1.0) alignedtext
+grestore
+% ATG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+newpath 138 61 moveto
+138 165 lineto
+194 165 lineto
+194 61 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+151 148.4 moveto 30 (ATG) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 138 139 moveto
+194 139 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+152 122.4 moveto 28 (5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 138 113 moveto
+194 113 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+146 96.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 138 87 moveto
+194 87 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+154.5 70.4 moveto 23 (1.0) alignedtext
+grestore
+% GCA->ATG
+gsave
+1 setlinewidth
+0.66667 1 1 edgecolor
+newpath 56.26 113 moveto
+76.82 113 104.89 113 127.5 113 curveto
+stroke
+0.66667 1 1 edgecolor
+newpath 127.78 116.5 moveto
+137.78 113 lineto
+127.78 109.5 lineto
+closepath fill
+1 setlinewidth
+solid
+0.66667 1 1 edgecolor
+newpath 127.78 116.5 moveto
+137.78 113 lineto
+127.78 109.5 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+74 118.4 moveto 46 (FR: [2]) alignedtext
+grestore
+% ATG->GCA
+gsave
+1 setlinewidth
+0.66667 1 1 edgecolor
+newpath 137.51 96.99 moveto
+131.87 94.51 125.87 92.33 120 91 curveto
+100.06 86.48 93.94 86.48 74 91 curveto
+71.34 91.6 68.65 92.38 65.99 93.28 curveto
+stroke
+0.66667 1 1 edgecolor
+newpath 64.53 90.09 moveto
+56.49 96.99 lineto
+67.08 96.61 lineto
+closepath fill
+1 setlinewidth
+solid
+0.66667 1 1 edgecolor
+newpath 64.53 90.09 moveto
+56.49 96.99 lineto
+67.08 96.61 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+74 96.4 moveto 46 (FR: [2]) alignedtext
+grestore
+% ATA
+gsave
+0 0 0.75294 nodecolor
+newpath 276 61 moveto
+276 165 lineto
+332 165 lineto
+332 61 lineto
+closepath fill
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 276 61 moveto
+276 165 lineto
+332 165 lineto
+332 61 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+290 148.4 moveto 28 (ATA) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 276 139 moveto
+332 139 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+285.5 122.4 moveto 37 (5':[3]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 276 113 moveto
+332 113 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+284 96.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 276 87 moveto
+332 87 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+292.5 70.4 moveto 23 (3.0) alignedtext
+grestore
+% ATG->ATA
+gsave
+1 setlinewidth
+0.33333 1 1 edgecolor
+newpath 194.26 113 moveto
+214.82 113 242.89 113 265.5 113 curveto
+stroke
+0.33333 1 1 edgecolor
+newpath 265.78 116.5 moveto
+275.78 113 lineto
+265.78 109.5 lineto
+closepath fill
+1 setlinewidth
+solid
+0.33333 1 1 edgecolor
+newpath 265.78 116.5 moveto
+275.78 113 lineto
+265.78 109.5 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+212 118.4 moveto 46 (RF: [2]) alignedtext
+grestore
+% ATA->ATG
+gsave
+1 setlinewidth
+0.33333 1 1 edgecolor
+newpath 275.83 99.35 moveto
+270.06 97.13 263.93 95.18 258 94 curveto
+237.95 90.02 232.05 90.02 212 94 curveto
+209.31 94.53 206.58 95.23 203.87 96.03 curveto
+stroke
+0.33333 1 1 edgecolor
+newpath 202.5 92.8 moveto
+194.17 99.35 lineto
+204.76 99.42 lineto
+closepath fill
+1 setlinewidth
+solid
+0.33333 1 1 edgecolor
+newpath 202.5 92.8 moveto
+194.17 99.35 lineto
+204.76 99.42 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+212 99.4 moveto 46 (RF: [2]) alignedtext
+grestore
+% CTA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+newpath 428 122 moveto
+428 226 lineto
+484 226 lineto
+484 122 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+442 209.4 moveto 28 (CTA) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 428 200 moveto
+484 200 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+442 183.4 moveto 28 (5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 428 174 moveto
+484 174 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+436 157.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 428 148 moveto
+484 148 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+444.5 131.4 moveto 23 (2.0) alignedtext
+grestore
+% ATA->CTA
+gsave
+1 setlinewidth
+0.66667 1 1 edgecolor
+newpath 332.04 141.32 moveto
+337.63 145.78 343.73 149.93 350 153 curveto
+371.04 163.31 396.78 168.56 417.63 171.23 curveto
+stroke
+0.66667 1 1 edgecolor
+newpath 417.4 174.73 moveto
+427.73 172.38 lineto
+418.19 167.77 lineto
+closepath fill
+1 setlinewidth
+solid
+0.66667 1 1 edgecolor
+newpath 417.4 174.73 moveto
+427.73 172.38 lineto
+418.19 167.77 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+350 175.4 moveto 60 (FR: [1,3]) alignedtext
+grestore
+% AAT
+gsave
+0 0 0.75294 nodecolor
+newpath 428 0 moveto
+428 104 lineto
+484 104 lineto
+484 0 lineto
+closepath fill
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 428 0 moveto
+428 104 lineto
+484 104 lineto
+484 0 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+441.5 87.4 moveto 29 (AAT) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 428 78 moveto
+484 78 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+437.5 61.4 moveto 37 (5':[1]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 428 52 moveto
+484 52 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+436 35.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 428 26 moveto
+484 26 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+444.5 9.4 moveto 23 (1.0) alignedtext
+grestore
+% ATA->AAT
+gsave
+1 setlinewidth
+0 1 1 edgecolor
+newpath 332.17 101.23 moveto
+338.05 98.81 344.22 96.3 350 94 curveto
+376.53 83.44 383.47 81.56 410 71 curveto
+412.71 69.92 415.5 68.8 418.31 67.66 curveto
+stroke
+0 1 1 edgecolor
+newpath 419.89 70.79 moveto
+427.83 63.77 lineto
+417.25 64.31 lineto
+closepath fill
+1 setlinewidth
+solid
+0 1 1 edgecolor
+newpath 419.89 70.79 moveto
+427.83 63.77 lineto
+417.25 64.31 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+356 99.4 moveto 48 (RR: [1]) alignedtext
+grestore
+% CTA->ATA
+gsave
+1 setlinewidth
+0.66667 1 1 edgecolor
+newpath 427.77 142.96 moveto
+422.28 138.35 416.27 134.11 410 131 curveto
+389.11 120.65 363.37 116.12 342.49 114.18 curveto
+stroke
+0.66667 1 1 edgecolor
+newpath 342.6 110.68 moveto
+332.37 113.41 lineto
+342.07 117.66 lineto
+closepath fill
+1 setlinewidth
+solid
+0.66667 1 1 edgecolor
+newpath 342.6 110.68 moveto
+332.37 113.41 lineto
+342.07 117.66 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+350 136.4 moveto 60 (FR: [1,3]) alignedtext
+grestore
+% AGC
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+newpath 566 121 moveto
+566 225 lineto
+622 225 lineto
+622 121 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+578.5 208.4 moveto 31 (AGC) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 566 199 moveto
+622 199 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+580 182.4 moveto 28 (5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 566 173 moveto
+622 173 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+574 156.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 566 147 moveto
+622 147 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+582.5 130.4 moveto 23 (1.0) alignedtext
+grestore
+% CTA->AGC
+gsave
+1 setlinewidth
+0.33333 1 1 edgecolor
+newpath 484.26 173.8 moveto
+504.82 173.65 532.89 173.44 555.5 173.28 curveto
+stroke
+0.33333 1 1 edgecolor
+newpath 555.8 176.78 moveto
+565.78 173.2 lineto
+555.75 169.78 lineto
+closepath fill
+1 setlinewidth
+solid
+0.33333 1 1 edgecolor
+newpath 555.8 176.78 moveto
+565.78 173.2 lineto
+555.75 169.78 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+502 179.4 moveto 46 (RF: [3]) alignedtext
+grestore
+% AAT->ATA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 427.81 44.12 moveto
+405.46 39.47 374.08 36.65 350 49 curveto
+346.23 50.93 342.7 53.33 339.41 56.06 curveto
+stroke
+0 0 0 edgecolor
+newpath 337 53.52 moveto
+332.16 62.94 lineto
+341.82 58.6 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 337 53.52 moveto
+332.16 62.94 lineto
+341.82 58.6 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+357.5 54.4 moveto 45 (FF: [1]) alignedtext
+grestore
+% AGC->CTA
+gsave
+1 setlinewidth
+0.33333 1 1 edgecolor
+newpath 565.95 157.92 moveto
+560.17 155.47 554 153.31 548 152 curveto
+528.02 147.66 521.94 147.48 502 152 curveto
+499.34 152.6 496.65 153.38 493.99 154.28 curveto
+stroke
+0.33333 1 1 edgecolor
+newpath 492.53 151.09 moveto
+484.49 157.99 lineto
+495.08 157.61 lineto
+closepath fill
+1 setlinewidth
+solid
+0.33333 1 1 edgecolor
+newpath 492.53 151.09 moveto
+484.49 157.99 lineto
+495.08 157.61 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+502 157.4 moveto 46 (RF: [3]) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 666 270
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/TestSet/SplitRepeat/AdjSplitRepeat/test.txt b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/AdjSplitRepeat/test.txt
new file mode 100644
index 0000000..717f9ac
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/AdjSplitRepeat/test.txt
@@ -0,0 +1,6 @@
+GCA {[] [{ATG:[2]}] [] [] {5':[(2-0_0)], ~5':[]} 1.0x}
+ATA {[] [{CTA:[1,3]}] [{ATG:[2]}] [{AAT:[1]}] {5':[(3-0_0)], ~5':[]} 3.0x}
+CTA {[] [{ATA:[1,3]}] [{AGC:[3]}] [] {5':[], ~5':[]} 2.0x}
+AGC {[] [] [{CTA:[3]}] [] {5':[], ~5':[]} 1.0x}
+ATG {[] [{GCA:[2]}] [{ATA:[2]}] [] {5':[], ~5':[]} 1.0x}
+AAT {[{ATA:[1]}] [] [] [] {5':[(1-0_0)], ~5':[]} 1.0x}
diff --git a/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitOnce/bin/.part-00000.crc b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitOnce/bin/.part-00000.crc
new file mode 100644
index 0000000..a6df588
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitOnce/bin/.part-00000.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitOnce/bin/part-00000 b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitOnce/bin/part-00000
new file mode 100755
index 0000000..cae8624
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitOnce/bin/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitOnce/graphviz/result.ps b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitOnce/graphviz/result.ps
new file mode 100644
index 0000000..d1b22d7
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitOnce/graphviz/result.ps
@@ -0,0 +1,593 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 516 270
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 480 234 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% GCA
+gsave
+0 0 0.75294 nodecolor
+newpath 0 61 moveto
+0 165 lineto
+56 165 lineto
+56 61 lineto
+closepath fill
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 0 61 moveto
+0 165 lineto
+56 165 lineto
+56 61 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+12.5 148.4 moveto 31 (GCA) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 0 139 moveto
+56 139 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+9.5 122.4 moveto 37 (5':[2]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 0 113 moveto
+56 113 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+8 96.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 0 87 moveto
+56 87 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+16.5 70.4 moveto 23 (1.0) alignedtext
+grestore
+% ATG
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+newpath 138 61 moveto
+138 165 lineto
+194 165 lineto
+194 61 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+151 148.4 moveto 30 (ATG) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 138 139 moveto
+194 139 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+152 122.4 moveto 28 (5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 138 113 moveto
+194 113 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+146 96.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 138 87 moveto
+194 87 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+154.5 70.4 moveto 23 (1.0) alignedtext
+grestore
+% GCA->ATG
+gsave
+1 setlinewidth
+0.66667 1 1 edgecolor
+newpath 56.26 113 moveto
+76.82 113 104.89 113 127.5 113 curveto
+stroke
+0.66667 1 1 edgecolor
+newpath 127.78 116.5 moveto
+137.78 113 lineto
+127.78 109.5 lineto
+closepath fill
+1 setlinewidth
+solid
+0.66667 1 1 edgecolor
+newpath 127.78 116.5 moveto
+137.78 113 lineto
+127.78 109.5 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+74 118.4 moveto 46 (FR: [2]) alignedtext
+grestore
+% ATG->GCA
+gsave
+1 setlinewidth
+0.66667 1 1 edgecolor
+newpath 137.51 96.99 moveto
+131.87 94.51 125.87 92.33 120 91 curveto
+100.06 86.48 93.94 86.48 74 91 curveto
+71.34 91.6 68.65 92.38 65.99 93.28 curveto
+stroke
+0.66667 1 1 edgecolor
+newpath 64.53 90.09 moveto
+56.49 96.99 lineto
+67.08 96.61 lineto
+closepath fill
+1 setlinewidth
+solid
+0.66667 1 1 edgecolor
+newpath 64.53 90.09 moveto
+56.49 96.99 lineto
+67.08 96.61 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+74 96.4 moveto 46 (FR: [2]) alignedtext
+grestore
+% ATA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+newpath 276 61 moveto
+276 165 lineto
+332 165 lineto
+332 61 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+290 148.4 moveto 28 (ATA) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 276 139 moveto
+332 139 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+290 122.4 moveto 28 (5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 276 113 moveto
+332 113 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+284 96.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 276 87 moveto
+332 87 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+292.5 70.4 moveto 23 (2.0) alignedtext
+grestore
+% ATG->ATA
+gsave
+1 setlinewidth
+0.33333 1 1 edgecolor
+newpath 194.26 113 moveto
+214.82 113 242.89 113 265.5 113 curveto
+stroke
+0.33333 1 1 edgecolor
+newpath 265.78 116.5 moveto
+275.78 113 lineto
+265.78 109.5 lineto
+closepath fill
+1 setlinewidth
+solid
+0.33333 1 1 edgecolor
+newpath 265.78 116.5 moveto
+275.78 113 lineto
+265.78 109.5 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+212 118.4 moveto 46 (RF: [2]) alignedtext
+grestore
+% ATA->ATG
+gsave
+1 setlinewidth
+0.33333 1 1 edgecolor
+newpath 275.83 99.35 moveto
+270.06 97.13 263.93 95.18 258 94 curveto
+237.95 90.02 232.05 90.02 212 94 curveto
+209.31 94.53 206.58 95.23 203.87 96.03 curveto
+stroke
+0.33333 1 1 edgecolor
+newpath 202.5 92.8 moveto
+194.17 99.35 lineto
+204.76 99.42 lineto
+closepath fill
+1 setlinewidth
+solid
+0.33333 1 1 edgecolor
+newpath 202.5 92.8 moveto
+194.17 99.35 lineto
+204.76 99.42 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+212 99.4 moveto 46 (RF: [2]) alignedtext
+grestore
+% CTA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+newpath 416 122 moveto
+416 226 lineto
+472 226 lineto
+472 122 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+430 209.4 moveto 28 (CTA) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 416 200 moveto
+472 200 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+430 183.4 moveto 28 (5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 416 174 moveto
+472 174 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+424 157.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 416 148 moveto
+472 148 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+432.5 131.4 moveto 23 (1.0) alignedtext
+grestore
+% ATA->CTA
+gsave
+1 setlinewidth
+0.66667 1 1 edgecolor
+newpath 332.1 141.19 moveto
+337.69 145.67 343.77 149.85 350 153 curveto
+367.22 161.7 388.01 166.82 405.65 169.83 curveto
+stroke
+0.66667 1 1 edgecolor
+newpath 405.21 173.3 moveto
+415.62 171.36 lineto
+406.27 166.38 lineto
+closepath fill
+1 setlinewidth
+solid
+0.66667 1 1 edgecolor
+newpath 405.21 173.3 moveto
+415.62 171.36 lineto
+406.27 166.38 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+351 173.4 moveto 46 (FR: [1]) alignedtext
+grestore
+% AAT
+gsave
+0 0 0.75294 nodecolor
+newpath 416 0 moveto
+416 104 lineto
+472 104 lineto
+472 0 lineto
+closepath fill
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 416 0 moveto
+416 104 lineto
+472 104 lineto
+472 0 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+429.5 87.4 moveto 29 (AAT) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 416 78 moveto
+472 78 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+425.5 61.4 moveto 37 (5':[1]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 416 52 moveto
+472 52 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+424 35.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 416 26 moveto
+472 26 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+432.5 9.4 moveto 23 (1.0) alignedtext
+grestore
+% ATA->AAT
+gsave
+1 setlinewidth
+0 1 1 edgecolor
+newpath 332.33 100.66 moveto
+353.68 91.35 383.16 78.51 406.47 68.35 curveto
+stroke
+0 1 1 edgecolor
+newpath 407.94 71.53 moveto
+415.71 64.33 lineto
+405.15 65.11 lineto
+closepath fill
+1 setlinewidth
+solid
+0 1 1 edgecolor
+newpath 407.94 71.53 moveto
+415.71 64.33 lineto
+405.15 65.11 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+350 97.4 moveto 48 (RR: [1]) alignedtext
+grestore
+% CTA->ATA
+gsave
+1 setlinewidth
+0.66667 1 1 edgecolor
+newpath 415.97 143.29 moveto
+410.42 138.58 404.33 134.22 398 131 curveto
+380.79 122.25 359.84 117.73 342.09 115.41 curveto
+stroke
+0.66667 1 1 edgecolor
+newpath 342.38 111.92 moveto
+332.06 114.28 lineto
+341.6 118.88 lineto
+closepath fill
+1 setlinewidth
+solid
+0.66667 1 1 edgecolor
+newpath 342.38 111.92 moveto
+332.06 114.28 lineto
+341.6 118.88 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+351 136.4 moveto 46 (FR: [1]) alignedtext
+grestore
+% AAT->ATA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 415.77 45.66 moveto
+396.27 42.75 370.3 41.88 350 52 curveto
+346.36 53.81 342.94 56.06 339.73 58.62 curveto
+stroke
+0 0 0 edgecolor
+newpath 337.19 56.21 moveto
+332.19 65.55 lineto
+341.92 61.36 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 337.19 56.21 moveto
+332.19 65.55 lineto
+341.92 61.36 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+351.5 57.4 moveto 45 (FF: [1]) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 516 270
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitOnce/test.txt b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitOnce/test.txt
new file mode 100644
index 0000000..17db920
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitOnce/test.txt
@@ -0,0 +1,5 @@
+GCA {[] [{ATG:[2]}] [] [] {5':[(2-0_0)], ~5':[]} 1.0x}
+ATA {[] [{CTA:[1]}] [{ATG:[2]}] [{AAT:[1]}] {5':[], ~5':[]} 2.0x}
+CTA {[] [{ATA:[1]}] [] [] {5':[], ~5':[]} 1.0x}
+ATG {[] [{GCA:[2]}] [{ATA:[2]}] [] {5':[], ~5':[]} 1.0x}
+AAT {[{ATA:[1]}] [] [] [] {5':[(1-0_0)], ~5':[]} 1.0x}
diff --git a/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitTwice/bin/.part-00000.crc b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitTwice/bin/.part-00000.crc
new file mode 100644
index 0000000..72b8bf2
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitTwice/bin/.part-00000.crc
Binary files differ
diff --git a/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitTwice/bin/part-00000 b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitTwice/bin/part-00000
new file mode 100755
index 0000000..a361d8e
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitTwice/bin/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitTwice/graphviz/result.ps b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitTwice/graphviz/result.ps
new file mode 100644
index 0000000..c435522
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitTwice/graphviz/result.ps
@@ -0,0 +1,594 @@
+%!PS-Adobe-3.0
+%%Creator: graphviz version 2.26.3 (20100126.1600)
+%%Title: G
+%%Pages: (atend)
+%%BoundingBox: (atend)
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+ dup dup findfont dup length dict begin
+ { 1 index /FID ne { def }{ pop pop } ifelse
+ } forall
+ /Encoding EncodingVector def
+ currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+ dup 1 exch div /InvScaleFactor exch def
+ scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+
+% hooks for setting color
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage { % i j npages
+ /npages exch def
+ /j exch def
+ /i exch def
+ /str 10 string def
+ npages 1 gt {
+ gsave
+ coordfont setfont
+ 0 0 moveto
+ (\() show i str cvs show (,) show j str cvs show (\)) show
+ grestore
+ } if
+} bind def
+
+/set_font {
+ findfont exch
+ scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext { % width text
+ /text exch def
+ /width exch def
+ gsave
+ width 0 gt {
+ [] 0 setdash
+ text stringwidth pop width exch sub text length div 0 text ashow
+ } if
+ grestore
+} def
+
+/boxprim { % xcorner ycorner xsize ysize
+ 4 2 roll
+ moveto
+ 2 copy
+ exch 0 rlineto
+ 0 exch rlineto
+ pop neg 0 rlineto
+ closepath
+} bind def
+
+/ellipse_path {
+ /ry exch def
+ /rx exch def
+ /y exch def
+ /x exch def
+ matrix currentmatrix
+ newpath
+ x y translate
+ rx ry scale
+ 0 0 1 0 360 arc
+ setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+ [ % layer color sequence - darkest to lightest
+ [0 0 0]
+ [.2 .8 .8]
+ [.4 .8 .8]
+ [.6 .8 .8]
+ [.8 .8 .8]
+ ]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+ layercolorseq curlayer 1 sub layerlen mod get
+ aload pop sethsbcolor
+ /nodecolor {nopcolor} def
+ /edgecolor {nopcolor} def
+ /graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+ /myupper exch def
+ /mylower exch def
+ curlayer mylower lt
+ curlayer myupper gt
+ or
+ {invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+ userdict (<<) cvn ([) cvn load put
+ userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 250 514
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 214 478 boxprim clip newpath
+1 1 set_scale 0 rotate 40 41 translate
+% ATA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+newpath 0 182 moveto
+0 286 lineto
+56 286 lineto
+56 182 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+14 269.4 moveto 28 (ATA) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 0 260 moveto
+56 260 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+14 243.4 moveto 28 (5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 0 234 moveto
+56 234 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+8 217.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 0 208 moveto
+56 208 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+16.5 191.4 moveto 23 (2.0) alignedtext
+grestore
+% GTA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+newpath 145 366 moveto
+145 470 lineto
+201 470 lineto
+201 366 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+158.5 453.4 moveto 29 (GTA) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 145 444 moveto
+201 444 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+159 427.4 moveto 28 (5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 145 418 moveto
+201 418 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+153 401.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 145 392 moveto
+201 392 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+161.5 375.4 moveto 23 (1.0) alignedtext
+grestore
+% ATA->GTA
+gsave
+1 setlinewidth
+0.66667 1 1 edgecolor
+newpath 39.53 286.12 moveto
+48.36 321.88 61.15 365.25 74 379 curveto
+90.04 396.17 114.58 405.91 135.21 411.36 curveto
+stroke
+0.66667 1 1 edgecolor
+newpath 134.43 414.77 moveto
+144.97 413.7 lineto
+136.07 407.96 lineto
+closepath fill
+1 setlinewidth
+solid
+0.66667 1 1 edgecolor
+newpath 134.43 414.77 moveto
+144.97 413.7 lineto
+136.07 407.96 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+75 411.4 moveto 46 (FR: [2]) alignedtext
+grestore
+% CTA
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+newpath 145 244 moveto
+145 348 lineto
+201 348 lineto
+201 244 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+159 331.4 moveto 28 (CTA) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 145 322 moveto
+201 322 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+159 305.4 moveto 28 (5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 145 296 moveto
+201 296 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+153 279.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+0 0 0 nodecolor
+newpath 145 270 moveto
+201 270 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+161.5 253.4 moveto 23 (1.0) alignedtext
+grestore
+% ATA->CTA
+gsave
+1 setlinewidth
+0.66667 1 1 edgecolor
+newpath 56.1 262.2 moveto
+61.69 266.67 67.76 270.85 74 274 curveto
+92.83 283.5 115.72 288.93 134.75 292.01 curveto
+stroke
+0.66667 1 1 edgecolor
+newpath 134.47 295.51 moveto
+144.87 293.5 lineto
+135.48 288.58 lineto
+closepath fill
+1 setlinewidth
+solid
+0.66667 1 1 edgecolor
+newpath 134.47 295.51 moveto
+144.87 293.5 lineto
+135.48 288.58 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+75 294.4 moveto 46 (FR: [1]) alignedtext
+grestore
+% ATG
+gsave
+0 0 0.75294 nodecolor
+newpath 140.5 122 moveto
+140.5 226 lineto
+205.5 226 lineto
+205.5 122 lineto
+closepath fill
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 140.5 122 moveto
+140.5 226 lineto
+205.5 226 lineto
+205.5 122 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+158 209.4 moveto 30 (ATG) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 140.5 200 moveto
+205.5 200 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+159 183.4 moveto 28 (5':[]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 140.5 174 moveto
+205.5 174 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+148.5 157.4 moveto 49 (~5':[2]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 140.5 148 moveto
+205.5 148 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+161.5 131.4 moveto 23 (1.0) alignedtext
+grestore
+% ATA->ATG
+gsave
+1 setlinewidth
+0.33333 1 1 edgecolor
+newpath 56.3 222.29 moveto
+77.62 213.47 107.18 201.23 131.22 191.29 curveto
+stroke
+0.33333 1 1 edgecolor
+newpath 132.57 194.52 moveto
+140.47 187.46 lineto
+129.89 188.05 lineto
+closepath fill
+1 setlinewidth
+solid
+0.33333 1 1 edgecolor
+newpath 132.57 194.52 moveto
+140.47 187.46 lineto
+129.89 188.05 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+75 219.4 moveto 46 (RF: [2]) alignedtext
+grestore
+% AAT
+gsave
+0 0 0.75294 nodecolor
+newpath 145 0 moveto
+145 104 lineto
+201 104 lineto
+201 0 lineto
+closepath fill
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 145 0 moveto
+145 104 lineto
+201 104 lineto
+201 0 lineto
+closepath stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+158.5 87.4 moveto 29 (AAT) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 145 78 moveto
+201 78 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+154.5 61.4 moveto 37 (5':[1]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 145 52 moveto
+201 52 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+153 35.4 moveto 40 (~5':[]) alignedtext
+1 setlinewidth
+filled
+0 0 0 nodecolor
+newpath 145 26 moveto
+201 26 lineto
+stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+161.5 9.4 moveto 23 (1.0) alignedtext
+grestore
+% ATA->AAT
+gsave
+1 setlinewidth
+0 1 1 edgecolor
+newpath 49.89 181.89 moveto
+56.68 168.31 64.83 154.1 74 142 curveto
+91.95 118.3 116.71 95.8 136.97 79.22 curveto
+stroke
+0 1 1 edgecolor
+newpath 139.17 81.94 moveto
+144.77 72.95 lineto
+134.79 76.48 lineto
+closepath fill
+1 setlinewidth
+solid
+0 1 1 edgecolor
+newpath 139.17 81.94 moveto
+144.77 72.95 lineto
+134.79 76.48 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+74 147.4 moveto 48 (RR: [1]) alignedtext
+grestore
+% GTA->ATA
+gsave
+1 setlinewidth
+0.66667 1 1 edgecolor
+newpath 144.79 382.91 moveto
+137.54 374.28 129.63 365.17 122 357 curveto
+101.83 335.41 92.41 334.11 74 311 curveto
+69.76 305.68 65.63 299.94 61.7 294.06 curveto
+stroke
+0.66667 1 1 edgecolor
+newpath 64.46 291.89 moveto
+56.08 285.41 lineto
+58.59 295.7 lineto
+closepath fill
+1 setlinewidth
+solid
+0.66667 1 1 edgecolor
+newpath 64.46 291.89 moveto
+56.08 285.41 lineto
+58.59 295.7 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+75 362.4 moveto 46 (FR: [2]) alignedtext
+grestore
+% CTA->ATA
+gsave
+1 setlinewidth
+0.66667 1 1 edgecolor
+newpath 144.96 267.3 moveto
+137.95 261.46 130.11 255.92 122 252 curveto
+104.74 243.65 83.95 239.17 66.32 236.77 curveto
+stroke
+0.66667 1 1 edgecolor
+newpath 66.69 233.29 moveto
+56.35 235.58 lineto
+65.86 240.24 lineto
+closepath fill
+1 setlinewidth
+solid
+0.66667 1 1 edgecolor
+newpath 66.69 233.29 moveto
+56.35 235.58 lineto
+65.86 240.24 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+75 257.4 moveto 46 (FR: [1]) alignedtext
+grestore
+% ATG->ATA
+gsave
+1 setlinewidth
+0.33333 1 1 edgecolor
+newpath 140.11 166.4 moveto
+119.99 163.46 94.31 162.88 74 173 curveto
+70.36 174.81 66.94 177.06 63.73 179.62 curveto
+stroke
+0.33333 1 1 edgecolor
+newpath 61.19 177.21 moveto
+56.19 186.55 lineto
+65.92 182.36 lineto
+closepath fill
+1 setlinewidth
+solid
+0.33333 1 1 edgecolor
+newpath 61.19 177.21 moveto
+56.19 186.55 lineto
+65.92 182.36 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+75 178.4 moveto 46 (RF: [2]) alignedtext
+grestore
+% AAT->ATA
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 144.77 49.61 moveto
+122.81 49.34 92.99 52.71 74 70 curveto
+58.7 83.93 46.38 131.65 38.35 172.05 curveto
+stroke
+0 0 0 edgecolor
+newpath 34.92 171.38 moveto
+36.46 181.87 lineto
+41.79 172.71 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 34.92 171.38 moveto
+36.46 181.87 lineto
+41.79 172.71 lineto
+closepath stroke
+0 0 0 edgecolor
+14 /Times-Roman set_font
+75.5 75.4 moveto 45 (FF: [1]) alignedtext
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+%%Pages: 1
+%%BoundingBox: 36 36 250 514
+end
+restore
+%%EOF
diff --git a/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitTwice/test.txt b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitTwice/test.txt
new file mode 100644
index 0000000..1a39b81
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/SplitRepeat/SplitTwice/test.txt
@@ -0,0 +1,5 @@
+ATA {[] [{GTA:[2]}, {CTA:[1]}] [{ATG:[2]}] [{AAT:[1]}] {5':[], ~5':[]} 2.0x}
+CTA {[] [{ATA:[1]}] [] [] {5':[], ~5':[]} 1.0x}
+GTA {[] [{ATA:[2]}] [] [] {5':[], ~5':[]} 1.0x}
+ATG {[] [] [{ATA:[2]}] [] {5':[], ~5':[(2-0_0)]} 1.0x}
+AAT {[{ATA:[1]}] [] [] [] {5':[(1-0_0)], ~5':[]} 1.0x}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/SplitRepeatMessageWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/SplitRepeatMessageWritable.java
new file mode 100644
index 0000000..890a6ed
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/SplitRepeatMessageWritable.java
@@ -0,0 +1,43 @@
+package edu.uci.ics.genomix.pregelix.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import edu.uci.ics.genomix.type.EdgeWritable;
+
+public class SplitRepeatMessageWritable extends MessageWritable {
+ // Message type of SplitRepeatVertex: a MessageWritable that additionally carries one EdgeWritable.
+ private EdgeWritable createdEdge;
+ /** Creates a message that owns a freshly constructed EdgeWritable. */
+ public SplitRepeatMessageWritable(){
+ super();
+ createdEdge = new EdgeWritable();
+ }
+ /** Resets the inherited message state, then resets the carried edge in place. */
+ public void reset(){
+ super.reset();
+ createdEdge.reset();
+ }
+ /** Returns the edge instance owned by this message (the internal object, not a copy). */
+ public EdgeWritable getCreatedEdge() {
+ return createdEdge;
+ }
+ /** Copies {@code createdEdge} into the owned edge; keeping the caller's reference would let reset()/readFields() overwrite the caller's object. */
+ public void setCreatedEdge(EdgeWritable createdEdge) {
+ this.createdEdge.setAsCopy(createdEdge);
+ }
+ /** Clears this message, then reads the inherited fields followed by the carried edge (the same order write() emits). */
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ reset();
+ super.readFields(in);
+ createdEdge.readFields(in);
+ }
+ /** Writes the inherited fields followed by the carried edge. */
+ @Override
+ public void write(DataOutput out) throws IOException {
+ super.write(out);
+ createdEdge.write(out);
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SimpleSplitRepeatVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SimpleSplitRepeatVertex.java
deleted file mode 100644
index cd502cd..0000000
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SimpleSplitRepeatVertex.java
+++ /dev/null
@@ -1,313 +0,0 @@
-package edu.uci.ics.genomix.pregelix.operator.splitrepeat;
-
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Random;
-import java.util.Set;
-
-import edu.uci.ics.genomix.pregelix.io.MessageWritable;
-import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
-import edu.uci.ics.genomix.pregelix.operator.BasicGraphCleanVertex;
-import edu.uci.ics.genomix.pregelix.type.MessageFlag;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerListWritable;
-import edu.uci.ics.pregelix.api.graph.Vertex;
-import edu.uci.ics.pregelix.api.util.BspUtils;
-
-public class SimpleSplitRepeatVertex extends
- BasicGraphCleanVertex<MessageWritable>{
-
- public class EdgeDir{
- public static final byte DIR_FF = 0 << 0;
- public static final byte DIR_FR = 1 << 0;
- public static final byte DIR_RF = 2 << 0;
- public static final byte DIR_RR = 3 << 0;
- }
-
- public class DeletedEdge{
- private byte dir;
- private VKmerBytesWritable edge;
-
- public DeletedEdge(){
- dir = 0;
- edge = new VKmerBytesWritable(kmerSize);
- }
-
- public byte getDir() {
- return dir;
- }
-
- public void setDir(byte dir) {
- this.dir = dir;
- }
-
- public VKmerBytesWritable getEdge() {
- return edge;
- }
-
- public void setEdge(VKmerBytesWritable edge) {
- this.edge.setAsCopy(edge);
- }
- }
-
- private byte[][] connectedTable = new byte[][]{
- {EdgeDir.DIR_RF, EdgeDir.DIR_FF},
- {EdgeDir.DIR_RF, EdgeDir.DIR_FR},
- {EdgeDir.DIR_RR, EdgeDir.DIR_FF},
- {EdgeDir.DIR_RR, EdgeDir.DIR_FR}
- };
-
- public static Set<String> existKmerString = new HashSet<String>();
- protected VKmerBytesWritable createdVertexId = null;
- private Set<Long> incomingReadIdSet = new HashSet<Long>();
- private Set<Long> outgoingReadIdSet = new HashSet<Long>();
- private Set<Long> neighborEdgeIntersection = new HashSet<Long>();
- private VKmerListWritable incomingEdgeList = null;
- private VKmerListWritable outgoingEdgeList = null;
- private byte incomingEdgeDir = 0;
- private byte outgoingEdgeDir = 0;
-
- /**
- * initiate kmerSize, maxIteration
- */
- public void initVertex() {
- if (kmerSize == -1)
- kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
- if (maxIteration < 0)
- maxIteration = getContext().getConfiguration().getInt(ITERATIONS, 1000000);
- if(incomingMsg == null)
- incomingMsg = new MessageWritable();
- if(outgoingMsg == null)
- outgoingMsg = new MessageWritable();
- else
- outgoingMsg.reset();
- if(destVertexId == null)
- destVertexId = new VKmerBytesWritable();
- if(tmpKmer == null)
- tmpKmer = new VKmerBytesWritable();
- if(incomingEdgeList == null)
- incomingEdgeList = new VKmerListWritable();
- if(outgoingEdgeList == null)
- outgoingEdgeList = new VKmerListWritable();
- if(createdVertexId == null)
- createdVertexId = new VKmerBytesWritable();
- }
-
- /**
- * Generate random string from [ACGT]
- */
- public String generaterRandomString(int n){
- char[] chars = "ACGT".toCharArray();
- StringBuilder sb = new StringBuilder();
- Random random = new Random();
- while(true){
- for (int i = 0; i < n; i++) {
- char c = chars[random.nextInt(chars.length)];
- sb.append(c);
- }
- if(!existKmerString.contains(sb.toString()))
- break;
- }
- existKmerString.add(sb.toString());
- return sb.toString();
- }
-
- public void randomGenerateVertexId(int numOfSuffix){
- String newVertexId = getVertexId().toString() + generaterRandomString(numOfSuffix);;
- createdVertexId.setByRead(kmerSize + numOfSuffix, newVertexId.getBytes(), 0);
- }
-
- @SuppressWarnings({ "rawtypes", "unchecked" })
- public void createNewVertex(int i, VKmerBytesWritable incomingEdge, VKmerBytesWritable outgoingEdge){
- Vertex vertex = (Vertex) BspUtils.createVertex(getContext().getConfiguration());
- vertex.getMsgList().clear();
- vertex.getEdges().clear();
- VKmerBytesWritable vertexId = new VKmerBytesWritable(kmerSize);
- VertexValueWritable vertexValue = new VertexValueWritable(); //kmerSize
- //add the corresponding edge to new vertex
- switch(connectedTable[i][0]){
- case EdgeDir.DIR_RF:
- vertexValue.getRFList().add(incomingEdge);
- break;
- case EdgeDir.DIR_RR:
- vertexValue.getRRList().append(incomingEdge);
- break;
- }
- switch(connectedTable[i][1]){
- case EdgeDir.DIR_FF:
- vertexValue.getFFList().append(outgoingEdge);
- break;
- case EdgeDir.DIR_FR:
- vertexValue.getFRList().append(outgoingEdge);
- break;
- }
- vertexId.setAsCopy(createdVertexId);
- vertex.setVertexId(vertexId);
- vertex.setVertexValue(vertexValue);
-
- addVertex(vertexId, vertex);
- }
-
- public void sendMsgToUpdateEdge(VKmerBytesWritable incomingEdge, VKmerBytesWritable outgoingEdge){
- outgoingMsg.setCreatedVertexId(createdVertexId);
- outgoingMsg.setSourceVertexId(getVertexId());
-
- outgoingMsg.setFlag(incomingEdgeDir);
- destVertexId.setAsCopy(incomingEdge);
- sendMsg(destVertexId, outgoingMsg);
-
- outgoingMsg.setFlag(outgoingEdgeDir);
- destVertexId.setAsCopy(outgoingEdge);
- sendMsg(destVertexId, outgoingMsg);
- }
-
- public void storeDeletedEdge(Set<DeletedEdge> deletedEdges, int i, VKmerBytesWritable incomingEdge, VKmerBytesWritable outgoingEdge){
- DeletedEdge deletedIncomingEdge = new DeletedEdge();
- DeletedEdge deletedOutgoingEdge = new DeletedEdge();
- switch(connectedTable[i][0]){
- case EdgeDir.DIR_RF:
- deletedIncomingEdge.setDir(EdgeDir.DIR_RF);
- deletedIncomingEdge.setEdge(incomingEdge);
- break;
- case EdgeDir.DIR_RR:
- deletedIncomingEdge.setDir(EdgeDir.DIR_RR);
- deletedIncomingEdge.setEdge(incomingEdge);
- break;
- }
- switch(connectedTable[i][1]){
- case EdgeDir.DIR_FF:
- deletedOutgoingEdge.setDir(EdgeDir.DIR_FF);
- deletedOutgoingEdge.setEdge(outgoingEdge);
- break;
- case EdgeDir.DIR_FR:
- deletedOutgoingEdge.setDir(EdgeDir.DIR_FR);
- deletedOutgoingEdge.setEdge(outgoingEdge);
- break;
- }
- deletedEdges.add(deletedIncomingEdge);
- deletedEdges.add(deletedOutgoingEdge);
- }
- public void deleteEdgeFromOldVertex(DeletedEdge deleteEdge){
- switch(deleteEdge.dir){
- case EdgeDir.DIR_RF:
- getVertexValue().getRFList().remove(deleteEdge.getEdge());
- break;
- case EdgeDir.DIR_RR:
- getVertexValue().getRRList().remove(deleteEdge.getEdge());
- break;
- case EdgeDir.DIR_FF:
- getVertexValue().getFFList().remove(deleteEdge.getEdge());
- break;
- case EdgeDir.DIR_FR:
- getVertexValue().getFRList().remove(deleteEdge.getEdge());
- break;
- }
- }
-
- public void setEdgeListAndEdgeDir(int i){
- switch(connectedTable[i][0]){
- case EdgeDir.DIR_RF:
- incomingEdgeList.setCopy(getVertexValue().getRFList());
- incomingEdgeDir = MessageFlag.DIR_RF;
- break;
- case EdgeDir.DIR_RR:
- incomingEdgeList.setCopy(getVertexValue().getRRList());
- incomingEdgeDir = MessageFlag.DIR_RR;
- break;
- }
- switch(connectedTable[i][1]){
- case EdgeDir.DIR_FF:
- outgoingEdgeList.setCopy(getVertexValue().getFFList());
- outgoingEdgeDir = MessageFlag.DIR_FF;
- break;
- case EdgeDir.DIR_FR:
- outgoingEdgeList.setCopy(getVertexValue().getFRList());
- outgoingEdgeDir = MessageFlag.DIR_FR;
- break;
- }
- }
-
- public void setNeighborEdgeIntersection(VKmerBytesWritable incomingEdge, VKmerBytesWritable outgoingEdge){
-// incomingReadIdSet.clear();
-// outgoingReadIdSet.clear();
-// tmpKmer.setAsCopy(incomingEdge);
-// incomingReadIdSet.addAll(kmerMap.get(tmpKmer));
-// tmpKmer.setAsCopy(outgoingEdge);
-// outgoingReadIdSet.addAll(kmerMap.get(tmpKmer));
-//
-// //set all neighberEdge readId intersection
-// neighborEdgeIntersection.addAll(selfReadIdSet);
- neighborEdgeIntersection.retainAll(incomingReadIdSet);
- neighborEdgeIntersection.retainAll(outgoingReadIdSet);
- }
-
- public void updateEdgeListPointToNewVertex(){
- byte meToNeighborDir = incomingMsg.getFlag();
- byte neighborToMeDir = mirrorDirection(meToNeighborDir);
- switch(neighborToMeDir){
- case MessageFlag.DIR_FF:
- getVertexValue().getFFList().remove(incomingMsg.getSourceVertexId());
- getVertexValue().getFFList().append(incomingMsg.getCreatedVertexId());
- break;
- case MessageFlag.DIR_FR:
- getVertexValue().getFRList().remove(incomingMsg.getSourceVertexId());
- getVertexValue().getFRList().append(incomingMsg.getCreatedVertexId());
- break;
- case MessageFlag.DIR_RF:
- getVertexValue().getRFList().remove(incomingMsg.getSourceVertexId());
- getVertexValue().getRFList().append(incomingMsg.getCreatedVertexId());
- break;
- case MessageFlag.DIR_RR:
- getVertexValue().getRRList().remove(incomingMsg.getSourceVertexId());
- getVertexValue().getRRList().append(incomingMsg.getCreatedVertexId());
- break;
- }
- }
-
- @Override
- public void compute(Iterator<MessageWritable> msgIterator) {
- initVertex();
- if(getSuperstep() == 1){
- if(getVertexValue().getDegree() > 2){
- //A set storing deleted edges
- Set<DeletedEdge> deletedEdges = new HashSet<DeletedEdge>();
- /** process connectedTable **/
- for(int i = 0; i < 4; i++){
- /** set edgeList and edgeDir based on connectedTable **/
- setEdgeListAndEdgeDir(i);
-
- for(VKmerBytesWritable incomingEdge : incomingEdgeList){
- for(VKmerBytesWritable outgoingEdge : outgoingEdgeList){
- /** set neighborEdge readId intersection **/
- setNeighborEdgeIntersection(incomingEdge, outgoingEdge);
-
- if(!neighborEdgeIntersection.isEmpty()){
- /** random generate vertexId of new vertex **/
- randomGenerateVertexId(3);
-
- /** create new/created vertex **/
- createNewVertex(i, incomingEdge, outgoingEdge);
-
- /** send msg to neighbors to update their edges to new vertex **/
- sendMsgToUpdateEdge(incomingEdge, outgoingEdge);
-
- /** store deleted edge **/
- storeDeletedEdge(deletedEdges, i, incomingEdge, outgoingEdge);
- }
- }
- }
- }
- /** delete extra edges from old vertex **/
- for(DeletedEdge deletedEdge : deletedEdges){
- deleteEdgeFromOldVertex(deletedEdge);
- }
-
- /** Old vertex delete or voteToHalt **/
- if(getVertexValue().getDegree() == 0)//if no any edge, delete
- deleteVertex(getVertexId());
- else
- voteToHalt();
- }
- }
- }
-}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SplitRepeatVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SplitRepeatVertex.java
index c9f5a8d..d3dec3a 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SplitRepeatVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SplitRepeatVertex.java
@@ -1,43 +1,30 @@
package edu.uci.ics.genomix.pregelix.operator.splitrepeat;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
-import java.util.Map;
import java.util.Random;
import java.util.Set;
-import edu.uci.ics.genomix.pregelix.client.Client;
-import edu.uci.ics.genomix.pregelix.format.GraphCleanOutputFormat;
-import edu.uci.ics.genomix.pregelix.format.InitialGraphCleanInputFormat;
-import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.SplitRepeatMessageWritable;
import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
import edu.uci.ics.genomix.pregelix.operator.BasicGraphCleanVertex;
import edu.uci.ics.genomix.pregelix.type.MessageFlag;
-import edu.uci.ics.genomix.type.PositionWritable;
-import edu.uci.ics.genomix.type.VKmerListWritable;
+import edu.uci.ics.genomix.type.EdgeListWritable;
+import edu.uci.ics.genomix.type.EdgeWritable;
import edu.uci.ics.genomix.type.VKmerBytesWritable;
import edu.uci.ics.pregelix.api.graph.Vertex;
-import edu.uci.ics.pregelix.api.job.PregelixJob;
import edu.uci.ics.pregelix.api.util.BspUtils;
public class SplitRepeatVertex extends
- BasicGraphCleanVertex{
-
- public class EdgeDir{
- public static final byte DIR_FF = 0 << 0;
- public static final byte DIR_FR = 1 << 0;
- public static final byte DIR_RF = 2 << 0;
- public static final byte DIR_RR = 3 << 0;
- }
+ BasicGraphCleanVertex<SplitRepeatMessageWritable>{
public class DeletedEdge{
private byte dir;
- private VKmerBytesWritable edge;
+ private EdgeWritable edge;
public DeletedEdge(){
dir = 0;
- edge = new VKmerBytesWritable(kmerSize);
+ edge = new EdgeWritable();
}
public byte getDir() {
@@ -48,35 +35,32 @@
this.dir = dir;
}
- public VKmerBytesWritable getEdge() {
+ public EdgeWritable getEdge() {
return edge;
}
- public void setEdge(VKmerBytesWritable edge) {
+ public void setEdge(EdgeWritable edge) {
this.edge.setAsCopy(edge);
}
}
private byte[][] connectedTable = new byte[][]{
- {EdgeDir.DIR_RF, EdgeDir.DIR_FF},
- {EdgeDir.DIR_RF, EdgeDir.DIR_FR},
- {EdgeDir.DIR_RR, EdgeDir.DIR_FF},
- {EdgeDir.DIR_RR, EdgeDir.DIR_FR}
+ {MessageFlag.DIR_RF, MessageFlag.DIR_FF},
+ {MessageFlag.DIR_RF, MessageFlag.DIR_FR},
+ {MessageFlag.DIR_RR, MessageFlag.DIR_FF},
+ {MessageFlag.DIR_RR, MessageFlag.DIR_FR}
};
+
public static Set<String> existKmerString = new HashSet<String>();
- private Set<Long> readIdSet;
+ protected VKmerBytesWritable createdVertexId = null;
private Set<Long> incomingReadIdSet = new HashSet<Long>();
private Set<Long> outgoingReadIdSet = new HashSet<Long>();
- private Set<Long> selfReadIdSet = new HashSet<Long>();
private Set<Long> neighborEdgeIntersection = new HashSet<Long>();
- private Map<VKmerBytesWritable, Set<Long>> kmerMap = new HashMap<VKmerBytesWritable, Set<Long>>();
- private VKmerListWritable incomingEdgeList = null;
- private VKmerListWritable outgoingEdgeList = null;
+ private EdgeListWritable incomingEdgeList = null;
+ private EdgeListWritable outgoingEdgeList = null;
private byte incomingEdgeDir = 0;
private byte outgoingEdgeDir = 0;
- protected VKmerBytesWritable createdVertexId = null;
-
/**
* initiate kmerSize, maxIteration
*/
@@ -86,21 +70,21 @@
if (maxIteration < 0)
maxIteration = getContext().getConfiguration().getInt(ITERATIONS, 1000000);
if(incomingMsg == null)
- incomingMsg = new MessageWritable(kmerSize);
+ incomingMsg = new SplitRepeatMessageWritable();
if(outgoingMsg == null)
- outgoingMsg = new MessageWritable(kmerSize);
+ outgoingMsg = new SplitRepeatMessageWritable();
else
- outgoingMsg.reset(kmerSize);
- if(incomingEdgeList == null)
- incomingEdgeList = new VKmerListWritable();
- if(outgoingEdgeList == null)
- outgoingEdgeList = new VKmerListWritable();
- if(createdVertexId == null)
- createdVertexId = new VKmerBytesWritable(kmerSize + 1);
+ outgoingMsg.reset();
if(destVertexId == null)
- destVertexId = new VKmerBytesWritable(kmerSize);
+ destVertexId = new VKmerBytesWritable();
if(tmpKmer == null)
tmpKmer = new VKmerBytesWritable();
+ if(incomingEdgeList == null)
+ incomingEdgeList = new EdgeListWritable();
+ if(outgoingEdgeList == null)
+ outgoingEdgeList = new EdgeListWritable();
+ if(createdVertexId == null)
+ createdVertexId = new VKmerBytesWritable();
}
/**
@@ -122,66 +106,23 @@
return sb.toString();
}
- /**
- * GenerateString only for test
- */
- public String generateString(){
- if(existKmerString.isEmpty()){
- existKmerString.add("AAA");
- return "AAA";
- }
- else
- return "GGG";
- }
-
public void randomGenerateVertexId(int numOfSuffix){
String newVertexId = getVertexId().toString() + generaterRandomString(numOfSuffix);;
createdVertexId.setByRead(kmerSize + numOfSuffix, newVertexId.getBytes(), 0);
}
- public void generateKmerMap(Iterator<MessageWritable> msgIterator){
- kmerMap.clear();
- while(msgIterator.hasNext()){
- incomingMsg = msgIterator.next();
- readIdSet = new HashSet<Long>();
- for(PositionWritable nodeId : incomingMsg.getNodeIdList()){
- readIdSet.add(nodeId.getReadId());
- }
- kmerMap.put(incomingMsg.getSourceVertexId(), readIdSet);
- }
- }
-
- public void setSelfReadIdSet(){
- selfReadIdSet.clear();
-// for(PositionWritable nodeId : getVertexValue().getNodeIdList()){
-// selfReadIdSet.add(nodeId.getReadId());
-// }
- }
-
@SuppressWarnings({ "rawtypes", "unchecked" })
- public void createNewVertex(int i, VKmerBytesWritable incomingEdge, VKmerBytesWritable outgoingEdge){
+ public void createNewVertex(int i, EdgeWritable incomingEdge, EdgeWritable outgoingEdge){
Vertex vertex = (Vertex) BspUtils.createVertex(getContext().getConfiguration());
vertex.getMsgList().clear();
vertex.getEdges().clear();
- VKmerBytesWritable vertexId = new VKmerBytesWritable(kmerSize);
- VertexValueWritable vertexValue = new VertexValueWritable(); //kmerSize
+ VKmerBytesWritable vertexId = new VKmerBytesWritable();
+ VertexValueWritable vertexValue = new VertexValueWritable();
//add the corresponding edge to new vertex
- switch(connectedTable[i][0]){
- case EdgeDir.DIR_RF:
- vertexValue.getRFList().append(incomingEdge);
- break;
- case EdgeDir.DIR_RR:
- vertexValue.getRRList().append(incomingEdge);
- break;
- }
- switch(connectedTable[i][1]){
- case EdgeDir.DIR_FF:
- vertexValue.getFFList().append(outgoingEdge);
- break;
- case EdgeDir.DIR_FR:
- vertexValue.getFRList().append(outgoingEdge);
- break;
- }
+ vertexValue.getEdgeList(connectedTable[i][0]).add(incomingEdge);
+
+ vertexValue.getEdgeList(connectedTable[i][1]).add(outgoingEdge);
+
vertexId.setAsCopy(createdVertexId);
vertex.setVertexId(vertexId);
vertex.setVertexValue(vertexValue);
@@ -189,189 +130,118 @@
addVertex(vertexId, vertex);
}
- public void sendMsgToUpdateEdge(VKmerBytesWritable incomingEdge, VKmerBytesWritable outgoingEdge){
- outgoingMsg.setCreatedVertexId(createdVertexId);
+ public void sendMsgToUpdateEdge(EdgeWritable incomingEdge, EdgeWritable outgoingEdge){
+ EdgeWritable createdEdge = new EdgeWritable();
+ createdEdge.setKey(createdVertexId);
+ for(Long readId: neighborEdgeIntersection)
+ createdEdge.appendReadID(readId);
+ outgoingMsg.setCreatedEdge(createdEdge);
outgoingMsg.setSourceVertexId(getVertexId());
outgoingMsg.setFlag(incomingEdgeDir);
- destVertexId.setAsCopy(incomingEdge);
+ destVertexId.setAsCopy(incomingEdge.getKey());
sendMsg(destVertexId, outgoingMsg);
outgoingMsg.setFlag(outgoingEdgeDir);
- destVertexId.setAsCopy(outgoingEdge);
+ destVertexId.setAsCopy(outgoingEdge.getKey());
sendMsg(destVertexId, outgoingMsg);
}
- public void storeDeletedEdge(Set<DeletedEdge> deletedEdges, int i, VKmerBytesWritable incomingEdge, VKmerBytesWritable outgoingEdge){
+ public void storeDeletedEdge(Set<DeletedEdge> deletedEdges, int i, EdgeWritable incomingEdge, EdgeWritable outgoingEdge){
DeletedEdge deletedIncomingEdge = new DeletedEdge();
DeletedEdge deletedOutgoingEdge = new DeletedEdge();
- switch(connectedTable[i][0]){
- case EdgeDir.DIR_RF:
- deletedIncomingEdge.setDir(EdgeDir.DIR_RF);
- deletedIncomingEdge.setEdge(incomingEdge);
- break;
- case EdgeDir.DIR_RR:
- deletedIncomingEdge.setDir(EdgeDir.DIR_RR);
- deletedIncomingEdge.setEdge(incomingEdge);
- break;
- }
- switch(connectedTable[i][1]){
- case EdgeDir.DIR_FF:
- deletedOutgoingEdge.setDir(EdgeDir.DIR_FF);
- deletedOutgoingEdge.setEdge(outgoingEdge);
- break;
- case EdgeDir.DIR_FR:
- deletedOutgoingEdge.setDir(EdgeDir.DIR_FR);
- deletedOutgoingEdge.setEdge(outgoingEdge);
- break;
- }
+
+ deletedIncomingEdge.setDir(connectedTable[i][0]);
+ deletedIncomingEdge.setEdge(incomingEdge);
+
+ deletedOutgoingEdge.setDir(connectedTable[i][1]);
+ deletedOutgoingEdge.setEdge(outgoingEdge);
+
deletedEdges.add(deletedIncomingEdge);
deletedEdges.add(deletedOutgoingEdge);
}
+
public void deleteEdgeFromOldVertex(DeletedEdge deleteEdge){
- switch(deleteEdge.dir){
- case EdgeDir.DIR_RF:
- getVertexValue().getRFList().remove(deleteEdge.getEdge());
- break;
- case EdgeDir.DIR_RR:
- getVertexValue().getRRList().remove(deleteEdge.getEdge());
- break;
- case EdgeDir.DIR_FF:
- getVertexValue().getFFList().remove(deleteEdge.getEdge());
- break;
- case EdgeDir.DIR_FR:
- getVertexValue().getFRList().remove(deleteEdge.getEdge());
- break;
- }
+ getVertexValue().getEdgeList(deleteEdge.dir).remove(deleteEdge.getEdge());
}
public void setEdgeListAndEdgeDir(int i){
- switch(connectedTable[i][0]){
- case EdgeDir.DIR_RF:
- incomingEdgeList.setCopy(getVertexValue().getRFList());
- incomingEdgeDir = MessageFlag.DIR_RF;
- break;
- case EdgeDir.DIR_RR:
- incomingEdgeList.setCopy(getVertexValue().getRRList());
- incomingEdgeDir = MessageFlag.DIR_RR;
- break;
- }
- switch(connectedTable[i][1]){
- case EdgeDir.DIR_FF:
- outgoingEdgeList.setCopy(getVertexValue().getFFList());
- outgoingEdgeDir = MessageFlag.DIR_FF;
- break;
- case EdgeDir.DIR_FR:
- outgoingEdgeList.setCopy(getVertexValue().getFRList());
- outgoingEdgeDir = MessageFlag.DIR_FR;
- break;
- }
+ incomingEdgeList.setAsCopy(getVertexValue().getEdgeList(connectedTable[i][0]));
+ incomingEdgeDir = connectedTable[i][0];
+
+ outgoingEdgeList.setAsCopy(getVertexValue().getEdgeList(connectedTable[i][1]));
+ outgoingEdgeDir = connectedTable[i][1];
}
- public void setNeighborEdgeIntersection(VKmerBytesWritable incomingEdge, VKmerBytesWritable outgoingEdge){
+ public void setNeighborEdgeIntersection(EdgeWritable incomingEdge, EdgeWritable outgoingEdge){
incomingReadIdSet.clear();
- outgoingReadIdSet.clear();
- tmpKmer.setAsCopy(incomingEdge);
- incomingReadIdSet.addAll(kmerMap.get(tmpKmer));
- tmpKmer.setAsCopy(outgoingEdge);
- outgoingReadIdSet.addAll(kmerMap.get(tmpKmer));
-
- //set all neighberEdge readId intersection
- neighborEdgeIntersection.addAll(selfReadIdSet);
- neighborEdgeIntersection.retainAll(incomingReadIdSet);
+ long[] incomingReadIds = incomingEdge.getReadIDs().toReadIDArray();
+ for(long readId : incomingReadIds){
+ incomingReadIdSet.add(readId);
+ }
+ outgoingReadIdSet.clear();
+ long[] outgoingReadIds = outgoingEdge.getReadIDs().toReadIDArray();
+ for(long readId : outgoingReadIds){
+ outgoingReadIdSet.add(readId);
+ }
+ neighborEdgeIntersection.clear();
+ neighborEdgeIntersection.addAll(incomingReadIdSet);
neighborEdgeIntersection.retainAll(outgoingReadIdSet);
}
public void updateEdgeListPointToNewVertex(){
byte meToNeighborDir = incomingMsg.getFlag();
byte neighborToMeDir = mirrorDirection(meToNeighborDir);
- switch(neighborToMeDir){
- case MessageFlag.DIR_FF:
- getVertexValue().getFFList().remove(incomingMsg.getSourceVertexId());
- getVertexValue().getFFList().append(incomingMsg.getCreatedVertexId());
- break;
- case MessageFlag.DIR_FR:
- getVertexValue().getFRList().remove(incomingMsg.getSourceVertexId());
- getVertexValue().getFRList().append(incomingMsg.getCreatedVertexId());
- break;
- case MessageFlag.DIR_RF:
- getVertexValue().getRFList().remove(incomingMsg.getSourceVertexId());
- getVertexValue().getRFList().append(incomingMsg.getCreatedVertexId());
- break;
- case MessageFlag.DIR_RR:
- getVertexValue().getRRList().remove(incomingMsg.getSourceVertexId());
- getVertexValue().getRRList().append(incomingMsg.getCreatedVertexId());
- break;
- }
+
+ getVertexValue().getEdgeList(neighborToMeDir).remove(incomingMsg.getSourceVertexId());
+ getVertexValue().getEdgeList(neighborToMeDir).add(incomingMsg.getCreatedEdge());
}
@Override
- public void compute(Iterator<MessageWritable> msgIterator) {
+ public void compute(Iterator<SplitRepeatMessageWritable> msgIterator) {
initVertex();
if(getSuperstep() == 1){
if(getVertexValue().getDegree() > 2){
- outgoingMsg.setSourceVertexId(getVertexId());
- sendMsgToAllNeighborNodes(getVertexValue());
- }
- voteToHalt();
- } else if(getSuperstep() == 2){
- while(msgIterator.hasNext()){
- incomingMsg = msgIterator.next();
-// outgoingMsg.setNodeIdList(getVertexValue().getNodeIdList());
- outgoingMsg.setSourceVertexId(getVertexId());
- sendMsg(incomingMsg.getSourceVertexId(), outgoingMsg);
- }
- voteToHalt();
- } else if(getSuperstep() == 3){
- /** generate KmerMap map kmer(key) to readIdSet(value) **/
- generateKmerMap(msgIterator);
-
- /** set self readId set **/
- setSelfReadIdSet();
-
- //A set storing deleted edges
- Set<DeletedEdge> deletedEdges = new HashSet<DeletedEdge>();
- /** process connectedTable **/
- for(int i = 0; i < 4; i++){
- /** set edgeList and edgeDir based on connectedTable **/
- setEdgeListAndEdgeDir(i);
-
- VKmerBytesWritable incomingEdge = new VKmerBytesWritable();
- VKmerBytesWritable outgoingEdge = new VKmerBytesWritable();
- for(int x = 0; x < incomingEdgeList.getCountOfPosition(); x++){
- for(int y = 0; y < outgoingEdgeList.getCountOfPosition(); y++){
- incomingEdge.setAsCopy(incomingEdgeList.getPosition(x));
- outgoingEdge.setAsCopy(outgoingEdgeList.getPosition(y));
- /** set neighborEdge readId intersection **/
- setNeighborEdgeIntersection(incomingEdge, outgoingEdge);
-
- if(!neighborEdgeIntersection.isEmpty()){
- /** random generate vertexId of new vertex **/
- randomGenerateVertexId(3);
+ //A set storing deleted edges
+ Set<DeletedEdge> deletedEdges = new HashSet<DeletedEdge>();
+ /** process connectedTable **/
+ for(int i = 0; i < 4; i++){
+ /** set edgeList and edgeDir based on connectedTable **/
+ setEdgeListAndEdgeDir(i);
+
+ for(EdgeWritable incomingEdge : incomingEdgeList){
+ for(EdgeWritable outgoingEdge : outgoingEdgeList){
+ /** set neighborEdge readId intersection **/
+ setNeighborEdgeIntersection(incomingEdge, outgoingEdge);
- /** create new/created vertex **/
- createNewVertex(i, incomingEdge, outgoingEdge);
-
- /** send msg to neighbors to update their edges to new vertex **/
- sendMsgToUpdateEdge(incomingEdge, outgoingEdge);
-
- /** store deleted edge **/
- storeDeletedEdge(deletedEdges, i, incomingEdge, outgoingEdge);
+ if(!neighborEdgeIntersection.isEmpty()){
+ /** random generate vertexId of new vertex **/
+ randomGenerateVertexId(3);
+
+ /** create new/created vertex **/
+ createNewVertex(i, incomingEdge, outgoingEdge);
+
+ /** send msg to neighbors to update their edges to new vertex **/
+ sendMsgToUpdateEdge(incomingEdge, outgoingEdge);
+
+ /** store deleted edge **/
+ storeDeletedEdge(deletedEdges, i, incomingEdge, outgoingEdge);
+ }
}
- }
- }
+ }
+ }
+ /** delete extra edges from old vertex **/
+ for(DeletedEdge deletedEdge : deletedEdges){
+ deleteEdgeFromOldVertex(deletedEdge);
+ }
+
+ /** Old vertex delete or voteToHalt **/
+ if(getVertexValue().getDegree() == 0)//if no any edge, delete
+ deleteVertex(getVertexId());
+ else
+ voteToHalt();
}
- /** delete extra edges from old vertex **/
- for(DeletedEdge deletedEdge : deletedEdges){
- deleteEdgeFromOldVertex(deletedEdge);
- }
-
- /** Old vertex delete or voteToHalt **/
- if(getVertexValue().getDegree() == 0)//if no any edge, delete
- deleteVertex(getVertexId());
- else
- voteToHalt();
- } else if(getSuperstep() == 4){
+ } else if(getSuperstep() == 2){
while(msgIterator.hasNext()){
incomingMsg = msgIterator.next();
/** update edgelist to new/created vertex **/
@@ -379,19 +249,5 @@
}
voteToHalt();
}
- }
-
- public static void main(String[] args) throws Exception {
- PregelixJob job = new PregelixJob(SplitRepeatVertex.class.getSimpleName());
- job.setVertexClass(SplitRepeatVertex.class);
- /**
- * BinaryInput and BinaryOutput
- */
- job.setVertexInputFormatClass(InitialGraphCleanInputFormat.class);
- job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
- job.setDynamicVertexValueSize(true);
- job.setOutputKeyClass(VKmerBytesWritable.class);
- job.setOutputValueClass(VertexValueWritable.class);
- Client.run(args, job);
- }
+ }
}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/SplitRepeatSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/SplitRepeatSmallTestSuite.java
deleted file mode 100644
index ca8062f..0000000
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/SplitRepeatSmallTestSuite.java
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright 2009-2010 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pregelix.JobRun;
-
-import java.io.BufferedReader;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.logging.Logger;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import junit.framework.TestSuite;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
-import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
-
-@SuppressWarnings("deprecation")
-public class SplitRepeatSmallTestSuite extends TestSuite {
- private static final Logger LOGGER = Logger.getLogger(SplitRepeatSmallTestSuite.class.getName());
- //P4ForMergeGraph/bin/read
- public static final String PreFix = "data/SplitRepeat";
- public static final String[] TestDir = { PreFix + File.separator
- + "SplitOnce", PreFix + File.separator
- + "SplitTwice"};
- private static final String ACTUAL_RESULT_DIR = "data/actual/splitrepeat";
- private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
- private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
- private static final String PATH_TO_CLUSTER_PROPERTIES = "src/test/resources/cluster/cluster.properties";
- private static final String PATH_TO_JOBS = "src/test/resources/jobs/";
- private static final String PATH_TO_ONLY = "src/test/resources/only_splitrepeat.txt";
-
- public static final String HDFS_INPUTPATH = "/PathTestSet";
-
- private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
- private MiniDFSCluster dfsCluster;
-
- private JobConf conf = new JobConf();
- private int numberOfNC = 1;
-
- public void setUp() throws Exception {
- ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
- ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
- cleanupStores();
- PregelixHyracksIntegrationUtil.init();
- LOGGER.info("Hyracks mini-cluster started");
- FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
- FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
- startHDFS();
- }
-
- private void startHDFS() throws IOException {
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
- FileSystem dfs = FileSystem.get(conf);
-
- for (String testDir : TestDir) {
- File src = new File(testDir);
- Path dest = new Path(HDFS_INPUTPATH + File.separator + src.getName());
- dfs.mkdirs(dest);
- //src.listFiles()
- //src.listFiles((FilenameFilter)(new WildcardFileFilter("part*")))
- for (File f : src.listFiles()) {
- dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
- }
- }
-
- DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
- conf.writeXml(confOutput);
- confOutput.flush();
- confOutput.close();
- }
-
- private void cleanupStores() throws IOException {
- FileUtils.forceMkdir(new File("teststore"));
- FileUtils.forceMkdir(new File("build"));
- FileUtils.cleanDirectory(new File("teststore"));
- FileUtils.cleanDirectory(new File("build"));
- }
-
- /**
- * cleanup hdfs cluster
- */
- private void cleanupHDFS() throws Exception {
- dfsCluster.shutdown();
- }
-
- public void tearDown() throws Exception {
- PregelixHyracksIntegrationUtil.deinit();
- LOGGER.info("Hyracks mini-cluster shut down");
- cleanupHDFS();
- }
-
- public static Test suite() throws Exception {
- List<String> onlys = getFileList(PATH_TO_ONLY);
- File testData = new File(PATH_TO_JOBS);
- File[] queries = testData.listFiles();
- SplitRepeatSmallTestSuite testSuite = new SplitRepeatSmallTestSuite();
- testSuite.setUp();
- boolean onlyEnabled = false;
- FileSystem dfs = FileSystem.get(testSuite.conf);
-
- if (onlys.size() > 0) {
- onlyEnabled = true;
- }
-
- for (File qFile : queries) {
- if (qFile.isFile()) {
- if (onlyEnabled && !isInList(onlys, qFile.getName())) {
- continue;
- } else {
- for (String testPathStr : TestDir) {
- File testDir = new File(testPathStr);
- String resultFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
- + File.separator + "bin" + File.separator + testDir.getName();
- String textFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
- + File.separator + "txt" + File.separator + testDir.getName();
- String graphvizFileName = ACTUAL_RESULT_DIR + File.separator + jobExtToResExt(qFile.getName())
- + File.separator + "graphviz" + File.separator + testDir.getName();
- testSuite.addTest(new BasicSmallTestCase(HADOOP_CONF_PATH, qFile.getName(), qFile
- .getAbsolutePath().toString(), dfs,
- HDFS_INPUTPATH + File.separator + testDir.getName(), resultFileName, textFileName, graphvizFileName));
- }
- }
- }
- }
- return testSuite;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
- try {
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- // cleanupStores();
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
- tearDown();
- } catch (Exception e) {
- throw new IllegalStateException(e);
- }
- }
-
- protected static List<String> getFileList(String ignorePath) throws FileNotFoundException, IOException {
- BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
- String s = null;
- List<String> ignores = new ArrayList<String>();
- while ((s = reader.readLine()) != null) {
- ignores.add(s);
- }
- reader.close();
- return ignores;
- }
-
- private static String jobExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot);
- }
-
- private static boolean isInList(List<String> onlys, String name) {
- for (String only : onlys)
- if (name.indexOf(only) >= 0)
- return true;
- return false;
- }
-
-}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/SplitRepeatTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/SplitRepeatTestSuite.java
new file mode 100644
index 0000000..e1df8b5
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/SplitRepeatTestSuite.java
@@ -0,0 +1,14 @@
+package edu.uci.ics.genomix.pregelix.JobRun;
+
+import junit.framework.Test;
+
+public class SplitRepeatTestSuite extends BasicGraphCleanTestSuite{
+
+ public static Test suite() throws Exception {
+ String pattern ="SplitRepeat";
+ String testSet[] = {"SplitOnce", "SplitTwice"};
+ init(pattern, testSet);
+ BasicGraphCleanTestSuite testSuite = new BasicGraphCleanTestSuite();
+ return makeTestSuite(testSuite);
+ }
+}
diff --git a/genomix/genomix-pregelix/src/test/resources/only_SplitRepeat.txt b/genomix/genomix-pregelix/src/test/resources/only_SplitRepeat.txt
new file mode 100644
index 0000000..824d7e1
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/only_SplitRepeat.txt
@@ -0,0 +1 @@
+SplitRepeatGraph.xml