adding fuzzyjoin code to git
diff --git a/asterix-fuzzyjoin/.gitignore b/asterix-fuzzyjoin/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/asterix-fuzzyjoin/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/asterix-fuzzyjoin/data/dblp-small.expected/recordpairs-000/expected.txt b/asterix-fuzzyjoin/data/dblp-small.expected/recordpairs-000/expected.txt
new file mode 100644
index 0000000..96037e7
--- /dev/null
+++ b/asterix-fuzzyjoin/data/dblp-small.expected/recordpairs-000/expected.txt
@@ -0,0 +1,14 @@
+61:books/aw/stonebraker86/Stonebraker86:Design of Relational Systems (Introduction to Section 1).:Michael Stonebraker:2002-01-03 1-3 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86 db/books/collections/Stonebraker86/Stonebraker86.html ingres/P001.pdf;0.5;62:books/aw/stonebraker86/Stonebraker86a:Supporting Studies on Relational Systems (Introduction to Section 2).:Michael Stonebraker:2002-01-03 83-85 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86a db/books/collections/Stonebraker86/Stonebraker86a.html ingres/P083.pdf
+61:books/aw/stonebraker86/Stonebraker86:Design of Relational Systems (Introduction to Section 1).:Michael Stonebraker:2002-01-03 1-3 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86 db/books/collections/Stonebraker86/Stonebraker86.html ingres/P001.pdf;0.5;67:books/aw/stonebraker86/Stonebraker86f:Database Design (Introduction to Section 6).:Michael Stonebraker:2002-01-03 393-394 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86f db/books/collections/Stonebraker86/Stonebraker86f.html ingres/P393.pdf
+63:books/aw/stonebraker86/Stonebraker86b:Distributed Database Systems (Introduction to Section 3).:Michael Stonebraker:2002-01-03 183-186 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86b db/books/collections/Stonebraker86/Stonebraker86b.html ingres/P183.pdf;0.53846157;65:books/aw/stonebraker86/Stonebraker86d:User Interfaces for Database Systems (Introduction to Section 4).:Michael Stonebraker:2002-01-03 243-245 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86d db/books/collections/Stonebraker86/Stonebraker86d.html ingres/P243.pdf
+63:books/aw/stonebraker86/Stonebraker86b:Distributed Database Systems (Introduction to Section 3).:Michael Stonebraker:2002-01-03 183-186 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86b db/books/collections/Stonebraker86/Stonebraker86b.html ingres/P183.pdf;0.54545456;67:books/aw/stonebraker86/Stonebraker86f:Database Design (Introduction to Section 6).:Michael Stonebraker:2002-01-03 393-394 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86f db/books/collections/Stonebraker86/Stonebraker86f.html ingres/P393.pdf
+73:books/aw/AhoHU74:The Design and Analysis of Computer Algorithms.:Alfred V. Aho John E. Hopcroft Jeffrey D. Ullman:2002-01-03 Addison-Wesley 1974 0-201-00029-6;0.6111111;75:books/aw/AhoHU83:Data Structures and Algorithms.:Alfred V. Aho John E. Hopcroft Jeffrey D. Ullman:2002-01-03 Addison-Wesley 1983 0-201-00023-7
+81:journals/siamcomp/AspnesW96:Randomized Consensus in Expected O(n log² n) Operations Per Processor.:James Aspnes Orli Waarts:2002-01-03 1024-1044 1996 25 SIAM J. Comput. 5 db/journals/siamcomp/siamcomp25.html#AspnesW96;0.8235294;82:conf/focs/AspnesW92:Randomized Consensus in Expected O(n log ^2 n) Operations Per Processor:James Aspnes Orli Waarts:2006-04-25 137-146 conf/focs/FOCS33 1992 FOCS db/conf/focs/focs92.html#AspnesW92
+83:journals/siamcomp/Bloniarz83:A Shortest-Path Algorithm with Expected Time O(n² log n log* n).:Peter A. Bloniarz:2002-01-03 588-600 1983 12 SIAM J. Comput. 3 db/journals/siamcomp/siamcomp12.html#Bloniarz83;0.8333333;84:conf/stoc/Bloniarz80:A Shortest-Path Algorithm with Expected Time O(n^2 log n log ^* n):Peter A. Bloniarz:2006-04-25 378-384 conf/stoc/STOC12 1980 STOC db/conf/stoc/stoc80.html#Bloniarz80
+85:journals/siamcomp/Megiddo83a:Linear-Time Algorithms for Linear Programming in R³ and Related Problems.:Nimrod Megiddo:2002-01-03 759-776 1983 12 SIAM J. Comput. 4 db/journals/siamcomp/siamcomp12.html#Megiddo83a;0.8;86:conf/focs/Megiddo82:Linear-Time Algorithms for Linear Programming in R^3 and Related Problems:Nimrod Megiddo:2006-04-25 329-338 conf/focs/FOCS23 1982 FOCS db/conf/focs/focs82.html#Megiddo82
+87:journals/siamcomp/MoffatT87:An All Pairs Shortest Path Algorithm with Expected Time O(n² log n).:Alistair Moffat Tadao Takaoka:2002-01-03 1023-1031 1987 16 SIAM J. Comput. 6 db/journals/siamcomp/siamcomp16.html#MoffatT87;0.8;88:conf/focs/MoffatT85:An All Pairs Shortest Path Algorithm with Expected Running Time O(n^2 log n):Alistair Moffat Tadao Takaoka:2006-04-25 101-105 conf/focs/FOCS26 1985 FOCS db/conf/focs/focs85.html#MoffatT85
+89:conf/icip/SchonfeldL98:VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases.:Dan Schonfeld Dan Lelescu:2002-11-05 123-127 1998 ICIP (3) db/conf/icip/icip1998-3.html#SchonfeldL98;0.7647059;90:conf/hicss/SchonfeldL99:VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases ¾ Visual Search Engine.:Dan Schonfeld Dan Lelescu:2002-01-03 1999 HICSS http //computer.org/proceedings/hicss/0001/00013/00013006abs.htm db/conf/hicss/hicss1999-3.html#SchonfeldL99
+91:journals/corr/abs-0802-2861:Geometric Set Cover and Hitting Sets for Polytopes in $R^3$:Sören Laue:2008-03-03 http //arxiv.org/abs/0802.2861 2008 CoRR abs/0802.2861 db/journals/corr/corr0802.html#abs-0802-2861 informal publication;0.78571427;92:conf/stacs/Laue08:Geometric Set Cover and Hitting Sets for Polytopes in R³.:Sören Laue:2008-03-04 2008 STACS 479-490 http //drops.dagstuhl.de/opus/volltexte/2008/1367 conf/stacs/2008 db/conf/stacs/stacs2008.html#Laue08
+93:journals/iandc/IbarraJCR91:Some Classes of Languages in NC¹:Oscar H. Ibarra Tao Jiang Jik H. Chang Bala Ravikumar:2006-04-25 86-106 Inf. Comput. January 1991 90 1 db/journals/iandc/iandc90.html#IbarraJCR91;0.7222222;94:conf/awoc/IbarraJRC88:On Some Languages in NC.:Oscar H. Ibarra Tao Jiang Bala Ravikumar Jik H. Chang:2002-08-06 64-73 1988 conf/awoc/1988 AWOC db/conf/awoc/awoc88.html#IbarraJRC88
+95:journals/jacm/GalilHLSW87:An O(n³log n) deterministic and an O(n³) Las Vegs isomorphism test for trivalent graphs.:Zvi Galil Christoph M. Hoffmann Eugene M. Luks Claus-Peter Schnorr Andreas Weber:2003-11-20 513-531 1987 34 J. ACM 3 http //doi.acm.org/10.1145/28869.28870 db/journals/jacm/jacm34.html#GalilHLSW87;0.71428573;96:conf/focs/GalilHLSW82:An O(n^3 log n) Deterministic and an O(n^3) Probabilistic Isomorphism Test for Trivalent Graphs:Zvi Galil Christoph M. Hoffmann Eugene M. Luks Claus-Peter Schnorr Andreas Weber:2006-04-25 118-125 conf/focs/FOCS23 1982 FOCS db/conf/focs/focs82.html#GalilHLSW82
+97:journals/jacm/GalilT88:An O(n²(m + n log n)log n) min-cost flow algorithm.:Zvi Galil Éva Tardos:2003-11-20 374-386 1988 35 J. ACM 2 http //doi.acm.org/10.1145/42282.214090 db/journals/jacm/jacm35.html#GalilT88;0.84210527;98:conf/focs/GalilT86:An O(n^2 (m + n log n) log n) Min-Cost Flow Algorithm:Zvi Galil Éva Tardos:2006-04-25 1-9 conf/focs/FOCS27 1986 FOCS db/conf/focs/focs86.html#GalilT86
diff --git a/asterix-fuzzyjoin/data/dblp-small.expected/ridpairs-000/expected.txt b/asterix-fuzzyjoin/data/dblp-small.expected/ridpairs-000/expected.txt
new file mode 100644
index 0000000..97161a8
--- /dev/null
+++ b/asterix-fuzzyjoin/data/dblp-small.expected/ridpairs-000/expected.txt
@@ -0,0 +1,14 @@
+62 61 0.5
+65 63 0.53846157
+67 61 0.5
+67 63 0.54545456
+75 73 0.6111111
+82 81 0.8235294
+84 83 0.8333333
+86 85 0.8
+88 87 0.8
+90 89 0.7647059
+92 91 0.78571427
+94 93 0.7222222
+96 95 0.71428573
+98 97 0.84210527
diff --git a/asterix-fuzzyjoin/data/dblp-small.expected/ssjoin.out-000/expected.txt b/asterix-fuzzyjoin/data/dblp-small.expected/ssjoin.out-000/expected.txt
new file mode 100644
index 0000000..ceafc70
--- /dev/null
+++ b/asterix-fuzzyjoin/data/dblp-small.expected/ssjoin.out-000/expected.txt
@@ -0,0 +1,2 @@
+65 63 0.500
+73 75 0.522
diff --git a/asterix-fuzzyjoin/data/dblp-small.expected/tokens-000/expected.txt b/asterix-fuzzyjoin/data/dblp-small.expected/tokens-000/expected.txt
new file mode 100644
index 0000000..d8e0af0
--- /dev/null
+++ b/asterix-fuzzyjoin/data/dblp-small.expected/tokens-000/expected.txt
@@ -0,0 +1,597 @@
+¾_1
+1_1
+2_1
+3_1
+3_2
+4_1
+5_1
+6_1
+80_1
+a_1
+a_2
+abiteboul_1
+abraham_1
+active_1
+adds_1
+ahad_1
+ahmed_1
+aho_1
+albert_1
+alfons_1
+alfred_1
+algorithm_1
+algorithms_1
+alistair_1
+all_1
+allen_1
+amelia_1
+amihai_1
+amit_1
+an_1
+an_2
+analysis_1
+and_1
+and_2
+anders_1
+andrea_1
+andreas_1
+andrew_1
+andrews_1
+angelika_1
+anne_1
+annevelink_1
+application_1
+applications_1
+approach_1
+approaches_1
+architecture_1
+aref_1
+arthur_1
+as_1
+aspnes_1
+authorization_1
+awk_1
+b_1
+bala_1
+ballou_1
+based_1
+beech_1
+benchmark_1
+bernstein_1
+better_1
+beyond_1
+bindings_1
+björnerstedt_1
+blakeley_1
+bloniarz_1
+breitbart_1
+bretl_1
+brian_1
+brom_1
+brozos_1
+bruce_1
+c_1
+c_2
+cad_1
+canonical_1
+capability_1
+carey_1
+carlson_1
+cat_1
+catalyst_1
+chang_1
+changing_1
+chien_1
+choi_1
+chou_1
+chow_1
+chris_1
+christer_1
+christodoulakis_1
+christoph_1
+clarence_1
+classes_1
+claus_1
+clement_1
+commercial_1
+common_1
+comparison_1
+compilers_1
+compressed_1
+computer_1
+concepts_1
+concurrency_1
+concurrent_1
+connors_1
+consensus_1
+contents_1
+control_1
+cooperative_1
+cost_1
+cover_1
+craig_1
+critical_1
+critique_1
+d_1
+dale_1
+dan_1
+dan_2
+daniel_1
+darrell_1
+data_1
+database_1
+databases_1
+david_1
+davis_1
+dayal_1
+dbms_1
+dbmss_1
+declarative_1
+decouchant_1
+dennis_1
+design_1
+deterministic_1
+developing_1
+dewitt_1
+diederich_1
+directions_1
+distributed_1
+dittrich_1
+dittrich_2
+do_1
+dominique_1
+donald_1
+du_1
+e_1
+ealities_1
+early_1
+eda_1
+eduardo_1
+edward_1
+eliot_1
+ellis_1
+engine_1
+engineers_1
+enough_1
+environments_1
+epilogue_1
+eric_1
+eugene_1
+éva_1
+execution_1
+exodus_1
+expected_1
+experiences_1
+extended_1
+extending_1
+f_1
+fast_1
+features_1
+fishman_1
+flow_1
+for_1
+for_2
+form_1
+formal_1
+foundations_1
+frame_1
+frank_1
+fred_1
+frederick_1
+from_1
+future_1
+g_1
+gail_1
+gala_1
+galil_1
+garcia_1
+garcía_1
+garza_1
+gemstone_1
+generation_1
+genome_1
+geometric_1
+geometry_1
+gibbs_1
+gilkey_1
+goodman_1
+graham_1
+graphics_1
+graphs_1
+guide_1
+guido_1
+h_1
+h_2
+hanan_1
+hanson_1
+hardware_1
+harold_1
+hasan_1
+hector_1
+henry_1
+heterogeneity_1
+heterogeneous_1
+heytens_1
+hitting_1
+hoch_1
+hoffmann_1
+hong_1
+hopcroft_1
+hsu_1
+hull_1
+hulten_1
+ibarra_1
+implementation_1
+in_1
+indexing_1
+inegration_1
+inequalities_1
+inference_1
+information_1
+ingram_1
+ingres_1
+injun_1
+integrated_1
+interfaces_1
+interoperability_1
+interoperating_1
+introduction_1
+iris_1
+is_1
+isomorphism_1
+issues_1
+its_1
+j_1
+j_2
+j_3
+jack_1
+jacob_1
+james_1
+jason_1
+java_1
+jeffrey_1
+jennifer_1
+jiang_1
+jik_1
+jim_1
+joel_1
+john_1
+jordan_1
+jorge_1
+josé_1
+jurgen_1
+k_1
+kaiser_1
+kelley_1
+kemper_1
+kent_1
+kernighan_1
+kevin_1
+kifer_1
+kim_1
+kim_2
+king_1
+klaus_1
+knuth_1
+kotz_1
+koveos_1
+kowalski_1
+krieger_1
+kyung_1
+l_1
+lamport_1
+landscape_1
+language_1
+languages_1
+las_1
+latex_1
+laue_1
+lawrence_1
+legacy_1
+leichner_1
+lelescu_1
+leonidas_1
+leslie_1
+lewis_1
+lieberman_1
+lindholm_1
+linear_1
+linear_2
+list_1
+lochovsky_1
+log_1
+log_2
+log²_1
+lorenzo_1
+luks_1
+lunt_1
+lyngbæk_1
+lynn_1
+m_1
+m_2
+machine_1
+mahbod_1
+maier_1
+majorization_1
+making_1
+management_1
+manager_1
+managing_1
+manifolds_1
+manual_1
+mapping_1
+marek_1
+marie_1
+mark_1
+marshall_1
+megiddo_1
+meichun_1
+mellender_1
+meng_1
+message_1
+messages_1
+michael_1
+miguel_1
+milton_1
+min_1
+ming_1
+model_1
+models_1
+modern_1
+moerkotte_1
+moffat_1
+molina_1
+monty_1
+moon_1
+moss_1
+motro_1
+multidatabase_1
+multimedia_1
+multiuser_1
+my_1
+n_1
+n_2
+n²_1
+n_3
+n³_1
+n³log_1
+n_4
+nat_1
+nathan_1
+nc_1
+nc¹_1
+neimat_1
+next_1
+nierstrasz_1
+nikcevic_1
+nimrod_1
+nong_1
+nonmonotonic_1
+o_1
+o_2
+object_1
+object_2
+objects_1
+of_1
+of_2
+office_1
+olkin_1
+omg_1
+omiecinski_1
+on_1
+operations_1
+optimizing_1
+oql_1
+orientation_1
+oriented_1
+orion_1
+orlando_1
+orli_1
+oscar_1
+otis_1
+overview_1
+oz_1
+özsu_1
+p_1
+pairs_1
+parallel_1
+part_1
+path_1
+pegasus_1
+penney_1
+per_1
+performance_1
+peter_1
+philip_1
+physical_1
+pogo_1
+polytopes_1
+posc_1
+possibilities_1
+practice_1
+preface_1
+princiles_1
+probabilistic_1
+problems_1
+processing_1
+processor_1
+program_1
+programming_1
+promises_1
+proposal_1
+proteus_1
+query_1
+r_1
+r³_1
+rafi_1
+rafiul_1
+ralph_1
+rámon_1
+randomized_1
+ravi_1
+ravikumar_1
+reality_1
+reference_1
+references_1
+related_1
+relational_1
+representation_1
+requirements_1
+research_1
+resolving_1
+retrieval_1
+reyes_1
+richard_1
+richardson_1
+riegel_1
+río_1
+risch_1
+robert_1
+roger_1
+rowe_1
+rules_1
+running_1
+rusinkiewicz_1
+russinoff_1
+s_1
+samet_1
+scheevel_1
+schema_1
+schematic_1
+schnorr_1
+schonfeld_1
+schuchardt_1
+search_1
+section_1
+sedgewick_1
+semantics_1
+serge_1
+set_1
+sethi_1
+sets_1
+shan_1
+shared_1
+sharing_1
+shekita_1
+sheth_1
+shortest_1
+should_1
+silberschatz_1
+simon_1
+skarra_1
+smalltalk_1
+snodgrass_1
+software_1
+soley_1
+solution_1
+some_1
+sören_1
+spatial_1
+specification_1
+specifying_1
+sql_1
+stana_1
+standard_1
+standards_1
+stanley_1
+stavros_1
+stein_1
+stephen_1
+steve_1
+steven_1
+stonebraker_1
+storage_1
+story_1
+stout_1
+straw_1
+structures_1
+studies_1
+sunit_1
+support_1
+supporting_1
+survey_1
+system_1
+systems_1
+t_1
+tadao_1
+tai_1
+takaoka_1
+tamer_1
+tao_1
+tardos_1
+tarlton_1
+tarlton_2
+techniques_1
+technology_1
+temporal_1
+teresa_1
+test_1
+tex_1
+the_1
+theory_1
+thompson_1
+tim_1
+time_1
+title_1
+tla_1
+to_1
+tom_1
+tomlinson_1
+tools_1
+tore_1
+tracking_1
+transaction_1
+transactional_1
+transactions_1
+treaty_1
+trivalent_1
+tsichritzis_1
+ullman_1
+umeshwar_1
+uncerainty_1
+ungar_1
+unisql_1
+user_1
+v_1
+vázquez_1
+vázquez_2
+vegs_1
+version_1
+vianu_1
+victor_1
+video_1
+view_1
+vincent_1
+virtual_1
+visual_1
+vortex_1
+w_1
+w_2
+waarts_1
+walid_1
+walker_1
+wand_1
+waqar_1
+war_1
+weber_1
+weimin_1
+weinberger_1
+weintraub_1
+weiser_1
+weiyi_1
+where_1
+widom_1
+wilkinson_1
+william_1
+williams_1
+williams_2
+with_1
+woelk_1
+won_1
+workflows_1
+yair_1
+yellin_1
+yu_1
+yuri_1
+zdonik_1
+zvi_1
diff --git a/asterix-fuzzyjoin/data/dblp-small.expected/tokens.dblp-000/expected.txt b/asterix-fuzzyjoin/data/dblp-small.expected/tokens.dblp-000/expected.txt
new file mode 100644
index 0000000..a472104
--- /dev/null
+++ b/asterix-fuzzyjoin/data/dblp-small.expected/tokens.dblp-000/expected.txt
@@ -0,0 +1,438 @@
+135:1:[135, 193, 234, 266, 274, 286, 287, 288, 290, 294, 295, 296]
+193:1:[135, 193, 234, 266, 274, 286, 287, 288, 290, 294, 295, 296]
+234:1:[135, 193, 234, 266, 274, 286, 287, 288, 290, 294, 295, 296]
+266:1:[135, 193, 234, 266, 274, 286, 287, 288, 290, 294, 295, 296]
+274:1:[135, 193, 234, 266, 274, 286, 287, 288, 290, 294, 295, 296]
+286:1:[135, 193, 234, 266, 274, 286, 287, 288, 290, 294, 295, 296]
+287:1:[135, 193, 234, 266, 274, 286, 287, 288, 290, 294, 295, 296]
+3:2:[3, 19, 24, 150, 253, 254, 269, 285, 296]
+19:2:[3, 19, 24, 150, 253, 254, 269, 285, 296]
+24:2:[3, 19, 24, 150, 253, 254, 269, 285, 296]
+150:2:[3, 19, 24, 150, 253, 254, 269, 285, 296]
+253:2:[3, 19, 24, 150, 253, 254, 269, 285, 296]
+256:3:[225, 256, 275, 289, 293]
+275:3:[225, 256, 275, 289, 293]
+225:3:[225, 256, 275, 289, 293]
+252:4:[27, 69, 238, 252, 289, 295]
+27:4:[27, 69, 238, 252, 289, 295]
+69:4:[27, 69, 238, 252, 289, 295]
+238:4:[27, 69, 238, 252, 289, 295]
+232:5:[232, 289, 292]
+289:5:[232, 289, 292]
+15:6:[15, 59, 71, 80, 129, 152, 168, 180, 212, 270, 287, 291, 296]
+59:6:[15, 59, 71, 80, 129, 152, 168, 180, 212, 270, 287, 291, 296]
+71:6:[15, 59, 71, 80, 129, 152, 168, 180, 212, 270, 287, 291, 296]
+80:6:[15, 59, 71, 80, 129, 152, 168, 180, 212, 270, 287, 291, 296]
+129:6:[15, 59, 71, 80, 129, 152, 168, 180, 212, 270, 287, 291, 296]
+152:6:[15, 59, 71, 80, 129, 152, 168, 180, 212, 270, 287, 291, 296]
+168:6:[15, 59, 71, 80, 129, 152, 168, 180, 212, 270, 287, 291, 296]
+286:7:[255, 286]
+255:7:[255, 286]
+120:8:[57, 119, 120, 131, 140, 205, 253, 285, 287, 291, 292, 293, 296]
+131:8:[57, 119, 120, 131, 140, 205, 253, 285, 287, 291, 292, 293, 296]
+140:8:[57, 119, 120, 131, 140, 205, 253, 285, 287, 291, 292, 293, 296]
+205:8:[57, 119, 120, 131, 140, 205, 253, 285, 287, 291, 292, 293, 296]
+253:8:[57, 119, 120, 131, 140, 205, 253, 285, 287, 291, 292, 293, 296]
+57:8:[57, 119, 120, 131, 140, 205, 253, 285, 287, 291, 292, 293, 296]
+119:8:[57, 119, 120, 131, 140, 205, 253, 285, 287, 291, 292, 293, 296]
+28:9:[28, 52, 76, 151, 290]
+52:9:[28, 52, 76, 151, 290]
+76:9:[28, 52, 76, 151, 290]
+39:10:[39, 103, 216, 249, 256, 281, 288, 294]
+103:10:[39, 103, 216, 249, 256, 281, 288, 294]
+216:10:[39, 103, 216, 249, 256, 281, 288, 294]
+249:10:[39, 103, 216, 249, 256, 281, 288, 294]
+256:10:[39, 103, 216, 249, 256, 281, 288, 294]
+275:11:[42, 275, 296]
+42:11:[42, 275, 296]
+191:12:[63, 109, 191, 237, 245, 278, 283, 292]
+237:12:[63, 109, 191, 237, 245, 278, 283, 292]
+245:12:[63, 109, 191, 237, 245, 278, 283, 292]
+63:12:[63, 109, 191, 237, 245, 278, 283, 292]
+109:12:[63, 109, 191, 237, 245, 278, 283, 292]
+20:13:[20, 45, 87, 289, 291, 292, 295, 296]
+45:13:[20, 45, 87, 289, 291, 292, 295, 296]
+87:13:[20, 45, 87, 289, 291, 292, 295, 296]
+289:13:[20, 45, 87, 289, 291, 292, 295, 296]
+291:13:[20, 45, 87, 289, 291, 292, 295, 296]
+17:14:[17, 50, 237, 245, 276, 278, 283, 286, 290]
+50:14:[17, 50, 237, 245, 276, 278, 283, 286, 290]
+237:14:[17, 50, 237, 245, 276, 278, 283, 286, 290]
+245:14:[17, 50, 237, 245, 276, 278, 283, 286, 290]
+276:14:[17, 50, 237, 245, 276, 278, 283, 286, 290]
+11:15:[11, 100, 133, 256, 270, 289, 293]
+100:15:[11, 100, 133, 256, 270, 289, 293]
+133:15:[11, 100, 133, 256, 270, 289, 293]
+256:15:[11, 100, 133, 256, 270, 289, 293]
+25:16:[25, 112, 144, 247, 287, 289, 290, 291, 292, 296]
+112:16:[25, 112, 144, 247, 287, 289, 290, 291, 292, 296]
+144:16:[25, 112, 144, 247, 287, 289, 290, 291, 292, 296]
+247:16:[25, 112, 144, 247, 287, 289, 290, 291, 292, 296]
+287:16:[25, 112, 144, 247, 287, 289, 290, 291, 292, 296]
+289:16:[25, 112, 144, 247, 287, 289, 290, 291, 292, 296]
+85:17:[85, 241, 270, 286, 289, 291, 293, 296]
+241:17:[85, 241, 270, 286, 289, 291, 293, 296]
+270:17:[85, 241, 270, 286, 289, 291, 293, 296]
+286:17:[85, 241, 270, 286, 289, 291, 293, 296]
+289:17:[85, 241, 270, 286, 289, 291, 293, 296]
+4:18:[4, 48, 134, 142, 173, 265, 283, 294]
+48:18:[4, 48, 134, 142, 173, 265, 283, 294]
+134:18:[4, 48, 134, 142, 173, 265, 283, 294]
+142:18:[4, 48, 134, 142, 173, 265, 283, 294]
+173:18:[4, 48, 134, 142, 173, 265, 283, 294]
+171:19:[171, 253, 283, 285, 292, 296]
+253:19:[171, 253, 283, 285, 292, 296]
+283:19:[171, 253, 283, 285, 292, 296]
+285:19:[171, 253, 283, 285, 292, 296]
+9:20:[9, 286, 291, 293, 296]
+286:20:[9, 286, 291, 293, 296]
+291:20:[9, 286, 291, 293, 296]
+254:21:[254, 256, 258, 289, 293]
+256:21:[254, 256, 258, 289, 293]
+258:21:[254, 256, 258, 289, 293]
+114:22:[114, 275, 288, 289, 292, 293]
+275:22:[114, 275, 288, 289, 292, 293]
+288:22:[114, 275, 288, 289, 292, 293]
+289:22:[114, 275, 288, 289, 292, 293]
+62:23:[62, 260, 289, 292]
+260:23:[62, 260, 289, 292]
+289:23:[62, 260, 289, 292]
+254:24:[254, 258, 289, 291, 292, 293, 296]
+258:24:[254, 258, 289, 291, 292, 293, 296]
+289:24:[254, 258, 289, 291, 292, 293, 296]
+291:24:[254, 258, 289, 291, 292, 293, 296]
+30:25:[30, 46, 185, 200, 288, 295]
+46:25:[30, 46, 185, 200, 288, 295]
+185:25:[30, 46, 185, 200, 288, 295]
+200:25:[30, 46, 185, 200, 288, 295]
+209:26:[209, 236, 265]
+236:26:[209, 236, 265]
+84:27:[84, 209, 254, 258, 265, 295]
+209:27:[84, 209, 254, 258, 265, 295]
+254:27:[84, 209, 254, 258, 265, 295]
+258:27:[84, 209, 254, 258, 265, 295]
+32:28:[32, 81, 238, 275, 281, 287]
+81:28:[32, 81, 238, 275, 281, 287]
+238:28:[32, 81, 238, 275, 281, 287]
+275:28:[32, 81, 238, 275, 281, 287]
+2:29:[2, 31, 143, 286, 287, 291, 296]
+31:29:[2, 31, 143, 286, 287, 291, 296]
+143:29:[2, 31, 143, 286, 287, 291, 296]
+286:29:[2, 31, 143, 286, 287, 291, 296]
+6:30:[6, 262, 294, 296]
+262:30:[6, 262, 294, 296]
+294:30:[6, 262, 294, 296]
+93:31:[93, 234]
+234:31:[93, 234]
+72:32:[72, 163, 184, 292, 294]
+163:32:[72, 163, 184, 292, 294]
+184:32:[72, 163, 184, 292, 294]
+67:33:[67, 240, 281, 288, 294]
+240:33:[67, 240, 281, 288, 294]
+281:33:[67, 240, 281, 288, 294]
+14:34:[14, 33, 98, 262, 289, 292, 294, 295, 296]
+33:34:[14, 33, 98, 262, 289, 292, 294, 295, 296]
+98:34:[14, 33, 98, 262, 289, 292, 294, 295, 296]
+262:34:[14, 33, 98, 262, 289, 292, 294, 295, 296]
+289:34:[14, 33, 98, 262, 289, 292, 294, 295, 296]
+12:35:[12, 55, 106, 213, 214, 288, 295]
+55:35:[12, 55, 106, 213, 214, 288, 295]
+106:35:[12, 55, 106, 213, 214, 288, 295]
+213:35:[12, 55, 106, 213, 214, 288, 295]
+174:36:[174, 197, 216, 285, 291, 293, 296]
+197:36:[174, 197, 216, 285, 291, 293, 296]
+216:36:[174, 197, 216, 285, 291, 293, 296]
+285:36:[174, 197, 216, 285, 291, 293, 296]
+107:37:[107, 265, 275, 281, 294]
+265:37:[107, 265, 275, 281, 294]
+275:37:[107, 265, 275, 281, 294]
+29:38:[29, 170, 275, 293]
+170:38:[29, 170, 275, 293]
+275:38:[29, 170, 275, 293]
+102:39:[102, 128, 188, 255, 281, 287, 290, 294, 296]
+128:39:[102, 128, 188, 255, 281, 287, 290, 294, 296]
+188:39:[102, 128, 188, 255, 281, 287, 290, 294, 296]
+255:39:[102, 128, 188, 255, 281, 287, 290, 294, 296]
+281:39:[102, 128, 188, 255, 281, 287, 290, 294, 296]
+43:40:[43, 126, 251, 274, 292, 293, 295]
+126:40:[43, 126, 251, 274, 292, 293, 295]
+251:40:[43, 126, 251, 274, 292, 293, 295]
+274:40:[43, 126, 251, 274, 292, 293, 295]
+47:41:[47, 56, 232, 251, 295]
+56:41:[47, 56, 232, 251, 295]
+232:41:[47, 56, 232, 251, 295]
+89:42:[89, 205, 240, 288, 294]
+205:42:[89, 205, 240, 288, 294]
+240:42:[89, 205, 240, 288, 294]
+68:43:[68, 97, 281, 288, 291, 292, 294, 296]
+97:43:[68, 97, 281, 288, 291, 292, 294, 296]
+281:43:[68, 97, 281, 288, 291, 292, 294, 296]
+288:43:[68, 97, 281, 288, 291, 292, 294, 296]
+291:43:[68, 97, 281, 288, 291, 292, 294, 296]
+1:44:[1, 198, 286, 290, 291, 296]
+198:44:[1, 198, 286, 290, 291, 296]
+286:44:[1, 198, 286, 290, 291, 296]
+290:44:[1, 198, 286, 290, 291, 296]
+66:45:[66, 99, 108, 291, 296]
+99:45:[66, 99, 108, 291, 296]
+108:45:[66, 99, 108, 291, 296]
+22:46:[22, 74, 115, 116, 157, 289, 290, 292]
+74:46:[22, 74, 115, 116, 157, 289, 290, 292]
+115:46:[22, 74, 115, 116, 157, 289, 290, 292]
+116:46:[22, 74, 115, 116, 157, 289, 290, 292]
+157:46:[22, 74, 115, 116, 157, 289, 290, 292]
+188:47:[44, 53, 188, 247]
+44:47:[44, 53, 188, 247]
+53:47:[44, 53, 188, 247]
+266:48:[94, 111, 156, 266, 268, 291, 294, 296]
+268:48:[94, 111, 156, 266, 268, 291, 294, 296]
+94:48:[94, 111, 156, 266, 268, 291, 294, 296]
+111:48:[94, 111, 156, 266, 268, 291, 294, 296]
+156:48:[94, 111, 156, 266, 268, 291, 294, 296]
+118:49:[118, 148, 159, 177, 266, 290, 292, 296]
+148:49:[118, 148, 159, 177, 266, 290, 292, 296]
+159:49:[118, 148, 159, 177, 266, 290, 292, 296]
+177:49:[118, 148, 159, 177, 266, 290, 292, 296]
+266:49:[118, 148, 159, 177, 266, 290, 292, 296]
+5:50:[5, 182, 287, 288, 291, 296]
+182:50:[5, 182, 287, 288, 291, 296]
+287:50:[5, 182, 287, 288, 291, 296]
+288:50:[5, 182, 287, 288, 291, 296]
+8:51:[8, 90, 289]
+90:51:[8, 90, 289]
+83:52:[83, 88, 95, 147, 212, 281, 287]
+88:52:[83, 88, 95, 147, 212, 281, 287]
+95:52:[83, 88, 95, 147, 212, 281, 287]
+147:52:[83, 88, 95, 147, 212, 281, 287]
+164:53:[164, 197, 286, 291, 295, 296]
+197:53:[164, 197, 286, 291, 295, 296]
+286:53:[164, 197, 286, 291, 295, 296]
+291:53:[164, 197, 286, 291, 295, 296]
+18:54:[18, 61, 86, 132, 146, 241, 287, 288, 294]
+61:54:[18, 61, 86, 132, 146, 241, 287, 288, 294]
+86:54:[18, 61, 86, 132, 146, 241, 287, 288, 294]
+132:54:[18, 61, 86, 132, 146, 241, 287, 288, 294]
+146:54:[18, 61, 86, 132, 146, 241, 287, 288, 294]
+77:55:[77, 104, 105, 186, 281, 287, 290]
+104:55:[77, 104, 105, 186, 281, 287, 290]
+105:55:[77, 104, 105, 186, 281, 287, 290]
+186:55:[77, 104, 105, 186, 281, 287, 290]
+162:56:[162, 248, 268, 291, 296]
+248:56:[162, 248, 268, 291, 296]
+268:56:[162, 248, 268, 291, 296]
+291:57:[7, 41, 291, 293, 296]
+7:57:[7, 41, 291, 293, 296]
+41:57:[7, 41, 291, 293, 296]
+251:58:[36, 101, 175, 251, 262, 287, 288, 290]
+262:58:[36, 101, 175, 251, 262, 287, 288, 290]
+36:58:[36, 101, 175, 251, 262, 287, 288, 290]
+101:58:[36, 101, 175, 251, 262, 287, 288, 290]
+175:58:[36, 101, 175, 251, 262, 287, 288, 290]
+16:59:[16, 281, 285, 291, 292, 296]
+281:59:[16, 281, 285, 291, 292, 296]
+285:59:[16, 281, 285, 291, 292, 296]
+291:59:[16, 281, 285, 291, 292, 296]
+21:60:[21, 82, 220, 294]
+82:60:[21, 82, 220, 294]
+220:60:[21, 82, 220, 294]
+191:61:[191, 260, 274, 277, 278, 283, 288, 289]
+260:61:[191, 260, 274, 277, 278, 283, 288, 289]
+274:61:[191, 260, 274, 277, 278, 283, 288, 289]
+277:61:[191, 260, 274, 277, 278, 283, 288, 289]
+278:61:[191, 260, 274, 277, 278, 283, 288, 289]
+123:62:[123, 137, 260, 270, 276, 277, 278, 283, 289]
+137:62:[123, 137, 260, 270, 276, 277, 278, 283, 289]
+260:62:[123, 137, 260, 270, 276, 277, 278, 283, 289]
+270:62:[123, 137, 260, 270, 276, 277, 278, 283, 289]
+276:62:[123, 137, 260, 270, 276, 277, 278, 283, 289]
+255:63:[255, 257, 277, 278, 283, 289, 292]
+257:63:[255, 257, 277, 278, 283, 289, 292]
+277:63:[255, 257, 277, 278, 283, 289, 292]
+278:63:[255, 257, 277, 278, 283, 289, 292]
+220:64:[193, 220, 255, 274, 288, 294, 295]
+255:64:[193, 220, 255, 274, 288, 294, 295]
+274:64:[193, 220, 255, 274, 288, 294, 295]
+193:64:[193, 220, 255, 274, 288, 294, 295]
+278:65:[35, 54, 217, 277, 278, 283, 289, 290, 292]
+35:65:[35, 54, 217, 277, 278, 283, 289, 290, 292]
+54:65:[35, 54, 217, 277, 278, 283, 289, 290, 292]
+217:65:[35, 54, 217, 277, 278, 283, 289, 290, 292]
+277:65:[35, 54, 217, 277, 278, 283, 289, 290, 292]
+110:66:[110, 149, 183, 260, 262, 277, 278, 283, 290, 294]
+149:66:[110, 149, 183, 260, 262, 277, 278, 283, 290, 294]
+183:66:[110, 149, 183, 260, 262, 277, 278, 283, 290, 294]
+260:66:[110, 149, 183, 260, 262, 277, 278, 283, 290, 294]
+262:66:[110, 149, 183, 260, 262, 277, 278, 283, 290, 294]
+277:66:[110, 149, 183, 260, 262, 277, 278, 283, 290, 294]
+78:67:[78, 274, 277, 278, 283, 292]
+274:67:[78, 274, 277, 278, 283, 292]
+277:67:[78, 274, 277, 278, 283, 292]
+278:67:[78, 274, 277, 278, 283, 292]
+125:68:[125, 158, 179]
+158:68:[125, 158, 179]
+91:69:[91]
+141:70:[124, 141, 294]
+124:70:[124, 141, 294]
+160:71:[160, 286, 288]
+286:71:[160, 286, 288]
+38:72:[38, 122, 153, 166, 169, 217]
+122:72:[38, 122, 153, 166, 169, 217]
+153:72:[38, 122, 153, 166, 169, 217]
+166:72:[38, 122, 153, 166, 169, 217]
+34:73:[34, 40, 267, 274, 288, 294, 295]
+40:73:[34, 40, 267, 274, 288, 294, 295]
+267:73:[34, 40, 267, 274, 288, 294, 295]
+274:73:[34, 40, 267, 274, 288, 294, 295]
+51:74:[51, 64, 121, 161, 167, 172, 207, 266, 289, 290, 294, 295]
+64:74:[51, 64, 121, 161, 167, 172, 207, 266, 289, 290, 294, 295]
+121:74:[51, 64, 121, 161, 167, 172, 207, 266, 289, 290, 294, 295]
+161:74:[51, 64, 121, 161, 167, 172, 207, 266, 289, 290, 294, 295]
+167:74:[51, 64, 121, 161, 167, 172, 207, 266, 289, 290, 294, 295]
+172:74:[51, 64, 121, 161, 167, 172, 207, 266, 289, 290, 294, 295]
+207:74:[51, 64, 121, 161, 167, 172, 207, 266, 289, 290, 294, 295]
+267:75:[236, 265, 267, 295]
+236:75:[236, 265, 267, 295]
+265:75:[236, 265, 267, 295]
+258:76:[127, 213, 225, 258, 285, 286, 291, 295]
+285:76:[127, 213, 225, 258, 285, 286, 291, 295]
+127:76:[127, 213, 225, 258, 285, 286, 291, 295]
+213:76:[127, 213, 225, 258, 285, 286, 291, 295]
+225:76:[127, 213, 225, 258, 285, 286, 291, 295]
+155:77:[155, 266, 268, 294]
+266:77:[155, 266, 268, 294]
+268:77:[155, 266, 268, 294]
+92:78:[92, 113, 145, 200, 294]
+113:78:[92, 113, 145, 200, 294]
+145:78:[92, 113, 145, 200, 294]
+198:79:[165, 181, 198, 207, 295]
+181:79:[165, 181, 198, 207, 295]
+165:79:[165, 181, 198, 207, 295]
+267:80:[267]
+222:81:[139, 192, 211, 218, 222, 233, 271, 280, 282, 284, 293]
+233:81:[139, 192, 211, 218, 222, 233, 271, 280, 282, 284, 293]
+139:81:[139, 192, 211, 218, 222, 233, 271, 280, 282, 284, 293]
+192:81:[139, 192, 211, 218, 222, 233, 271, 280, 282, 284, 293]
+211:81:[139, 192, 211, 218, 222, 233, 271, 280, 282, 284, 293]
+218:81:[139, 192, 211, 218, 222, 233, 271, 280, 282, 284, 293]
+192:82:[192, 211, 218, 222, 233, 271, 276, 279, 280, 282, 284, 293]
+211:82:[192, 211, 218, 222, 233, 271, 276, 279, 280, 282, 284, 293]
+218:82:[192, 211, 218, 222, 233, 271, 276, 279, 280, 282, 284, 293]
+222:82:[192, 211, 218, 222, 233, 271, 276, 279, 280, 282, 284, 293]
+233:82:[192, 211, 218, 222, 233, 271, 276, 279, 280, 282, 284, 293]
+271:82:[192, 211, 218, 222, 233, 271, 276, 279, 280, 282, 284, 293]
+276:82:[192, 211, 218, 222, 233, 271, 276, 279, 280, 282, 284, 293]
+250:83:[250, 259, 261, 263, 269, 271, 272, 273, 279, 280, 282, 284, 287]
+259:83:[250, 259, 261, 263, 269, 271, 272, 273, 279, 280, 282, 284, 287]
+261:83:[250, 259, 261, 263, 269, 271, 272, 273, 279, 280, 282, 284, 287]
+263:83:[250, 259, 261, 263, 269, 271, 272, 273, 279, 280, 282, 284, 287]
+269:83:[250, 259, 261, 263, 269, 271, 272, 273, 279, 280, 282, 284, 287]
+271:83:[250, 259, 261, 263, 269, 271, 272, 273, 279, 280, 282, 284, 287]
+272:83:[250, 259, 261, 263, 269, 271, 272, 273, 279, 280, 282, 284, 287]
+259:84:[259, 261, 263, 264, 269, 271, 272, 273, 276, 279, 280, 282, 284, 287]
+261:84:[259, 261, 263, 264, 269, 271, 272, 273, 276, 279, 280, 282, 284, 287]
+263:84:[259, 261, 263, 264, 269, 271, 272, 273, 276, 279, 280, 282, 284, 287]
+264:84:[259, 261, 263, 264, 269, 271, 272, 273, 276, 279, 280, 282, 284, 287]
+269:84:[259, 261, 263, 264, 269, 271, 272, 273, 276, 279, 280, 282, 284, 287]
+271:84:[259, 261, 263, 264, 269, 271, 272, 273, 276, 279, 280, 282, 284, 287]
+272:84:[259, 261, 263, 264, 269, 271, 272, 273, 276, 279, 280, 282, 284, 287]
+273:84:[259, 261, 263, 264, 269, 271, 272, 273, 276, 279, 280, 282, 284, 287]
+196:85:[195, 196, 201, 242, 244, 267, 268, 273, 290, 293, 295]
+201:85:[195, 196, 201, 242, 244, 267, 268, 273, 290, 293, 295]
+242:85:[195, 196, 201, 242, 244, 267, 268, 273, 290, 293, 295]
+244:85:[195, 196, 201, 242, 244, 267, 268, 273, 290, 293, 295]
+267:85:[195, 196, 201, 242, 244, 267, 268, 273, 290, 293, 295]
+195:85:[195, 196, 201, 242, 244, 267, 268, 273, 290, 293, 295]
+267:86:[195, 196, 215, 242, 244, 257, 267, 268, 273, 290, 293, 295]
+195:86:[195, 196, 215, 242, 244, 257, 267, 268, 273, 290, 293, 295]
+196:86:[195, 196, 215, 242, 244, 257, 267, 268, 273, 290, 293, 295]
+215:86:[195, 196, 215, 242, 244, 257, 267, 268, 273, 290, 293, 295]
+242:86:[195, 196, 215, 242, 244, 257, 267, 268, 273, 290, 293, 295]
+244:86:[195, 196, 215, 242, 244, 257, 267, 268, 273, 290, 293, 295]
+257:86:[195, 196, 215, 242, 244, 257, 267, 268, 273, 290, 293, 295]
+190:87:[190, 246, 250, 259, 261, 269, 271, 272, 273, 279, 282, 284, 285]
+246:87:[190, 246, 250, 259, 261, 269, 271, 272, 273, 279, 282, 284, 285]
+250:87:[190, 246, 250, 259, 261, 269, 271, 272, 273, 279, 282, 284, 285]
+259:87:[190, 246, 250, 259, 261, 269, 271, 272, 273, 279, 282, 284, 285]
+261:87:[190, 246, 250, 259, 261, 269, 271, 272, 273, 279, 282, 284, 285]
+269:87:[190, 246, 250, 259, 261, 269, 271, 272, 273, 279, 282, 284, 285]
+271:87:[190, 246, 250, 259, 261, 269, 271, 272, 273, 279, 282, 284, 285]
+0:88:[0, 190, 246, 259, 261, 269, 271, 272, 273, 276, 279, 280, 282, 284, 285]
+190:88:[0, 190, 246, 259, 261, 269, 271, 272, 273, 276, 279, 280, 282, 284, 285]
+246:88:[0, 190, 246, 259, 261, 269, 271, 272, 273, 276, 279, 280, 282, 284, 285]
+259:88:[0, 190, 246, 259, 261, 269, 271, 272, 273, 276, 279, 280, 282, 284, 285]
+261:88:[0, 190, 246, 259, 261, 269, 271, 272, 273, 276, 279, 280, 282, 284, 285]
+269:88:[0, 190, 246, 259, 261, 269, 271, 272, 273, 276, 279, 280, 282, 284, 285]
+271:88:[0, 190, 246, 259, 261, 269, 271, 272, 273, 276, 279, 280, 282, 284, 285]
+272:88:[0, 190, 246, 259, 261, 269, 271, 272, 273, 276, 279, 280, 282, 284, 285]
+223:89:[189, 204, 223, 228, 229, 239, 252, 286, 295]
+228:89:[189, 204, 223, 228, 229, 239, 252, 286, 295]
+229:89:[189, 204, 223, 228, 229, 239, 252, 286, 295]
+189:89:[189, 204, 223, 228, 229, 239, 252, 286, 295]
+204:89:[189, 204, 223, 228, 229, 239, 252, 286, 295]
+204:90:[10, 65, 96, 136, 189, 204, 223, 228, 229, 239, 252, 286, 295]
+223:90:[10, 65, 96, 136, 189, 204, 223, 228, 229, 239, 252, 286, 295]
+10:90:[10, 65, 96, 136, 189, 204, 223, 228, 229, 239, 252, 286, 295]
+65:90:[10, 65, 96, 136, 189, 204, 223, 228, 229, 239, 252, 286, 295]
+96:90:[10, 65, 96, 136, 189, 204, 223, 228, 229, 239, 252, 286, 295]
+136:90:[10, 65, 96, 136, 189, 204, 223, 228, 229, 239, 252, 286, 295]
+189:90:[10, 65, 96, 136, 189, 204, 223, 228, 229, 239, 252, 286, 295]
+199:91:[199, 208, 215, 221, 224, 231, 235, 257, 290, 293, 295]
+208:91:[199, 208, 215, 221, 224, 231, 235, 257, 290, 293, 295]
+215:91:[199, 208, 215, 221, 224, 231, 235, 257, 290, 293, 295]
+221:91:[199, 208, 215, 221, 224, 231, 235, 257, 290, 293, 295]
+224:91:[199, 208, 215, 221, 224, 231, 235, 257, 290, 293, 295]
+231:91:[199, 208, 215, 221, 224, 231, 235, 257, 290, 293, 295]
+199:92:[199, 201, 208, 221, 224, 231, 235, 290, 293, 295]
+201:92:[199, 201, 208, 221, 224, 231, 235, 290, 293, 295]
+208:92:[199, 201, 208, 221, 224, 231, 235, 290, 293, 295]
+221:92:[199, 201, 208, 221, 224, 231, 235, 290, 293, 295]
+224:92:[199, 201, 208, 221, 224, 231, 235, 290, 293, 295]
+231:92:[199, 201, 208, 221, 224, 231, 235, 290, 293, 295]
+226:93:[23, 154, 226, 248, 288, 293]
+23:93:[23, 154, 226, 248, 288, 293]
+154:93:[23, 154, 226, 248, 288, 293]
+248:93:[23, 154, 226, 248, 288, 293]
+178:94:[178, 226, 248, 270, 293]
+226:94:[178, 226, 248, 270, 293]
+248:94:[178, 226, 248, 270, 293]
+206:95:[26, 79, 130, 138, 187, 202, 203, 206, 210, 219, 243, 282, 284, 285, 290, 295]
+210:95:[26, 79, 130, 138, 187, 202, 203, 206, 210, 219, 243, 282, 284, 285, 290, 295]
+26:95:[26, 79, 130, 138, 187, 202, 203, 206, 210, 219, 243, 282, 284, 285, 290, 295]
+79:95:[26, 79, 130, 138, 187, 202, 203, 206, 210, 219, 243, 282, 284, 285, 290, 295]
+130:95:[26, 79, 130, 138, 187, 202, 203, 206, 210, 219, 243, 282, 284, 285, 290, 295]
+138:95:[26, 79, 130, 138, 187, 202, 203, 206, 210, 219, 243, 282, 284, 285, 290, 295]
+187:95:[26, 79, 130, 138, 187, 202, 203, 206, 210, 219, 243, 282, 284, 285, 290, 295]
+202:95:[26, 79, 130, 138, 187, 202, 203, 206, 210, 219, 243, 282, 284, 285, 290, 295]
+203:95:[26, 79, 130, 138, 187, 202, 203, 206, 210, 219, 243, 282, 284, 285, 290, 295]
+60:96:[60, 75, 187, 202, 203, 206, 210, 219, 243, 257, 264, 279, 280, 282, 284, 285, 290, 295]
+75:96:[60, 75, 187, 202, 203, 206, 210, 219, 243, 257, 264, 279, 280, 282, 284, 285, 290, 295]
+187:96:[60, 75, 187, 202, 203, 206, 210, 219, 243, 257, 264, 279, 280, 282, 284, 285, 290, 295]
+202:96:[60, 75, 187, 202, 203, 206, 210, 219, 243, 257, 264, 279, 280, 282, 284, 285, 290, 295]
+203:96:[60, 75, 187, 202, 203, 206, 210, 219, 243, 257, 264, 279, 280, 282, 284, 285, 290, 295]
+206:96:[60, 75, 187, 202, 203, 206, 210, 219, 243, 257, 264, 279, 280, 282, 284, 285, 290, 295]
+210:96:[60, 75, 187, 202, 203, 206, 210, 219, 243, 257, 264, 279, 280, 282, 284, 285, 290, 295]
+219:96:[60, 75, 187, 202, 203, 206, 210, 219, 243, 257, 264, 279, 280, 282, 284, 285, 290, 295]
+243:96:[60, 75, 187, 202, 203, 206, 210, 219, 243, 257, 264, 279, 280, 282, 284, 285, 290, 295]
+257:96:[60, 75, 187, 202, 203, 206, 210, 219, 243, 257, 264, 279, 280, 282, 284, 285, 290, 295]
+227:97:[194, 227, 230, 249, 250, 263, 264, 272, 279, 280, 282, 284, 285]
+230:97:[194, 227, 230, 249, 250, 263, 264, 272, 279, 280, 282, 284, 285]
+249:97:[194, 227, 230, 249, 250, 263, 264, 272, 279, 280, 282, 284, 285]
+250:97:[194, 227, 230, 249, 250, 263, 264, 272, 279, 280, 282, 284, 285]
+263:97:[194, 227, 230, 249, 250, 263, 264, 272, 279, 280, 282, 284, 285]
+264:97:[194, 227, 230, 249, 250, 263, 264, 272, 279, 280, 282, 284, 285]
+194:97:[194, 227, 230, 249, 250, 263, 264, 272, 279, 280, 282, 284, 285]
+272:98:[176, 194, 227, 230, 249, 263, 264, 272, 276, 279, 280, 282, 284, 285]
+176:98:[176, 194, 227, 230, 249, 263, 264, 272, 276, 279, 280, 282, 284, 285]
+194:98:[176, 194, 227, 230, 249, 263, 264, 272, 276, 279, 280, 282, 284, 285]
+227:98:[176, 194, 227, 230, 249, 263, 264, 272, 276, 279, 280, 282, 284, 285]
+230:98:[176, 194, 227, 230, 249, 263, 264, 272, 276, 279, 280, 282, 284, 285]
+249:98:[176, 194, 227, 230, 249, 263, 264, 272, 276, 279, 280, 282, 284, 285]
+263:98:[176, 194, 227, 230, 249, 263, 264, 272, 276, 279, 280, 282, 284, 285]
+264:98:[176, 194, 227, 230, 249, 263, 264, 272, 276, 279, 280, 282, 284, 285]
+49:99:[49, 58, 70, 73, 214, 295]
+58:99:[49, 58, 70, 73, 214, 295]
+70:99:[49, 58, 70, 73, 214, 295]
+73:99:[49, 58, 70, 73, 214, 295]
+13:100:[13, 37, 117, 288, 294]
+37:100:[13, 37, 117, 288, 294]
+117:100:[13, 37, 117, 288, 294]
diff --git a/asterix-fuzzyjoin/data/dblp-small/dblp-small-id.txt b/asterix-fuzzyjoin/data/dblp-small/dblp-small-id.txt
new file mode 100644
index 0000000..7ea80e7
--- /dev/null
+++ b/asterix-fuzzyjoin/data/dblp-small/dblp-small-id.txt
@@ -0,0 +1,100 @@
+1:books/acm/kim95/AnnevelinkACFHK95:Object SQL - A Language for the Design and Implementation of Object Databases.:Jurgen Annevelink Rafiul Ahad Amelia Carlson Daniel H. Fishman Michael L. Heytens William Kent:2002-01-03 42-68 1995 Modern Database Systems db/books/collections/kim95.html#AnnevelinkACFHK95
+2:books/acm/kim95/Blakeley95:OQL[C++] Extending C++ with an Object Query Capability.:José A. Blakeley:2002-01-03 69-88 Modern Database Systems db/books/collections/kim95.html#Blakeley95 1995
+3:books/acm/kim95/BreitbartGS95:Transaction Management in Multidatabase Systems.:Yuri Breitbart Hector Garcia-Molina Abraham Silberschatz:2004-03-08 573-591 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#BreitbartGS95 1995
+4:books/acm/kim95/ChristodoulakisK95:Multimedia Information Systems Issues and Approaches.:Stavros Christodoulakis Leonidas Koveos:2002-01-03 318-337 1995 Modern Database Systems db/books/collections/kim95.html#ChristodoulakisK95
+5:books/acm/kim95/DayalHW95:Active Database Systems.:Umeshwar Dayal Eric N. Hanson Jennifer Widom:2002-01-03 434-456 1995 Modern Database Systems db/books/collections/kim95.html#DayalHW95
+6:books/acm/kim95/DittrichD95:Where Object-Oriented DBMSs Should Do Better A Critique Based on Early Experiences.:Angelika Kotz Dittrich Klaus R. Dittrich:2002-01-03 238-254 1995 Modern Database Systems db/books/collections/kim95.html#DittrichD95
+7:books/acm/kim95/Garcia-MolinaH95:Distributed Databases.:Hector Garcia-Molina Meichun Hsu:2002-01-03 477-493 1995 Modern Database Systems db/books/collections/kim95.html#Garcia-MolinaH95
+8:books/acm/kim95/Goodman95:An Object-Oriented DBMS War Story Developing a Genome Mapping Database in C++.:Nathan Goodman:2002-01-03 216-237 1995 Modern Database Systems db/books/collections/kim95.html#Goodman95
+9:books/acm/kim95/Kaiser95:Cooperative Transactions for Multiuser Environments.:Gail E. Kaiser:2002-01-03 409-433 1995 Modern Database Systems db/books/collections/kim95.html#Kaiser95
+10:books/acm/kim95/KelleyGKRG95:Schema Architecture of the UniSQL/M Multidatabase System:William Kelley Sunit K. Gala Won Kim Tom C. Reyes Bruce Graham:2004-03-08 Modern Database Systems books/acm/Kim95 621-648 1995 db/books/collections/kim95.html#KelleyGKRG95
+11:books/acm/kim95/KemperM95:Physical Object Management.:Alfons Kemper Guido Moerkotte:2002-01-03 175-202 1995 Modern Database Systems db/books/collections/kim95.html#KemperM95
+12:books/acm/kim95/Kim95:Introduction to Part 1 Next-Generation Database Technology.:Won Kim:2002-01-03 5-17 1995 Modern Database Systems db/books/collections/kim95.html#Kim95
+13:books/acm/kim95/Kim95a:Object-Oriented Database Systems Promises, Reality, and Future.:Won Kim:2002-01-03 255-280 1995 Modern Database Systems db/books/collections/kim95.html#Kim95a
+14:books/acm/kim95/Kim95b:Introduction to Part 2 Technology for Interoperating Legacy Databases.:Won Kim:2002-01-03 515-520 1995 Modern Database Systems db/books/collections/kim95.html#Kim95b
+15:books/acm/kim95/KimCGS95:On Resolving Schematic Heterogeneity in Multidatabase Systems.:Won Kim Injun Choi Sunit K. Gala Mark Scheevel:2002-01-03 521-550 1995 Modern Database Systems db/books/collections/kim95.html#KimCGS95
+16:books/acm/kim95/KimG95:Requirements for a Performance Benchmark for Object-Oriented Database Systems.:Won Kim Jorge F. Garza:2002-01-03 203-215 1995 Modern Database Systems db/books/collections/kim95.html#KimG95
+17:books/acm/kim95/KimK95:On View Support in Object-Oriented Databases Systems.:Won Kim William Kelley:2002-01-03 108-129 1995 Modern Database Systems db/books/collections/kim95.html#KimK95
+18:books/acm/kim95/Kowalski95:The POSC Solution to Managing E&P Data.:Vincent J. Kowalski:2002-01-03 281-301 1995 Modern Database Systems db/books/collections/kim95.html#Kowalski95
+19:books/acm/kim95/KriegerA95:C++ Bindings to an Object Database.:David Krieger Tim Andrews:2002-01-03 89-107 1995 Modern Database Systems db/books/collections/kim95.html#KriegerA95
+20:books/acm/kim95/Lunt95:Authorization in Object-Oriented Databases.:Teresa F. Lunt:2002-01-03 130-145 1995 Modern Database Systems db/books/collections/kim95.html#Lunt95
+21:books/acm/kim95/MengY95:Query Processing in Multidatabase Systems.:Weiyi Meng Clement T. Yu:2002-01-03 551-572 1995 Modern Database Systems db/books/collections/kim95.html#MengY95
+22:books/acm/kim95/Motro95:Management of Uncerainty in database Systems.:Amihai Motro:2002-01-03 457-476 1995 Modern Database Systems db/books/collections/kim95.html#Motro95
+23:books/acm/kim95/Omiecinski95:Parallel Relational Database Systems.:Edward Omiecinski:2002-01-03 494-512 1995 Modern Database Systems db/books/collections/kim95.html#Omiecinski95
+24:books/acm/kim95/OzsuB95:Query Processing in Object-Oriented Database Systems.:M. Tamer Özsu José A. Blakeley:2002-01-03 146-174 1995 Modern Database Systems db/books/collections/kim95.html#OzsuB95
+25:books/acm/kim95/RusinkiewiczS95:Specification and Execution of Transactional Workflows.:Marek Rusinkiewicz Amit P. Sheth:2004-03-08 592-620 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#RusinkiewiczS95 1995
+26:books/acm/kim95/Samet95:Spatial Data Structures.:Hanan Samet:2004-03-08 361-385 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#Samet95 1995
+27:books/acm/kim95/SametA95:Spatial Data Models and Query Processing.:Hanan Samet Walid G. Aref:2002-01-03 338-360 1995 Modern Database Systems db/books/collections/kim95.html#SametA95
+28:books/acm/kim95/ShanADDK95:Pegasus A Heterogeneous Information Management System.:Ming-Chien Shan Rafi Ahmed Jim Davis Weimin Du William Kent:2004-03-08 664-682 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#ShanADDK95 1995
+29:books/acm/kim95/Snodgrass95:Temporal Object-Oriented Databases A Critical Comparison.:Richard T. Snodgrass:2002-01-03 386-408 1995 Modern Database Systems db/books/collections/kim95.html#Snodgrass95
+30:books/acm/kim95/SoleyK95:The OMG Object Model.:Richard Mark Soley William Kent:2002-01-03 18-41 1995 Modern Database Systems db/books/collections/kim95.html#SoleyK95
+31:books/acm/kim95/Stout95:EDA/SQL.:Ralph L. Stout:2004-03-08 649-663 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#Stout95 1995
+32:books/acm/kim95/Thompson95:The Changing Database Standards Landscape.:Craig W. Thompson:2002-01-03 302-317 1995 Modern Database Systems db/books/collections/kim95.html#Thompson95
+33:books/acm/kim95/BreitbartR95:Overview of the ADDS System.:Yuri Breitbart Tom C. Reyes:2009-06-12 683-701 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#BreitbartR95 1995
+34:books/acm/Kim95:Modern Database Systems The Object Model, Interoperability, and Beyond.::2004-03-08 Won Kim Modern Database Systems ACM Press and Addison-Wesley 1995 0-201-59098-0 db/books/collections/kim95.html
+35:books/ap/MarshallO79:Inequalities Theory of Majorization and Its Application.:Albert W. Marshall Ingram Olkin:2002-01-03 Academic Press 1979 0-12-473750-1
+36:books/aw/kimL89/BjornerstedtH89:Version Control in an Object-Oriented Architecture.:Anders Björnerstedt Christer Hulten:2006-02-24 451-485 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#BjornerstedtH89
+37:books/aw/kimL89/BretlMOPSSWW89:The GemStone Data Management System.:Robert Bretl David Maier Allen Otis D. Jason Penney Bruce Schuchardt Jacob Stein E. Harold Williams Monty Williams:2002-01-03 283-308 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#BretlMOPSSWW89
+38:books/aw/kimL89/CareyDRS89:Storage Management in EXODUS.:Michael J. Carey David J. DeWitt Joel E. Richardson Eugene J. Shekita:2002-01-03 341-369 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#CareyDRS89
+39:books/aw/kimL89/Decouchant89:A Distributed Object Manager for the Smalltalk-80 System.:Dominique Decouchant:2002-01-03 487-520 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Decouchant89
+40:books/aw/kimL89/DiederichM89:Objects, Messages, and Rules in Database Design.:Jim Diederich Jack Milton:2002-01-03 177-197 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#DiederichM89
+41:books/aw/kimL89/EllisG89:Active Objects Ealities and Possibilities.:Clarence A. Ellis Simon J. Gibbs:2002-01-03 561-572 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#EllisG89
+42:books/aw/kimL89/FishmanABCCDHHKLLMNRSW89:Overview of the Iris DBMS.:Daniel H. Fishman Jurgen Annevelink David Beech E. C. Chow Tim Connors J. W. Davis Waqar Hasan C. G. Hoch William Kent S. Leichner Peter Lyngbæk Brom Mahbod Marie-Anne Neimat Tore Risch Ming-Chien Shan W. Kevin Wilkinson:2002-01-03 219-250 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#FishmanABCCDHHKLLMNRSW89
+43:books/aw/kimL89/KimBCGW89:Features of the ORION Object-Oriented Database System.:Won Kim Nat Ballou Hong-Tai Chou Jorge F. Garza Darrell Woelk:2002-01-03 251-282 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#KimBCGW89
+44:books/aw/kimL89/KimKD89:Indexing Techniques for Object-Oriented Databases.:Won Kim Kyung-Chang Kim Alfred G. Dale:2002-01-03 371-394 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#KimKD89
+45:books/aw/kimL89/King89:My Cat Is Object-Oriented.:Roger King:2002-01-03 23-30 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#King89
+46:books/aw/kimL89/Maier89:Making Database Systems Fast Enough for CAD Applications.:David Maier:2002-01-03 573-582 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Maier89
+47:books/aw/kimL89/MellenderRS89:Optimizing Smalltalk Message Performance.:Fred Mellender Steve Riegel Andrew Straw:2002-01-03 423-450 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#MellenderRS89
+48:books/aw/kimL89/Moon89:The Common List Object-Oriented Programming Language Standard.:David A. Moon:2002-01-03 49-78 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Moon89
+49:books/aw/kimL89/Moss89:Object Orientation as Catalyst for Language-Database Inegration.:J. Eliot B. Moss:2002-01-03 583-592 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Moss89
+50:books/aw/kimL89/Nierstrasz89:A Survey of Object-Oriented Concepts.:Oscar Nierstrasz:2002-01-03 3-21 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Nierstrasz89
+51:books/aw/kimL89/NierstraszT89:Integrated Office Systems.:Oscar Nierstrasz Dennis Tsichritzis:2002-01-03 199-215 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#NierstraszT89
+52:books/aw/kimL89/Russinoff89:Proteus A Frame-Based Nonmonotonic Inference System.:David M. Russinoff:2002-01-03 127-150 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#Russinoff89
+53:books/aw/kimL89/SkarraZ89:Concurrency Control and Object-Oriented Databases.:Andrea H. Skarra Stanley B. Zdonik:2002-01-03 395-421 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#SkarraZ89
+54:books/aw/kimL89/SteinLU89:A Shared View of Sharing The Treaty of Orlando.:Lynn Andrea Stein Henry Lieberman David Ungar:2002-01-03 31-48 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#SteinLU89
+55:books/aw/kimL89/TarltonT89:Pogo A Declarative Representation System for Graphics.:Mark A. Tarlton P. Nong Tarlton:2002-01-03 151-176 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#TarltonT89
+56:books/aw/kimL89/TomlinsonS89:Concurrent Object-Oriented Programming Languages.:Chris Tomlinson Mark Scheevel:2002-01-03 79-124 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#TomlinsonS89
+57:books/aw/kimL89/TsichritzisN89:Directions in Object-Oriented Research.:Dennis Tsichritzis Oscar Nierstrasz:2002-01-03 523-536 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#TsichritzisN89
+58:books/aw/kimL89/Wand89:A Proposal for a Formal Model of Objects.:Yair Wand:2002-01-03 537-559 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Wand89
+59:books/aw/kimL89/WeiserL89:OZ+ An Object-Oriented Database System.:Stephen P. Weiser Frederick H. Lochovsky:2002-01-03 309-337 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#WeiserL89
+60:books/aw/stonebraker86/RoweS86:The Commercial INGRES Epilogue.:Lawrence A. Rowe Michael Stonebraker:2002-01-03 63-82 1986 The INGRES Papers db/books/collections/Stonebraker86.html#RoweS86 db/books/collections/Stonebraker86/RoweS86.html ingres/P063.pdf
+61:books/aw/stonebraker86/Stonebraker86:Design of Relational Systems (Introduction to Section 1).:Michael Stonebraker:2002-01-03 1-3 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86 db/books/collections/Stonebraker86/Stonebraker86.html ingres/P001.pdf
+62:books/aw/stonebraker86/Stonebraker86a:Supporting Studies on Relational Systems (Introduction to Section 2).:Michael Stonebraker:2002-01-03 83-85 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86a db/books/collections/Stonebraker86/Stonebraker86a.html ingres/P083.pdf
+63:books/aw/stonebraker86/Stonebraker86b:Distributed Database Systems (Introduction to Section 3).:Michael Stonebraker:2002-01-03 183-186 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86b db/books/collections/Stonebraker86/Stonebraker86b.html ingres/P183.pdf
+64:books/aw/stonebraker86/Stonebraker86c:The Design and Implementation of Distributed INGRES.:Michael Stonebraker:2002-01-03 187-196 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86c db/books/collections/Stonebraker86/Stonebraker86c.html ingres/P187.pdf
+65:books/aw/stonebraker86/Stonebraker86d:User Interfaces for Database Systems (Introduction to Section 4).:Michael Stonebraker:2002-01-03 243-245 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86d db/books/collections/Stonebraker86/Stonebraker86d.html ingres/P243.pdf
+66:books/aw/stonebraker86/Stonebraker86e:Extended Semantics for the Relational Model (Introduction to Section 5).:Michael Stonebraker:2002-01-03 313-316 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86e db/books/collections/Stonebraker86/Stonebraker86e.html ingres/P313.pdf
+67:books/aw/stonebraker86/Stonebraker86f:Database Design (Introduction to Section 6).:Michael Stonebraker:2002-01-03 393-394 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86f db/books/collections/Stonebraker86/Stonebraker86f.html ingres/P393.pdf
+68:books/aw/stonebraker86/X86:Title, Preface, Contents.::2002-01-03 1986 The INGRES Papers db/books/collections/Stonebraker86.html#X86 db/books/collections/Stonebraker86/X86.html ingres/frontmatter.pdf
+69:books/aw/stonebraker86/X86a:References.::2002-01-03 429-444 1986 The INGRES Papers db/books/collections/Stonebraker86.html#X86a db/books/collections/Stonebraker86/X86a.html ingres/P429.pdf
+70:books/aw/Knuth86a:TeX The Program:Donald E. Knuth:2002-01-03 Addison-Wesley 1986 0-201-13437-3
+71:conf/appt/LiDCMY07:Design and Implementation of a High-Speed Reconfigurable Modular Arithmetic Unit.:Wei Li Zibin Dai Tao Chen Tao Meng Xuan Yang:2007-11-09 50-59 2007 conf/appt/2007 APPT http //dx.doi.org/10.1007/978-3-540-76837-1_9 db/conf/appt/appt2007.html#LiDCMY07
+72:books/aw/Lamport86:LaTeX User's Guide & Reference Manual:Leslie Lamport:2002-01-03 Addison-Wesley 1986 0-201-15790-X
+73:books/aw/AhoHU74:The Design and Analysis of Computer Algorithms.:Alfred V. Aho John E. Hopcroft Jeffrey D. Ullman:2002-01-03 Addison-Wesley 1974 0-201-00029-6
+74:books/aw/Lamport2002:Specifying Systems, The TLA+ Language and Tools for Hardware and Software Engineers:Leslie Lamport:2005-07-28 Addison-Wesley 2002 0-3211-4306-X http //research.microsoft.com/users/lamport/tla/book.html
+75:books/aw/AhoHU83:Data Structures and Algorithms.:Alfred V. Aho John E. Hopcroft Jeffrey D. Ullman:2002-01-03 Addison-Wesley 1983 0-201-00023-7
+76:books/aw/LewisBK01:Databases and Transaction Processing An Application-Oriented Approach:Philip M. Lewis Arthur J. Bernstein Michael Kifer:2002-01-03 Addison-Wesley 2001 0-201-70872-8
+77:books/aw/AhoKW88:The AWK Programming Language:Alfred V. Aho Brian W. Kernighan Peter J. Weinberger:2002-01-03 Addison-Wesley 1988
+78:books/aw/LindholmY97:The Java Virtual Machine Specification:Tim Lindholm Frank Yellin:2002-01-28 Addison-Wesley 1997 0-201-63452-X
+79:books/aw/AhoSU86:Compilers Princiles, Techniques, and Tools.:Alfred V. Aho Ravi Sethi Jeffrey D. Ullman:2002-01-03 Addison-Wesley 1986 0-201-10088-6
+80:books/aw/Sedgewick83:Algorithms:Robert Sedgewick:2002-01-03 Addison-Wesley 1983 0-201-06672-6
+81:journals/siamcomp/AspnesW96:Randomized Consensus in Expected O(n log² n) Operations Per Processor.:James Aspnes Orli Waarts:2002-01-03 1024-1044 1996 25 SIAM J. Comput. 5 db/journals/siamcomp/siamcomp25.html#AspnesW96
+82:conf/focs/AspnesW92:Randomized Consensus in Expected O(n log ^2 n) Operations Per Processor:James Aspnes Orli Waarts:2006-04-25 137-146 conf/focs/FOCS33 1992 FOCS db/conf/focs/focs92.html#AspnesW92
+83:journals/siamcomp/Bloniarz83:A Shortest-Path Algorithm with Expected Time O(n² log n log* n).:Peter A. Bloniarz:2002-01-03 588-600 1983 12 SIAM J. Comput. 3 db/journals/siamcomp/siamcomp12.html#Bloniarz83
+84:conf/stoc/Bloniarz80:A Shortest-Path Algorithm with Expected Time O(n^2 log n log ^* n):Peter A. Bloniarz:2006-04-25 378-384 conf/stoc/STOC12 1980 STOC db/conf/stoc/stoc80.html#Bloniarz80
+85:journals/siamcomp/Megiddo83a:Linear-Time Algorithms for Linear Programming in R³ and Related Problems.:Nimrod Megiddo:2002-01-03 759-776 1983 12 SIAM J. Comput. 4 db/journals/siamcomp/siamcomp12.html#Megiddo83a
+86:conf/focs/Megiddo82:Linear-Time Algorithms for Linear Programming in R^3 and Related Problems:Nimrod Megiddo:2006-04-25 329-338 conf/focs/FOCS23 1982 FOCS db/conf/focs/focs82.html#Megiddo82
+87:journals/siamcomp/MoffatT87:An All Pairs Shortest Path Algorithm with Expected Time O(n² log n).:Alistair Moffat Tadao Takaoka:2002-01-03 1023-1031 1987 16 SIAM J. Comput. 6 db/journals/siamcomp/siamcomp16.html#MoffatT87
+88:conf/focs/MoffatT85:An All Pairs Shortest Path Algorithm with Expected Running Time O(n^2 log n):Alistair Moffat Tadao Takaoka:2006-04-25 101-105 conf/focs/FOCS26 1985 FOCS db/conf/focs/focs85.html#MoffatT85
+89:conf/icip/SchonfeldL98:VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases.:Dan Schonfeld Dan Lelescu:2002-11-05 123-127 1998 ICIP (3) db/conf/icip/icip1998-3.html#SchonfeldL98
+90:conf/hicss/SchonfeldL99:VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases ¾ Visual Search Engine.:Dan Schonfeld Dan Lelescu:2002-01-03 1999 HICSS http //computer.org/proceedings/hicss/0001/00013/00013006abs.htm db/conf/hicss/hicss1999-3.html#SchonfeldL99
+91:journals/corr/abs-0802-2861:Geometric Set Cover and Hitting Sets for Polytopes in $R^3$:Sören Laue:2008-03-03 http //arxiv.org/abs/0802.2861 2008 CoRR abs/0802.2861 db/journals/corr/corr0802.html#abs-0802-2861 informal publication
+92:conf/stacs/Laue08:Geometric Set Cover and Hitting Sets for Polytopes in R³.:Sören Laue:2008-03-04 2008 STACS 479-490 http //drops.dagstuhl.de/opus/volltexte/2008/1367 conf/stacs/2008 db/conf/stacs/stacs2008.html#Laue08
+93:journals/iandc/IbarraJCR91:Some Classes of Languages in NC¹:Oscar H. Ibarra Tao Jiang Jik H. Chang Bala Ravikumar:2006-04-25 86-106 Inf. Comput. January 1991 90 1 db/journals/iandc/iandc90.html#IbarraJCR91
+94:conf/awoc/IbarraJRC88:On Some Languages in NC.:Oscar H. Ibarra Tao Jiang Bala Ravikumar Jik H. Chang:2002-08-06 64-73 1988 conf/awoc/1988 AWOC db/conf/awoc/awoc88.html#IbarraJRC88
+95:journals/jacm/GalilHLSW87:An O(n³log n) deterministic and an O(n³) Las Vegs isomorphism test for trivalent graphs.:Zvi Galil Christoph M. Hoffmann Eugene M. Luks Claus-Peter Schnorr Andreas Weber:2003-11-20 513-531 1987 34 J. ACM 3 http //doi.acm.org/10.1145/28869.28870 db/journals/jacm/jacm34.html#GalilHLSW87
+96:conf/focs/GalilHLSW82:An O(n^3 log n) Deterministic and an O(n^3) Probabilistic Isomorphism Test for Trivalent Graphs:Zvi Galil Christoph M. Hoffmann Eugene M. Luks Claus-Peter Schnorr Andreas Weber:2006-04-25 118-125 conf/focs/FOCS23 1982 FOCS db/conf/focs/focs82.html#GalilHLSW82
+97:journals/jacm/GalilT88:An O(n²(m + n log n)log n) min-cost flow algorithm.:Zvi Galil Éva Tardos:2003-11-20 374-386 1988 35 J. ACM 2 http //doi.acm.org/10.1145/42282.214090 db/journals/jacm/jacm35.html#GalilT88
+98:conf/focs/GalilT86:An O(n^2 (m + n log n) log n) Min-Cost Flow Algorithm:Zvi Galil Éva Tardos:2006-04-25 1-9 conf/focs/FOCS27 1986 FOCS db/conf/focs/focs86.html#GalilT86
+99:series/synthesis/2009Weintraub:Jordan Canonical Form Theory and Practice:Steven H. Weintraub:2009-09-06 Jordan Canonical Form Theory and Practice http //dx.doi.org/10.2200/S00218ED1V01Y200908MAS006 http //dx.doi.org/10.2200/S00218ED1V01Y200908MAS006 2009 Synthesis Lectures on Mathematics & Statistics Morgan & Claypool Publishers
+100:series/synthesis/2009Brozos:The Geometry of Walker Manifolds:Miguel Brozos-Vázquez Eduardo García-Río Peter Gilkey Stana Nikcevic Rámon Vázquez-Lorenzo:2009-09-06 The Geometry of Walker Manifolds http //dx.doi.org/10.2200/S00197ED1V01Y200906MAS005 http //dx.doi.org/10.2200/S00197ED1V01Y200906MAS005 2009 Synthesis Lectures on Mathematics & Statistics Morgan & Claypool Publishers
diff --git a/asterix-fuzzyjoin/data/dblp-small/raw.dblp-000/part-00000 b/asterix-fuzzyjoin/data/dblp-small/raw.dblp-000/part-00000
new file mode 100644
index 0000000..be9c045
--- /dev/null
+++ b/asterix-fuzzyjoin/data/dblp-small/raw.dblp-000/part-00000
@@ -0,0 +1,100 @@
+books/acm/kim95/AnnevelinkACFHK95:Object SQL - A Language for the Design and Implementation of Object Databases.:Jurgen Annevelink Rafiul Ahad Amelia Carlson Daniel H. Fishman Michael L. Heytens William Kent:2002-01-03 42-68 1995 Modern Database Systems db/books/collections/kim95.html#AnnevelinkACFHK95
+books/acm/kim95/Blakeley95:OQL[C++] Extending C++ with an Object Query Capability.:José A. Blakeley:2002-01-03 69-88 Modern Database Systems db/books/collections/kim95.html#Blakeley95 1995
+books/acm/kim95/BreitbartGS95:Transaction Management in Multidatabase Systems.:Yuri Breitbart Hector Garcia-Molina Abraham Silberschatz:2004-03-08 573-591 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#BreitbartGS95 1995
+books/acm/kim95/ChristodoulakisK95:Multimedia Information Systems Issues and Approaches.:Stavros Christodoulakis Leonidas Koveos:2002-01-03 318-337 1995 Modern Database Systems db/books/collections/kim95.html#ChristodoulakisK95
+books/acm/kim95/DayalHW95:Active Database Systems.:Umeshwar Dayal Eric N. Hanson Jennifer Widom:2002-01-03 434-456 1995 Modern Database Systems db/books/collections/kim95.html#DayalHW95
+books/acm/kim95/DittrichD95:Where Object-Oriented DBMSs Should Do Better A Critique Based on Early Experiences.:Angelika Kotz Dittrich Klaus R. Dittrich:2002-01-03 238-254 1995 Modern Database Systems db/books/collections/kim95.html#DittrichD95
+books/acm/kim95/Garcia-MolinaH95:Distributed Databases.:Hector Garcia-Molina Meichun Hsu:2002-01-03 477-493 1995 Modern Database Systems db/books/collections/kim95.html#Garcia-MolinaH95
+books/acm/kim95/Goodman95:An Object-Oriented DBMS War Story Developing a Genome Mapping Database in C++.:Nathan Goodman:2002-01-03 216-237 1995 Modern Database Systems db/books/collections/kim95.html#Goodman95
+books/acm/kim95/Kaiser95:Cooperative Transactions for Multiuser Environments.:Gail E. Kaiser:2002-01-03 409-433 1995 Modern Database Systems db/books/collections/kim95.html#Kaiser95
+books/acm/kim95/KelleyGKRG95:Schema Architecture of the UniSQL/M Multidatabase System:William Kelley Sunit K. Gala Won Kim Tom C. Reyes Bruce Graham:2004-03-08 Modern Database Systems books/acm/Kim95 621-648 1995 db/books/collections/kim95.html#KelleyGKRG95
+books/acm/kim95/KemperM95:Physical Object Management.:Alfons Kemper Guido Moerkotte:2002-01-03 175-202 1995 Modern Database Systems db/books/collections/kim95.html#KemperM95
+books/acm/kim95/Kim95:Introduction to Part 1 Next-Generation Database Technology.:Won Kim:2002-01-03 5-17 1995 Modern Database Systems db/books/collections/kim95.html#Kim95
+books/acm/kim95/Kim95a:Object-Oriented Database Systems Promises, Reality, and Future.:Won Kim:2002-01-03 255-280 1995 Modern Database Systems db/books/collections/kim95.html#Kim95a
+books/acm/kim95/Kim95b:Introduction to Part 2 Technology for Interoperating Legacy Databases.:Won Kim:2002-01-03 515-520 1995 Modern Database Systems db/books/collections/kim95.html#Kim95b
+books/acm/kim95/KimCGS95:On Resolving Schematic Heterogeneity in Multidatabase Systems.:Won Kim Injun Choi Sunit K. Gala Mark Scheevel:2002-01-03 521-550 1995 Modern Database Systems db/books/collections/kim95.html#KimCGS95
+books/acm/kim95/KimG95:Requirements for a Performance Benchmark for Object-Oriented Database Systems.:Won Kim Jorge F. Garza:2002-01-03 203-215 1995 Modern Database Systems db/books/collections/kim95.html#KimG95
+books/acm/kim95/KimK95:On View Support in Object-Oriented Databases Systems.:Won Kim William Kelley:2002-01-03 108-129 1995 Modern Database Systems db/books/collections/kim95.html#KimK95
+books/acm/kim95/Kowalski95:The POSC Solution to Managing E&P Data.:Vincent J. Kowalski:2002-01-03 281-301 1995 Modern Database Systems db/books/collections/kim95.html#Kowalski95
+books/acm/kim95/KriegerA95:C++ Bindings to an Object Database.:David Krieger Tim Andrews:2002-01-03 89-107 1995 Modern Database Systems db/books/collections/kim95.html#KriegerA95
+books/acm/kim95/Lunt95:Authorization in Object-Oriented Databases.:Teresa F. Lunt:2002-01-03 130-145 1995 Modern Database Systems db/books/collections/kim95.html#Lunt95
+books/acm/kim95/MengY95:Query Processing in Multidatabase Systems.:Weiyi Meng Clement T. Yu:2002-01-03 551-572 1995 Modern Database Systems db/books/collections/kim95.html#MengY95
+books/acm/kim95/Motro95:Management of Uncerainty in database Systems.:Amihai Motro:2002-01-03 457-476 1995 Modern Database Systems db/books/collections/kim95.html#Motro95
+books/acm/kim95/Omiecinski95:Parallel Relational Database Systems.:Edward Omiecinski:2002-01-03 494-512 1995 Modern Database Systems db/books/collections/kim95.html#Omiecinski95
+books/acm/kim95/OzsuB95:Query Processing in Object-Oriented Database Systems.:M. Tamer Özsu José A. Blakeley:2002-01-03 146-174 1995 Modern Database Systems db/books/collections/kim95.html#OzsuB95
+books/acm/kim95/RusinkiewiczS95:Specification and Execution of Transactional Workflows.:Marek Rusinkiewicz Amit P. Sheth:2004-03-08 592-620 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#RusinkiewiczS95 1995
+books/acm/kim95/Samet95:Spatial Data Structures.:Hanan Samet:2004-03-08 361-385 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#Samet95 1995
+books/acm/kim95/SametA95:Spatial Data Models and Query Processing.:Hanan Samet Walid G. Aref:2002-01-03 338-360 1995 Modern Database Systems db/books/collections/kim95.html#SametA95
+books/acm/kim95/ShanADDK95:Pegasus A Heterogeneous Information Management System.:Ming-Chien Shan Rafi Ahmed Jim Davis Weimin Du William Kent:2004-03-08 664-682 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#ShanADDK95 1995
+books/acm/kim95/Snodgrass95:Temporal Object-Oriented Databases A Critical Comparison.:Richard T. Snodgrass:2002-01-03 386-408 1995 Modern Database Systems db/books/collections/kim95.html#Snodgrass95
+books/acm/kim95/SoleyK95:The OMG Object Model.:Richard Mark Soley William Kent:2002-01-03 18-41 1995 Modern Database Systems db/books/collections/kim95.html#SoleyK95
+books/acm/kim95/Stout95:EDA/SQL.:Ralph L. Stout:2004-03-08 649-663 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#Stout95 1995
+books/acm/kim95/Thompson95:The Changing Database Standards Landscape.:Craig W. Thompson:2002-01-03 302-317 1995 Modern Database Systems db/books/collections/kim95.html#Thompson95
+books/acm/kim95/BreitbartR95:Overview of the ADDS System.:Yuri Breitbart Tom C. Reyes:2009-06-12 683-701 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#BreitbartR95 1995
+books/acm/Kim95:Modern Database Systems The Object Model, Interoperability, and Beyond.::2004-03-08 Won Kim Modern Database Systems ACM Press and Addison-Wesley 1995 0-201-59098-0 db/books/collections/kim95.html
+books/ap/MarshallO79:Inequalities Theory of Majorization and Its Application.:Albert W. Marshall Ingram Olkin:2002-01-03 Academic Press 1979 0-12-473750-1
+books/aw/kimL89/BjornerstedtH89:Version Control in an Object-Oriented Architecture.:Anders Björnerstedt Christer Hulten:2006-02-24 451-485 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#BjornerstedtH89
+books/aw/kimL89/BretlMOPSSWW89:The GemStone Data Management System.:Robert Bretl David Maier Allen Otis D. Jason Penney Bruce Schuchardt Jacob Stein E. Harold Williams Monty Williams:2002-01-03 283-308 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#BretlMOPSSWW89
+books/aw/kimL89/CareyDRS89:Storage Management in EXODUS.:Michael J. Carey David J. DeWitt Joel E. Richardson Eugene J. Shekita:2002-01-03 341-369 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#CareyDRS89
+books/aw/kimL89/Decouchant89:A Distributed Object Manager for the Smalltalk-80 System.:Dominique Decouchant:2002-01-03 487-520 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Decouchant89
+books/aw/kimL89/DiederichM89:Objects, Messages, and Rules in Database Design.:Jim Diederich Jack Milton:2002-01-03 177-197 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#DiederichM89
+books/aw/kimL89/EllisG89:Active Objects Ealities and Possibilities.:Clarence A. Ellis Simon J. Gibbs:2002-01-03 561-572 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#EllisG89
+books/aw/kimL89/FishmanABCCDHHKLLMNRSW89:Overview of the Iris DBMS.:Daniel H. Fishman Jurgen Annevelink David Beech E. C. Chow Tim Connors J. W. Davis Waqar Hasan C. G. Hoch William Kent S. Leichner Peter Lyngbæk Brom Mahbod Marie-Anne Neimat Tore Risch Ming-Chien Shan W. Kevin Wilkinson:2002-01-03 219-250 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#FishmanABCCDHHKLLMNRSW89
+books/aw/kimL89/KimBCGW89:Features of the ORION Object-Oriented Database System.:Won Kim Nat Ballou Hong-Tai Chou Jorge F. Garza Darrell Woelk:2002-01-03 251-282 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#KimBCGW89
+books/aw/kimL89/KimKD89:Indexing Techniques for Object-Oriented Databases.:Won Kim Kyung-Chang Kim Alfred G. Dale:2002-01-03 371-394 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#KimKD89
+books/aw/kimL89/King89:My Cat Is Object-Oriented.:Roger King:2002-01-03 23-30 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#King89
+books/aw/kimL89/Maier89:Making Database Systems Fast Enough for CAD Applications.:David Maier:2002-01-03 573-582 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Maier89
+books/aw/kimL89/MellenderRS89:Optimizing Smalltalk Message Performance.:Fred Mellender Steve Riegel Andrew Straw:2002-01-03 423-450 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#MellenderRS89
+books/aw/kimL89/Moon89:The Common List Object-Oriented Programming Language Standard.:David A. Moon:2002-01-03 49-78 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Moon89
+books/aw/kimL89/Moss89:Object Orientation as Catalyst for Language-Database Inegration.:J. Eliot B. Moss:2002-01-03 583-592 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Moss89
+books/aw/kimL89/Nierstrasz89:A Survey of Object-Oriented Concepts.:Oscar Nierstrasz:2002-01-03 3-21 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Nierstrasz89
+books/aw/kimL89/NierstraszT89:Integrated Office Systems.:Oscar Nierstrasz Dennis Tsichritzis:2002-01-03 199-215 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#NierstraszT89
+books/aw/kimL89/Russinoff89:Proteus A Frame-Based Nonmonotonic Inference System.:David M. Russinoff:2002-01-03 127-150 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#Russinoff89
+books/aw/kimL89/SkarraZ89:Concurrency Control and Object-Oriented Databases.:Andrea H. Skarra Stanley B. Zdonik:2002-01-03 395-421 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#SkarraZ89
+books/aw/kimL89/SteinLU89:A Shared View of Sharing The Treaty of Orlando.:Lynn Andrea Stein Henry Lieberman David Ungar:2002-01-03 31-48 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#SteinLU89
+books/aw/kimL89/TarltonT89:Pogo A Declarative Representation System for Graphics.:Mark A. Tarlton P. Nong Tarlton:2002-01-03 151-176 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#TarltonT89
+books/aw/kimL89/TomlinsonS89:Concurrent Object-Oriented Programming Languages.:Chris Tomlinson Mark Scheevel:2002-01-03 79-124 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#TomlinsonS89
+books/aw/kimL89/TsichritzisN89:Directions in Object-Oriented Research.:Dennis Tsichritzis Oscar Nierstrasz:2002-01-03 523-536 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#TsichritzisN89
+books/aw/kimL89/Wand89:A Proposal for a Formal Model of Objects.:Yair Wand:2002-01-03 537-559 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Wand89
+books/aw/kimL89/WeiserL89:OZ+ An Object-Oriented Database System.:Stephen P. Weiser Frederick H. Lochovsky:2002-01-03 309-337 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#WeiserL89
+books/aw/stonebraker86/RoweS86:The Commercial INGRES Epilogue.:Lawrence A. Rowe Michael Stonebraker:2002-01-03 63-82 1986 The INGRES Papers db/books/collections/Stonebraker86.html#RoweS86 db/books/collections/Stonebraker86/RoweS86.html ingres/P063.pdf
+books/aw/stonebraker86/Stonebraker86:Design of Relational Systems (Introduction to Section 1).:Michael Stonebraker:2002-01-03 1-3 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86 db/books/collections/Stonebraker86/Stonebraker86.html ingres/P001.pdf
+books/aw/stonebraker86/Stonebraker86a:Supporting Studies on Relational Systems (Introduction to Section 2).:Michael Stonebraker:2002-01-03 83-85 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86a db/books/collections/Stonebraker86/Stonebraker86a.html ingres/P083.pdf
+books/aw/stonebraker86/Stonebraker86b:Distributed Database Systems (Introduction to Section 3).:Michael Stonebraker:2002-01-03 183-186 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86b db/books/collections/Stonebraker86/Stonebraker86b.html ingres/P183.pdf
+books/aw/stonebraker86/Stonebraker86c:The Design and Implementation of Distributed INGRES.:Michael Stonebraker:2002-01-03 187-196 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86c db/books/collections/Stonebraker86/Stonebraker86c.html ingres/P187.pdf
+books/aw/stonebraker86/Stonebraker86d:User Interfaces for Database Systems (Introduction to Section 4).:Michael Stonebraker:2002-01-03 243-245 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86d db/books/collections/Stonebraker86/Stonebraker86d.html ingres/P243.pdf
+books/aw/stonebraker86/Stonebraker86e:Extended Semantics for the Relational Model (Introduction to Section 5).:Michael Stonebraker:2002-01-03 313-316 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86e db/books/collections/Stonebraker86/Stonebraker86e.html ingres/P313.pdf
+books/aw/stonebraker86/Stonebraker86f:Database Design (Introduction to Section 6).:Michael Stonebraker:2002-01-03 393-394 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86f db/books/collections/Stonebraker86/Stonebraker86f.html ingres/P393.pdf
+books/aw/stonebraker86/X86:Title, Preface, Contents.::2002-01-03 1986 The INGRES Papers db/books/collections/Stonebraker86.html#X86 db/books/collections/Stonebraker86/X86.html ingres/frontmatter.pdf
+books/aw/stonebraker86/X86a:References.::2002-01-03 429-444 1986 The INGRES Papers db/books/collections/Stonebraker86.html#X86a db/books/collections/Stonebraker86/X86a.html ingres/P429.pdf
+books/aw/Knuth86a:TeX The Program:Donald E. Knuth:2002-01-03 Addison-Wesley 1986 0-201-13437-3
+books/aw/AbiteboulHV95:Foundations of Databases.:Serge Abiteboul Richard Hull Victor Vianu:2002-01-03 Addison-Wesley 1995 0-201-53771-0 AHV/Toc.pdf ... ... journals/tods/AstrahanBCEGGKLMMPTWW76 books/bc/AtzeniA93 journals/tcs/AtzeniABM82 journals/jcss/AbiteboulB86 journals/csur/AtkinsonB87 conf/pods/AtzeniB87 journals/vldb/AbiteboulB95 conf/sigmod/AbiteboulB91 conf/dood/AtkinsonBDDMZ89 conf/vldb/AlbanoBGO93 ... conf/icdt/Abiteboul88 journals/ipl/Abiteboul89 conf/ds/Abrial74 journals/tods/AhoBU79 books/mk/minker88/AptBW88 conf/vldb/AroraC78 conf/stoc/AfratiC89 journals/tods/AlbanoCO85 conf/pods/AfratiCY91 conf/pods/AusielloDM85 conf/vldb/AbiteboulG85 journals/jacm/AjtaiG87 conf/focs/AjtaiG89 journals/tods/AbiteboulG91 ... ... journals/tods/AbiteboulH87 conf/sigmod/AbiteboulH88 ... conf/sigmod/AbiteboulK89 journals/tcs/AbiteboulKG91 journals/jcss/AbiteboulKRW95 conf/sigmod/AbiteboulLUW93 conf/pods/AtzeniP82 conf/pods/AfratiP87 conf/pods/AptP87 conf/wg/AndriesP91 conf/pods/AfratiPPRSU86 books/el/leeuwen90/Apt90 conf/ifip/Armstrong74 journals/siamcomp/AhoSSU81 journals/tods/AhoSU79 journals/siamcomp/AhoSU79 conf/pods/AbiteboulSV90 journals/is/AtzeniT93 conf/popl/AhoU79 conf/pods/AbiteboulV87 conf/jcdkb/AbiteboulV88 journals/jacm/AbiteboulV88 conf/pods/AbiteboulV88 journals/jacm/AbiteboulV89 journals/jcss/AbiteboulV90 journals/jcss/AbiteboulV91 conf/stoc/AbiteboulV91 journals/amai/AbiteboulV91 journals/jcss/AbiteboulV95 journals/jacm/AptE82 conf/coco/AbiteboulVV92 conf/iclp/AptB88 conf/oopsla/BobrowKKMSZ86 journals/tse/BatoryBGSTTW88 conf/mfcs/Bancilhon78 ... conf/db-workshops/Bancilhon85 books/el/leeuwen90/Barendregt90 ... 
journals/tods/BeeriB79 books/el/leeuwen90/BerstelB90 conf/icdt/BeneventanoB92 conf/vldb/BernsteinBC80 conf/vldb/BeeriBG78 conf/sigmod/BorgidaBMR89 journals/tods/BunemanC79 journals/jacm/BernsteinC81 conf/dbpl/BancilhonCD89 books/bc/tanselCGSS93/BaudinetCW93 conf/sigmod/BiskupDB79 journals/jacm/BeeriDFS84 books/mk/BancilhonDK92 conf/edbt/BryDM88 conf/pods/BunemanDW88 journals/jcss/BunemanDW91 journals/tods/Beeri80 journals/dke/Beeri90 ... journals/tods/Bernstein76 conf/lics/BidoitF87 journals/iandc/BidoitF91 conf/sigmod/BeeriFH77 conf/stoc/BeeriFMMUY81 journals/jacm/BeeriFMY83 journals/tods/BunemanFN82 journals/siamcomp/BernsteinG81 journals/iandc/BlassGK85 conf/ijcai/BrachmanGL85 journals/tods/BernsteinGWRR81 books/aw/BernsteinHG87 ... journals/tcs/Bidoit91 journals/tcs/Biskup80 conf/adbt/Biskup79 journals/tods/Biskup83 journals/tcs/BunemanJO91 journals/tods/BeeriK86 conf/pods/BeeriKBR87 conf/icdt/BidoitL90 journals/csur/BatiniL86 conf/sigmod/BlakeleyLT86 conf/vldb/BeeriM91 conf/sigmod/BlakeleyMG93 journals/siamcomp/BeeriMSU81 conf/pods/BancilhonMSU86 conf/pods/BeeriNRST87 journals/software/Borgida85 conf/icalp/BraP83 conf/fgcs/BalbinMR88 ... conf/pods/BeeriR87 journals/jlp/BalbinR87 conf/sigmod/BancilhonR86 books/mk/minker88/BancilhonR88 journals/jlp/BeeriR91 conf/vldb/BancilhonRS82 conf/pods/BeeriRSS92 conf/dood/Bry89 journals/tods/BancilhonS81 journals/cogsci/BrachmanS85 journals/tods/BergamaschiS92 conf/sigmod/BernsteinST75 conf/dbpl/TannenBN91 conf/icdt/TannenBW92 ... journals/jacm/BeeriV84 conf/icalp/BeeriV81 conf/adbt/BeeriV79 journals/siamcomp/BeeriV84 journals/iandc/BeeriV84 journals/jacm/BeeriV84 journals/tcs/BeeriV85 journals/ibmrd/ChamberlinAEGLMRW76 ... 
journals/iandc/Cardelli88 books/mk/Cattell94 conf/sigmod/CacaceCCTZ90 conf/vldb/CastilhoCF82 conf/adbt/CasanovaF82 conf/focs/CaiFI89 journals/jcss/CasanovaFP84 conf/stoc/CosmadakisGKV88 conf/dood/CorciuloGP93 books/sp/CeriGT90 conf/focs/ChandraH80 journals/jcss/ChandraH80 journals/jcss/ChandraH82 journals/jlp/ChandraH85 conf/popl/Chandra81 conf/adbt/Chang79 conf/pods/Chandra88 ... journals/tods/Chen76 conf/ride/ChenHM94 conf/icde/Chomicki92 conf/pods/Chomicki92 ... ... ... conf/stoc/CosmadakisK85 journals/acr/CosmadakisK86 ... journals/jcss/CosmadakisKS86 journals/jacm/CosmadakisKV90 ... conf/pods/CalvaneseL94 conf/adbt/Clark77 conf/stoc/ChandraLM81 conf/stoc/ChandraM77 conf/pods/ConsensM90 conf/sigmod/ConsensM93 conf/icdt/ConsensM90 journals/cacm/Codd70 conf/sigmod/Codd71a persons/Codd71a persons/Codd72 conf/ifip/Codd74 ... conf/sigmod/Codd79 journals/cacm/Codd82 ... conf/sigmod/Cohen89 journals/cacm/Cohen90 ... journals/jcss/Cook74 conf/pods/Cosmadakis83 conf/focs/Cosmadakis87 books/el/leeuwen90/Courcelle90a journals/jacm/CosmadakisP84 conf/edbt/CeriCGLLTZ88 ... conf/vldb/CeriT87 conf/vldb/CasanovaTF88 ... conf/pods/CasanovaV83 journals/siamcomp/ChandraV85 conf/pods/ChaudhuriV92 conf/pods/ChaudhuriV93 conf/pods/ChaudhuriV94 journals/csur/CardelliW85 conf/pods/ChenW89 conf/pods/CohenW89 conf/vldb/CeriW90 conf/vldb/CeriW91 conf/iclp/ChenW92 conf/vldb/CeriW93 ... conf/birthday/Dahlhaus87 conf/vldb/Date81 books/aw/Date86 ... conf/dbpl/Dayal89 journals/tods/DayalB82 journals/ibmrd/DelobelC73 conf/icde/DelcambreD89 ... journals/tods/Delobel78 journals/jacm/Demolombe92 journals/tods/DateF92 ... conf/vldb/DayalHL91 journals/jacm/Paola69a conf/caap/DahlhausM86 journals/acr/DAtriM86 journals/iandc/DahlhausM92 conf/sigmod/DerrMP93 conf/vldb/MaindrevilleS88 conf/pods/Dong92 conf/adbt/BraP82 ... conf/dbpl/DongS91 journals/iandc/DongS95 conf/dbpl/DongS93 conf/dbpl/DongS93 conf/icdt/DongT92 conf/vldb/DenninghoffV91 conf/pods/DenninghoffV93 ... ... books/acm/kim95/DayalHW95 ... 
conf/pods/EiterGM94 conf/pods/Escobar-MolanoHJ93 ... books/el/leeuwen90/Emerson90 books/bc/ElmasriN89 ... conf/icse/Eswaran76 conf/sigmod/EpsteinSW78 ... ... conf/vldb/Fagin77 journals/tods/Fagin77 conf/sigmod/Fagin79 journals/tods/Fagin81 journals/ipl/FaginV83 journals/jacm/Fagin82 journals/jacm/Fagin83 journals/tcs/Fagin93 books/sp/kimrb85/FurtadoC85 ... journals/jlp/Fitting85a journals/tcs/FischerJT83 journals/acr/FaginKUV86 conf/icdt/FernandezM92 journals/tods/FaginMU82 conf/vldb/FaloutsosNS91 ... journals/ai/Forgy82 ... conf/sigmod/Freytag87 ... journals/siamcomp/FischerT83 journals/siamcomp/FaginMUY83 conf/pods/FaginUV83 conf/icalp/FaginV84 ... ... ... ... conf/sigmod/GraefeD87 conf/ride/GatziuD94 conf/sigmod/GardarinM86 conf/sigmod/GyssensG88 journals/tcs/GinsburgH83a journals/jacm/GinsburgH86 ... books/bc/tanselCGSS93/Ginsburg93 books/fm/GareyJ79 journals/jacm/GrantJ82 conf/vldb/GehaniJ91 conf/vldb/GhandeharizadehHJCELLTZ93 journals/tods/GhandeharizadehHJ96 conf/vldb/GehaniJS92 ... conf/sigmod/GehaniJS92 ... conf/deductive/GuptaKM92 conf/pods/GurevichL82 conf/iclp/GelfondL88 conf/adbt/77 journals/csur/GallaireMN84 conf/pods/GrahneMR92 conf/sigmod/GuptaMS93 conf/lics/GaifmanMSV87 journals/jacm/GaifmanMSV93 journals/jacm/GrahamMV86 conf/csl/GradelO92 ... conf/pods/Gottlob87 conf/pods/GyssensPG90 conf/dood/GiannottiPSZ91 books/aw/GoldbergR83 journals/acr/GrahneR86 journals/ipl/Grant77 ... journals/iandc/Grandjean83 conf/vldb/Grahne84 ... journals/csur/Graefe93 books/sp/Greibach75 journals/tods/GoodmanS82 journals/jcss/GoodmanS84 conf/focs/GurevichS85 ... conf/pods/GrumbachS94 conf/sigmod/GangulyST90 ... journals/tcs/Gunter92 ... ... ... ... conf/pods/GrahamV84 conf/pods/GrumbachV91 conf/icde/GardarinV92 conf/sigmod/GraefeW89 ... journals/jacm/GinsburgZ82 conf/vldb/GottlobZ88 ... ... journals/sigmod/Hanson89 ... 
journals/cacm/Harel80 journals/tkde/HaasCLMWLLPCS90 conf/lics/Hella92 journals/iandc/Herrmann95 conf/pods/HirstH93 conf/vldb/HullJ91 conf/ewdw/HullJ90 journals/csur/HullK87 journals/tods/HudsonK89 conf/lics/HillebrandKM93 conf/nato/HillebrandKR93 conf/jcdkb/HsuLM88 journals/ipl/HoneymanLY80 journals/tods/HammerM81 conf/adbt/HenschenMN82 ... journals/jacm/HenschenN84 journals/jacm/Honeyman82 conf/sigmod/HullS89 conf/pods/HullS89 journals/acta/HullS94 journals/jcss/HullS93 conf/fodo/HullTY89 journals/jcss/Hull83 journals/jacm/Hull84 journals/tcs/Hull85 journals/siamcomp/Hull86 ... conf/vldb/Hulin89 ... journals/jacm/HullY84 conf/vldb/HullY90 conf/pods/HullY91 conf/sigmod/IoannidisK90 journals/jcss/ImielinskiL84 conf/adbt/Imielinski82 journals/jcss/Immerman82 journals/iandc/Immerman86 ... journals/siamcomp/Immerman87 conf/pods/ImielinskiN88 conf/vldb/IoannidisNSS92 conf/sigmod/ImielinskiNV91 conf/dood/ImielinskiNV91 conf/vldb/Ioannidis85 journals/jacm/Jacobs82 conf/dbpl/JacobsH91 journals/csur/JarkeK84 journals/jcss/JohnsonK84 conf/popl/JaffarL87 books/el/leeuwen90/Johnson90 journals/jacm/Joyner76 conf/pods/JaeschkeS82 ... books/mk/minker88/Kanellakis88 books/el/leeuwen90/Kanellakis90 conf/oopsla/KhoshafianC86 conf/edbt/KotzDM88 conf/jcdkb/Keller82 conf/pods/Keller85 journals/computer/Keller86 ... journals/tods/Kent79 ... journals/ngc/RohmerLK86 conf/tacs/KanellakisG94 conf/jcdkb/Kifer88 conf/pods/KanellakisKR90 conf/sigmod/KiferKS92 ... conf/icdt/KiferL86 books/aw/KimL89 ... journals/tods/Klug80 journals/jacm/Klug82 journals/jacm/Klug88 journals/jacm/KiferLW95 conf/kr/KatsunoM91 journals/ai/KatsunoM92 conf/jcdkb/KrishnamurthyN88 journals/csur/Knight89 ... journals/iandc/Kolaitis91 journals/ai/Konolige88 conf/ifip/Kowalski74 journals/jacm/Kowalski75 conf/bncod/Kowalski84 conf/vldb/KoenigP81 journals/tods/KlugP82 ... 
conf/pods/KolaitisP88 conf/pods/KiferRS88 conf/sigmod/KrishnamurthyRS88 books/mg/SilberschatzK91 conf/iclp/KempT88 conf/sigmod/KellerU84 conf/dood/Kuchenhoff91 ... journals/jlp/Kunen87 conf/iclp/Kunen88 conf/pods/Kuper87 conf/pods/Kuper88 conf/ppcp/Kuper93 conf/pods/KuperV84 conf/stoc/KolaitisV87 journals/tcs/KarabegV90 journals/iandc/KolaitisV90 conf/pods/KolaitisV90 journals/tods/KarabegV91 journals/iandc/KolaitisV92 journals/tcs/KuperV93 journals/tods/KuperV93 journals/tse/KellerW85 conf/pods/KiferW89 conf/jcdkb/Lang88 books/el/Leeuwen90 ... journals/jcss/Leivant89 ... journals/iandc/Leivant90 ... conf/db-workshops/Levesque82 journals/ai/Levesque84 conf/mfdbs/Libkin91 conf/er/Lien79 journals/jacm/Lien82 books/mk/minker88/Lifschitz88 ... journals/tcs/Lindell91 journals/tods/Lipski79 journals/jacm/Lipski81 journals/tcs/LeratL86 journals/cj/LeveneL90 books/sp/Lloyd87 conf/pods/LakshmananM89 conf/tlca/LeivantM93 conf/sigmod/LaverMG83 conf/pods/LiptonN90 journals/jcss/LucchesiO78 conf/sigmod/Lohman88 ... conf/ijcai/Lozinskii85 books/ph/LewisP81 ... conf/sigmod/LecluseRV88 journals/is/LipeckS87 journals/jlp/LloydST87 journals/tods/LingTK81 conf/sigmod/LyngbaekV87 conf/dood/LefebvreV89 conf/pods/LibkinW93 conf/dbpl/LibkinW93 journals/jacm/Maier80 books/cs/Maier83 ... conf/vldb/Makinouchi77 conf/icalp/Makowsky81 ... conf/icdt/Malvestuto86 conf/aaai/MacGregorB92 journals/tods/MylopoulosBW80 conf/sigmod/McCarthyD89 journals/csur/MishraE92 conf/sigmod/MumickFPR90 books/mk/Minker88 journals/jlp/Minker88 conf/vldb/MillerIR93 journals/is/MillerIR94 journals/iandc/Mitchell83 conf/pods/Mitchell83 conf/vldb/MendelzonM79 journals/tods/MaierMS79 journals/jcss/MaierMSU80 conf/pods/MendelzonMW94 journals/debu/MorrisNSUG87 journals/ai/Moore85 conf/vldb/Morgenstern83 conf/pods/Morris88 ... conf/pods/MannilaR85 ... journals/jlp/MinkerR90 books/aw/MannilaR92 journals/acr/MaierRW86 ... 
journals/tods/MarkowitzS92 conf/pods/Marchetti-SpaccamelaPS87 journals/jacm/MaierSY81 conf/iclp/MorrisUG86 journals/tods/MaierUV84 conf/iclp/MorrisUG86 journals/acta/MakowskyV86 books/bc/MaierW88 books/mk/minker88/ManchandraW88 conf/pods/Naughton86 conf/sigmod/NgFS91 ... conf/vldb/Nejdl87 conf/adbt/NicolasM77 conf/sigmod/Nicolas78 journals/acta/Nicolas82 conf/ds/76 conf/pods/NaqviK88 journals/tods/NegriPS91 conf/vldb/NaughtonRSU89 conf/pods/NaughtonS87 ... ... conf/vldb/Osborn79 ... journals/tods/OzsoyogluY87 conf/adbt/Paige82 ... books/cs/Papadimitriou86 ... journals/ipl/Paredaens78 ... books/sp/ParedaensBGG89 journals/ai/Andersen91 books/el/leeuwen90/Perrin90 journals/ins/Petrov89 conf/pods/ParedaensG88 conf/pods/PatnaikI94 conf/adbt/ParedaensJ79 journals/csur/PeckhamM88 ... ... conf/sigmod/ParkerP80 ... conf/iclp/Przymusinski88 conf/pods/Przymusinski89 ... conf/vldb/ParkerSV92 conf/aaai/PearlV87 journals/ai/PereiraW80a conf/pods/PapadimitriouY92 journals/tkde/QianW91 ... journals/jlp/Ramakrishnan91 conf/pods/RamakrishnanBS87 ... conf/adbt/Reiter77 journals/ai/Reiter80 conf/db-workshops/Reiter82 journals/jacm/Reiter86 journals/tods/Rissanen77 conf/mfcs/Rissanen78 conf/pods/Rissanen82 ... journals/ngc/RohmerLK86 journals/jacm/Robinson65 ... conf/pods/Ross89 ... ... conf/sigmod/RoweS79 conf/sigmod/RichardsonS91 journals/debu/RamamohanaraoSBPNTZD87 conf/vldb/RamakrishnanSS92 conf/sigmod/RamakrishnanSSS93 conf/pods/RamakrishnanSUV89 journals/jcss/RamakrishnanSUV93 journals/jlp/RamakrishnanU95 conf/sigmod/SelingerACLP79 conf/sigmod/Sagiv81 journals/tods/Sagiv83 books/mk/minker88/Sagiv88 conf/slp/Sagiv90 conf/sigmod/Sciore81 journals/jacm/Sciore82 conf/pods/Sciore83 journals/acr/Sciore86 journals/jacm/SagivDPF81 conf/pods/X89 ... 
journals/ai/SmithG85 books/mk/minker88/Shepherdson88 journals/tods/Shipman81 conf/pods/Shmueli87 conf/iclp/SekiI88 conf/sigmod/ShmueliI84 journals/tc/Sickel76 journals/jsc/Siekmann89 conf/sigmod/StonebrakerJGP90 conf/vldb/SimonKM92 journals/csur/ShethL90 conf/pods/SeibL91 conf/sigmod/SuLRD93 conf/adbt/SilvaM79 journals/sigmod/Snodgrass90 journals/sigmod/Soo91 conf/pods/SuciuP94 conf/sigmod/StonebrakerR86 conf/slp/SudarshanR93 conf/pods/SagivS86 journals/cacm/Stonebraker81 books/mk/Stonebraker88 journals/tkde/Stonebraker92 books/aw/Stroustrup91 journals/jacm/SadriU82 conf/vldb/Su91 conf/pods/SagivV89 journals/jacm/SagivW82 journals/tods/StonebrakerWKH76 journals/jacm/SagivY80 conf/pods/SaccaZ86 journals/tcs/SaccaZ88 ... conf/pods/SaccaZ90 ... ... books/bc/TanselCGJSS93 ... journals/acr/ThomasF86 ... ... ... ... journals/tcs/Topor87 ... books/mk/minker88/ToporS88 ... journals/siamcomp/TarjanY84 journals/csur/TeoreyYF86 journals/algorithmica/UllmanG88 conf/pods/Ullman82 books/cs/Ullman82 journals/tods/Ullman85 books/cs/Ullman88 conf/pods/Ullman89 books/cs/Ullman89 conf/sigmod/Gelder86 ... conf/pods/BusscheG92 conf/focs/BusscheGAG92 conf/pods/BusscheP91 conf/slp/Gelder86 conf/pods/Gelder89 conf/pods/GelderRS88 journals/jacm/GelderRS91 journals/tods/GelderT91 journals/ipl/Vardi81 conf/stoc/Vardi82 conf/focs/Vardi82 journals/acta/Vardi83 journals/jcss/Vardi84 conf/pods/Vardi85 conf/pods/Vardi86 journals/jcss/Vardi86 ... conf/pods/Vardi88 conf/sigmod/Vassiliou79 ... ... journals/jacm/EmdenK76 conf/nf2/SchollABBGPRV87 journals/jacm/Vianu87 journals/acta/Vianu87 conf/eds/Vieille86 conf/iclp/Vieille87 ... conf/eds/Vieille88 journals/tcs/Vieille89 ... journals/tcs/VianuV92 conf/sigmod/WidomF90 conf/icde/WangH92 conf/pos/WidjojoHW90 journals/computer/Wiederhold92 conf/pods/Wilkins86 conf/pods/Winslett88 conf/sigmod/WolfsonO90 conf/pods/Wong93 conf/sigmod/WolfsonS88 journals/ibmrd/WangW75 journals/tods/WongY76 conf/vldb/Yannakakis81 journals/csur/YuC84 ... 
journals/jcss/YannakakisP82 ... journals/tods/Zaniolo82 journals/jcss/Zaniolo84 ... conf/edbt/ZhouH90 journals/ibmsj/Zloof77 books/mk/ZdonikM90 db/books/dbtext/abiteboul95.html
+books/aw/Lamport86:LaTeX User's Guide & Reference Manual:Leslie Lamport:2002-01-03 Addison-Wesley 1986 0-201-15790-X
+books/aw/AhoHU74:The Design and Analysis of Computer Algorithms.:Alfred V. Aho John E. Hopcroft Jeffrey D. Ullman:2002-01-03 Addison-Wesley 1974 0-201-00029-6
+books/aw/Lamport2002:Specifying Systems, The TLA+ Language and Tools for Hardware and Software Engineers:Leslie Lamport:2005-07-28 Addison-Wesley 2002 0-3211-4306-X http //research.microsoft.com/users/lamport/tla/book.html
+books/aw/AhoHU83:Data Structures and Algorithms.:Alfred V. Aho John E. Hopcroft Jeffrey D. Ullman:2002-01-03 Addison-Wesley 1983 0-201-00023-7
+books/aw/LewisBK01:Databases and Transaction Processing An Application-Oriented Approach:Philip M. Lewis Arthur J. Bernstein Michael Kifer:2002-01-03 Addison-Wesley 2001 0-201-70872-8
+books/aw/AhoKW88:The AWK Programming Language:Alfred V. Aho Brian W. Kernighan Peter J. Weinberger:2002-01-03 Addison-Wesley 1988
+books/aw/LindholmY97:The Java Virtual Machine Specification:Tim Lindholm Frank Yellin:2002-01-28 Addison-Wesley 1997 0-201-63452-X
+books/aw/AhoSU86:Compilers Princiles, Techniques, and Tools.:Alfred V. Aho Ravi Sethi Jeffrey D. Ullman:2002-01-03 Addison-Wesley 1986 0-201-10088-6
+books/aw/Sedgewick83:Algorithms:Robert Sedgewick:2002-01-03 Addison-Wesley 1983 0-201-06672-6
+journals/siamcomp/AspnesW96:Randomized Consensus in Expected O(n log² n) Operations Per Processor.:James Aspnes Orli Waarts:2002-01-03 1024-1044 1996 25 SIAM J. Comput. 5 db/journals/siamcomp/siamcomp25.html#AspnesW96
+conf/focs/AspnesW92:Randomized Consensus in Expected O(n log ^2 n) Operations Per Processor:James Aspnes Orli Waarts:2006-04-25 137-146 conf/focs/FOCS33 1992 FOCS db/conf/focs/focs92.html#AspnesW92
+journals/siamcomp/Bloniarz83:A Shortest-Path Algorithm with Expected Time O(n² log n log* n).:Peter A. Bloniarz:2002-01-03 588-600 1983 12 SIAM J. Comput. 3 db/journals/siamcomp/siamcomp12.html#Bloniarz83
+conf/stoc/Bloniarz80:A Shortest-Path Algorithm with Expected Time O(n^2 log n log ^* n):Peter A. Bloniarz:2006-04-25 378-384 conf/stoc/STOC12 1980 STOC db/conf/stoc/stoc80.html#Bloniarz80
+journals/siamcomp/Megiddo83a:Linear-Time Algorithms for Linear Programming in R³ and Related Problems.:Nimrod Megiddo:2002-01-03 759-776 1983 12 SIAM J. Comput. 4 db/journals/siamcomp/siamcomp12.html#Megiddo83a
+conf/focs/Megiddo82:Linear-Time Algorithms for Linear Programming in R^3 and Related Problems:Nimrod Megiddo:2006-04-25 329-338 conf/focs/FOCS23 1982 FOCS db/conf/focs/focs82.html#Megiddo82
+journals/siamcomp/MoffatT87:An All Pairs Shortest Path Algorithm with Expected Time O(n² log n).:Alistair Moffat Tadao Takaoka:2002-01-03 1023-1031 1987 16 SIAM J. Comput. 6 db/journals/siamcomp/siamcomp16.html#MoffatT87
+conf/focs/MoffatT85:An All Pairs Shortest Path Algorithm with Expected Running Time O(n^2 log n):Alistair Moffat Tadao Takaoka:2006-04-25 101-105 conf/focs/FOCS26 1985 FOCS db/conf/focs/focs85.html#MoffatT85
+conf/icip/SchonfeldL98:VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases.:Dan Schonfeld Dan Lelescu:2002-11-05 123-127 1998 ICIP (3) db/conf/icip/icip1998-3.html#SchonfeldL98
+conf/hicss/SchonfeldL99:VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases ¾ Visual Search Engine.:Dan Schonfeld Dan Lelescu:2002-01-03 1999 HICSS http //computer.org/proceedings/hicss/0001/00013/00013006abs.htm db/conf/hicss/hicss1999-3.html#SchonfeldL99
+journals/corr/abs-0802-2861:Geometric Set Cover and Hitting Sets for Polytopes in $R^3$:Sören Laue:2008-03-03 http //arxiv.org/abs/0802.2861 2008 CoRR abs/0802.2861 db/journals/corr/corr0802.html#abs-0802-2861 informal publication
+conf/stacs/Laue08:Geometric Set Cover and Hitting Sets for Polytopes in R³.:Sören Laue:2008-03-04 2008 STACS 479-490 http //drops.dagstuhl.de/opus/volltexte/2008/1367 conf/stacs/2008 db/conf/stacs/stacs2008.html#Laue08
+journals/iandc/IbarraJCR91:Some Classes of Languages in NC¹:Oscar H. Ibarra Tao Jiang Jik H. Chang Bala Ravikumar:2006-04-25 86-106 Inf. Comput. January 1991 90 1 db/journals/iandc/iandc90.html#IbarraJCR91
+conf/awoc/IbarraJRC88:On Some Languages in NC.:Oscar H. Ibarra Tao Jiang Bala Ravikumar Jik H. Chang:2002-08-06 64-73 1988 conf/awoc/1988 AWOC db/conf/awoc/awoc88.html#IbarraJRC88
+journals/jacm/GalilHLSW87:An O(n³log n) deterministic and an O(n³) Las Vegs isomorphism test for trivalent graphs.:Zvi Galil Christoph M. Hoffmann Eugene M. Luks Claus-Peter Schnorr Andreas Weber:2003-11-20 513-531 1987 34 J. ACM 3 http //doi.acm.org/10.1145/28869.28870 db/journals/jacm/jacm34.html#GalilHLSW87
+conf/focs/GalilHLSW82:An O(n^3 log n) Deterministic and an O(n^3) Probabilistic Isomorphism Test for Trivalent Graphs:Zvi Galil Christoph M. Hoffmann Eugene M. Luks Claus-Peter Schnorr Andreas Weber:2006-04-25 118-125 conf/focs/FOCS23 1982 FOCS db/conf/focs/focs82.html#GalilHLSW82
+journals/jacm/GalilT88:An O(n²(m + n log n)log n) min-cost flow algorithm.:Zvi Galil Éva Tardos:2003-11-20 374-386 1988 35 J. ACM 2 http //doi.acm.org/10.1145/42282.214090 db/journals/jacm/jacm35.html#GalilT88
+conf/focs/GalilT86:An O(n^2 (m + n log n) log n) Min-Cost Flow Algorithm:Zvi Galil Éva Tardos:2006-04-25 1-9 conf/focs/FOCS27 1986 FOCS db/conf/focs/focs86.html#GalilT86
+series/synthesis/2009Weintraub:Jordan Canonical Form Theory and Practice:Steven H. Weintraub:2009-09-06 Jordan Canonical Form Theory and Practice http //dx.doi.org/10.2200/S00218ED1V01Y200908MAS006 http //dx.doi.org/10.2200/S00218ED1V01Y200908MAS006 2009 Synthesis Lectures on Mathematics & Statistics Morgan & Claypool Publishers
+series/synthesis/2009Brozos:The Geometry of Walker Manifolds:Miguel Brozos-Vázquez Eduardo García-Río Peter Gilkey Stana Nikcevic Rámon Vázquez-Lorenzo:2009-09-06 The Geometry of Walker Manifolds http //dx.doi.org/10.2200/S00197ED1V01Y200906MAS005 http //dx.doi.org/10.2200/S00197ED1V01Y200906MAS005 2009 Synthesis Lectures on Mathematics & Statistics Morgan & Claypool Publishers
diff --git a/asterix-fuzzyjoin/data/dblp-small/ssjoin.in-000/part-00000 b/asterix-fuzzyjoin/data/dblp-small/ssjoin.in-000/part-00000
new file mode 100644
index 0000000..75f0592
--- /dev/null
+++ b/asterix-fuzzyjoin/data/dblp-small/ssjoin.in-000/part-00000
Binary files differ
diff --git a/asterix-fuzzyjoin/data/dblp-small/ssjoin.out-000/part-00000 b/asterix-fuzzyjoin/data/dblp-small/ssjoin.out-000/part-00000
new file mode 100644
index 0000000..f2dfbd3
--- /dev/null
+++ b/asterix-fuzzyjoin/data/dblp-small/ssjoin.out-000/part-00000
@@ -0,0 +1,2 @@
+73 75 0.522
+65 63 0.500
diff --git a/asterix-fuzzyjoin/data/intarray-bag-small.expected/recordpairs-000/expected.txt b/asterix-fuzzyjoin/data/intarray-bag-small.expected/recordpairs-000/expected.txt
new file mode 100644
index 0000000..919d9f2
--- /dev/null
+++ b/asterix-fuzzyjoin/data/intarray-bag-small.expected/recordpairs-000/expected.txt
@@ -0,0 +1,2 @@
+1:[1, 2, 2, 3]:0.6:2:[2, 3, 2, 4]
+3:[3, 4, 4, 5]:0.6:5:[3, 4, 4, 4]
diff --git a/asterix-fuzzyjoin/data/intarray-bag-small/records.r-000/part-00000 b/asterix-fuzzyjoin/data/intarray-bag-small/records.r-000/part-00000
new file mode 100644
index 0000000..3c469f5
--- /dev/null
+++ b/asterix-fuzzyjoin/data/intarray-bag-small/records.r-000/part-00000
@@ -0,0 +1,2 @@
+1:1 2 2 3
+2:2 3 2 4
diff --git a/asterix-fuzzyjoin/data/intarray-bag-small/records.r-000/part-00001 b/asterix-fuzzyjoin/data/intarray-bag-small/records.r-000/part-00001
new file mode 100644
index 0000000..a45a826
--- /dev/null
+++ b/asterix-fuzzyjoin/data/intarray-bag-small/records.r-000/part-00001
@@ -0,0 +1,3 @@
+3:3 4 4 5
+4:4 5 6 7
+5:3 4 4 4
diff --git a/asterix-fuzzyjoin/data/intarray-set-small.expected/recordpairs-000/expected.txt b/asterix-fuzzyjoin/data/intarray-set-small.expected/recordpairs-000/expected.txt
new file mode 100644
index 0000000..7c7fc9e
--- /dev/null
+++ b/asterix-fuzzyjoin/data/intarray-set-small.expected/recordpairs-000/expected.txt
@@ -0,0 +1,2 @@
+1:[1, 2, 3]:0.5:2:[2, 3, 4]
+3:[3, 4, 5, 6]:0.6:4:[4, 5, 6, 7]
diff --git a/asterix-fuzzyjoin/data/intarray-set-small/records.r-000/part-00000 b/asterix-fuzzyjoin/data/intarray-set-small/records.r-000/part-00000
new file mode 100644
index 0000000..4bc0bb7
--- /dev/null
+++ b/asterix-fuzzyjoin/data/intarray-set-small/records.r-000/part-00000
@@ -0,0 +1,2 @@
+1:1 2 3
+2:2 3 4
diff --git a/asterix-fuzzyjoin/data/intarray-set-small/records.r-000/part-00001 b/asterix-fuzzyjoin/data/intarray-set-small/records.r-000/part-00001
new file mode 100644
index 0000000..4868b29
--- /dev/null
+++ b/asterix-fuzzyjoin/data/intarray-set-small/records.r-000/part-00001
@@ -0,0 +1,2 @@
+3:3 4 5 6
+4:4 5 6 7
diff --git a/asterix-fuzzyjoin/data/intarray.txt b/asterix-fuzzyjoin/data/intarray.txt
new file mode 100644
index 0000000..5258d1c
--- /dev/null
+++ b/asterix-fuzzyjoin/data/intarray.txt
@@ -0,0 +1,6 @@
+[1, 2, 3]
+[2, 3, 4, 5]
+[-1, 2, -3, 0]
+[5]
+[10, 11]
+[100]
diff --git a/asterix-fuzzyjoin/data/pub-small.expected/recordpairs-000/expected.txt b/asterix-fuzzyjoin/data/pub-small.expected/recordpairs-000/expected.txt
new file mode 100644
index 0000000..02ab24c
--- /dev/null
+++ b/asterix-fuzzyjoin/data/pub-small.expected/recordpairs-000/expected.txt
@@ -0,0 +1,6 @@
+1:books/acm/kim95/AnnevelinkACFHK95:Object SQL - A Language for the Design and Implementation of Object Databases.:Jurgen Annevelink Rafiul Ahad Amelia Carlson Daniel H. Fishman Michael L. Heytens William Kent:2002-01-03 42-68 1995 Modern Database Systems db/books/collections/kim95.html#AnnevelinkACFHK95;0.78571427;1:oai CiteSeerXPSU 10.1.1.39.1830:Object SQL - A Language for the Design and Implementation of Object Databases:Jurgen Annevelink Rafiul Ahad Amelia Carlson Dan Fishman Mike Heytens William Kent:2009-04-13 ly, a function application expression consists of two expressions a function reference (labelled func_ref in Figure 3 line 2), and an argument (labelled arg). The func_ref expression evaluates to a (generic or specific) function identifier, which may be the same as the function that the expression is a part of, thus allowing recursive function invocations. The expression labelled arg evaluates to an arbitrary object or aggregate object. The semantics of evaluating function applications was discussed in detail in section 2. For example, to set the name of a person, we evaluate the following expression FunAssign(function name.person) (p1,'John') In this example, the first expression is itself a function call, applying the function FunAssign to the function name.person (an example of a specific function reference). This returns the oid of the function that sets a person's name, which is subsequently applied to a tuple of two elements, the oid of the person and the new name (a string o... CiteSeerX ACM Press 2009-04-13 2007-11-22 1994 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.39.1830 http //www.tu-chemnitz.de/~igrdb/docs/OpenODB/osql.ps.gz en 10.1.1.31.2534 10.1.1.28.4658 10.1.1.44.5947 10.1.1.39.199 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+25:books/acm/kim95/RusinkiewiczS95:Specification and Execution of Transactional Workflows.:Marek Rusinkiewicz Amit P. Sheth:2004-03-08 592-620 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#RusinkiewiczS95 1995;0.90909094;88:oai CiteSeerXPSU 10.1.1.43.3839:Specification and Execution of Transactional Workflows:Marek Rusinkiewicz Amit Sheth:2009-04-13 The basic transaction model has evolved over time to incorporate more complex transaction structures and to selectively modify the atomicity and isolation properties. In this chapter we discuss the application of transaction concepts to activities that involve coordinated execution of multiple tasks (possibly of different types) over different processing entities. Such applications are referred to as transactional workflows. In this chapter we discuss the specification of such workflows and the issues involved in their execution. 1 What is a Workflow? Workflows are activities involving the coordinated execution of multiple tasks performed by different processing entities. A task defines some work to be done and can be specified in a number of ways, including a textual description in a file or an email, a form, a message, or a computer program. A processing entity that performs the tasks may be a person or a software system (e.g., a mailer, an application program, a database mana... 
CiteSeerX ACM Press 2009-04-13 2007-11-22 1995 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.43.3839 http //lsdis.cs.uga.edu/lib/././download/RS93.ps en 10.1.1.17.1323 10.1.1.59.5051 10.1.1.38.6210 10.1.1.68.7445 10.1.1.109.5175 10.1.1.17.7962 10.1.1.44.7778 10.1.1.112.244 10.1.1.13.7602 10.1.1.102.7874 10.1.1.41.4043 10.1.1.49.5143 10.1.1.41.7252 10.1.1.17.3225 10.1.1.54.7761 10.1.1.55.5255 10.1.1.108.958 10.1.1.35.7733 10.1.1.52.3682 10.1.1.36.1618 10.1.1.45.6317 10.1.1.43.3180 10.1.1.35.8718 10.1.1.44.6365 10.1.1.51.2883 10.1.1.50.9206 10.1.1.6.9085 10.1.1.30.1707 10.1.1.80.6634 10.1.1.49.355 10.1.1.127.3550 10.1.1.35.3562 10.1.1.137.8832 10.1.1.49.4085 10.1.1.41.5506 10.1.1.40.4657 10.1.1.43.2369 10.1.1.40.832 10.1.1.74.5411 10.1.1.90.4428 10.1.1.110.6967 10.1.1.27.2122 10.1.1.15.5605 10.1.1.54.727 10.1.1.49.7512 10.1.1.45.8796 10.1.1.50.5984 10.1.1.53.137 10.1.1.30.3262 10.1.1.28.1680 10.1.1.21.7110 10.1.1.29.3148 10.1.1.57.687 10.1.1.59.5924 10.1.1.46.2812 10.1.1.51.5552 10.1.1.17.7375 10.1.1.40.1598 10.1.1.52.9787 10.1.1.1.3496 10.1.1.50.6791 10.1.1.55.3358 10.1.1.137.7582 10.1.1.118.4127 10.1.1.49.3580 10.1.1.35.5825 10.1.1.46.9382 10.1.1.31.7411 10.1.1.48.5504 10.1.1.55.5163 10.1.1.18.1603 10.1.1.52.8129 10.1.1.1.9723 10.1.1.21.9113 10.1.1.49.7644 10.1.1.52.6646 10.1.1.75.3106 10.1.1.80.2072 10.1.1.55.8770 10.1.1.54.8188 10.1.1.101.7919 10.1.1.104.8176 10.1.1.24.5741 10.1.1.29.4667 10.1.1.4.1055 10.1.1.48.9175 10.1.1.56.792 10.1.1.65.3172 10.1.1.66.5947 10.1.1.73.8532 10.1.1.83.8299 10.1.1.86.8521 10.1.1.87.2402 10.1.1.87.4648 10.1.1.90.5638 10.1.1.91.1709 10.1.1.94.4248 10.1.1.114.511 10.1.1.119.5037 10.1.1.124.7957 10.1.1.49.215 10.1.1.53.7777 10.1.1.53.9711 10.1.1.45.9409 10.1.1.40.8789 10.1.1.43.4845 10.1.1.34.8273 10.1.1.35.4783 10.1.1.28.3176 10.1.1.16.8151 10.1.1.8.9117 10.1.1.58.3449 10.1.1.142.7041 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+3:books/acm/kim95/BreitbartGS95:Transaction Management in Multidatabase Systems.:Yuri Breitbart Hector Garcia-Molina Abraham Silberschatz:2004-03-08 573-591 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#BreitbartGS95 1995;0.6;85:oai CiteSeerXPSU 10.1.1.37.8818:Overview of Multidatabase Transaction Management:Yuri Breitbart Hector Garcia-Molina Avi Silberschatz:2009-06-22 A multidatabase system (MDBS) is a facility that allows users access to data located in multiple autonomous database management systems (DBMSs). In such a system, global transactions are executed under the control of the MDBS. Independently, local transactions are executed under the control of the local DBMSs. Each local DBMS integrated by the MDBS may employ a different transaction management scheme. In addition, each local DBMS has complete control over all transactions (global and local) executing at its site, including the ability to abort at any point any of the transactions executing at its site. Typically, no design or internal DBMS structure changes are allowed in order to accommodate the MDBS. Furthermore, the local DBMSs may not be aware of each other, and, as a consequence, cannot coordinate their actions. Thus, traditional techniques for ensuring transaction atomicity and consistency in homogeneous distributed database systems may not be appropriate for an MDBS environment.... 
CiteSeerX 2009-06-22 2007-11-22 1992 text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.8818 ftp //ftp.cs.utexas.edu/pub/avi/UT-CS-TR-92-21.PS.Z en 10.1.1.101.8988 10.1.1.130.1772 10.1.1.38.6210 10.1.1.34.3768 10.1.1.36.1275 10.1.1.104.3430 10.1.1.112.244 10.1.1.94.9106 10.1.1.41.4043 10.1.1.49.5143 10.1.1.59.2034 10.1.1.53.875 10.1.1.137.5642 10.1.1.41.8832 10.1.1.21.1100 10.1.1.105.3626 10.1.1.44.773 10.1.1.21.2576 10.1.1.40.6484 10.1.1.144.2713 10.1.1.48.6718 10.1.1.16.6166 10.1.1.40.832 10.1.1.36.2660 10.1.1.30.3087 10.1.1.47.322 10.1.1.17.6532 10.1.1.33.2301 10.1.1.20.4306 10.1.1.47.6258 10.1.1.39.9212 10.1.1.46.4334 10.1.1.71.485 10.1.1.43.1405 10.1.1.49.1308 10.1.1.35.6530 10.1.1.42.5177 10.1.1.54.4068 10.1.1.133.3692 10.1.1.40.4220 10.1.1.48.7743 10.1.1.26.575 10.1.1.107.596 10.1.1.116.3495 10.1.1.33.2074 10.1.1.38.7229 10.1.1.59.4464 10.1.1.103.9562 10.1.1.36.5887 10.1.1.40.9658 10.1.1.53.6783 10.1.1.29.5010 10.1.1.107.876 10.1.1.46.2273 10.1.1.46.3657 10.1.1.49.5281 10.1.1.50.4114 10.1.1.63.3234 10.1.1.79.9607 10.1.1.83.4819 10.1.1.83.4980 10.1.1.84.8136 10.1.1.90.953 10.1.1.90.9785 10.1.1.92.2397 10.1.1.93.8911 10.1.1.94.3702 10.1.1.97.672 10.1.1.98.4604 10.1.1.117.6190 10.1.1.118.4814 10.1.1.130.880 10.1.1.137.1167 10.1.1.51.5111 10.1.1.45.2774 10.1.1.45.9165 10.1.1.40.4684 10.1.1.35.5866 10.1.1.38.3606 10.1.1.29.9166 10.1.1.31.3667 10.1.1.21.7181 10.1.1.33.2343 10.1.1.23.3117 10.1.1.24.7879 10.1.1.18.8936 10.1.1.19.3770 10.1.1.19.5246 10.1.1.12.3293 10.1.1.2.2325 10.1.1.60.116 10.1.1.140.5244 10.1.1.143.3448 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+3:books/acm/kim95/BreitbartGS95:Transaction Management in Multidatabase Systems.:Yuri Breitbart Hector Garcia-Molina Abraham Silberschatz:2004-03-08 573-591 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#BreitbartGS95 1995;0.6;86:oai CiteSeerXPSU 10.1.1.54.6302:Overview of Multidatabase Transaction Management:Yuri Breitbart Hector Garcia-molina Avi Silberschatz:2009-04-12 A multidatabase system (MDBS) is a facility that allows users access to data located in multiple autonomous database management systems (DBMSs). In such a system, global transactions are executed under the control of the MDBS. Independently, local transactions are executed under the control of the local DBMSs. Each local DBMS integrated by the MDBS may employ a different transaction management scheme. In addition, each local DBMS has complete control over all transactions (global and local) executing at its site, including the ability to abort at any point any of the transactions executing at its site. Typically, no design or internal DBMS structure changes are allowed in order to accommodate the MDBS. Furthermore, the local DBMSs may not be aware of each other, and, as a consequence, cannot coordinate their actions. Thus, traditional techniques for ensuring transaction atomicity and consistency in homogeneous distributed database systems may not be appropriate for an MDBS environment.... 
CiteSeerX 2009-04-12 2007-11-22 1992 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.6302 http //www-db.stanford.edu/pub/papers/multidatabase.ps en 10.1.1.101.8988 10.1.1.130.1772 10.1.1.38.6210 10.1.1.34.3768 10.1.1.36.1275 10.1.1.104.3430 10.1.1.112.244 10.1.1.94.9106 10.1.1.41.4043 10.1.1.49.5143 10.1.1.59.2034 10.1.1.53.875 10.1.1.137.5642 10.1.1.41.8832 10.1.1.21.1100 10.1.1.105.3626 10.1.1.44.773 10.1.1.21.2576 10.1.1.40.6484 10.1.1.144.2713 10.1.1.48.6718 10.1.1.16.6166 10.1.1.40.832 10.1.1.36.2660 10.1.1.30.3087 10.1.1.47.322 10.1.1.17.6532 10.1.1.33.2301 10.1.1.20.4306 10.1.1.47.6258 10.1.1.39.9212 10.1.1.46.4334 10.1.1.71.485 10.1.1.43.1405 10.1.1.49.1308 10.1.1.35.6530 10.1.1.42.5177 10.1.1.54.4068 10.1.1.133.3692 10.1.1.40.4220 10.1.1.48.7743 10.1.1.26.575 10.1.1.107.596 10.1.1.116.3495 10.1.1.33.2074 10.1.1.38.7229 10.1.1.59.4464 10.1.1.103.9562 10.1.1.36.5887 10.1.1.40.9658 10.1.1.53.6783 10.1.1.29.5010 10.1.1.107.876 10.1.1.46.2273 10.1.1.46.3657 10.1.1.49.5281 10.1.1.50.4114 10.1.1.63.3234 10.1.1.79.9607 10.1.1.83.4819 10.1.1.83.4980 10.1.1.84.8136 10.1.1.90.953 10.1.1.90.9785 10.1.1.92.2397 10.1.1.93.8911 10.1.1.94.3702 10.1.1.97.672 10.1.1.98.4604 10.1.1.117.6190 10.1.1.118.4814 10.1.1.130.880 10.1.1.137.1167 10.1.1.51.5111 10.1.1.45.2774 10.1.1.45.9165 10.1.1.40.4684 10.1.1.35.5866 10.1.1.38.3606 10.1.1.29.9166 10.1.1.31.3667 10.1.1.21.7181 10.1.1.33.2343 10.1.1.23.3117 10.1.1.24.7879 10.1.1.18.8936 10.1.1.19.3770 10.1.1.19.5246 10.1.1.12.3293 10.1.1.2.2325 10.1.1.60.116 10.1.1.140.5244 10.1.1.143.3448 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+54:books/aw/kimL89/SteinLU89:A Shared View of Sharing The Treaty of Orlando.:Lynn Andrea Stein Henry Lieberman David Ungar:2002-01-03 31-48 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#SteinLU89;1.0;91:oai CiteSeerXPSU 10.1.1.55.482:A Shared View of Sharing The Treaty of Orlando:Lynn Andrea Stein Henry Lieberman David Ungar:2009-04-12 Introduction For the past few years, researchers have been debating the relative merits of object-oriented languages with classes and inheritance as opposed to those with prototypes and delegation. It has become clear that the object-oriented programming language design space is not a dichotomy. Instead, we have identified two fundamental mechanisms---templates and empathy---and several different independent degrees of freedom for each. Templates create new objects in their own image, providing guarantees about the similarity of group members. Empathy allows an object to act as if it were some other object, thus providing sharing of state and behavior. 
The Smalltalk-80 TM language, 1 Actors, Lieberman's Delegation system, Self, and Hybrid each take differing stands on the forms of templates 1 Smalltalk-80 TM is a trademark of Par CiteSeerX ACM Press 2009-04-12 2007-11-22 1989 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.55.482 http //lcs.www.media.mit.edu/people/lieber/Lieberary/OOP/Treaty/Treaty.ps en 10.1.1.26.9545 10.1.1.118.6579 10.1.1.48.69 10.1.1.57.5195 10.1.1.9.570 10.1.1.47.511 10.1.1.127.5320 10.1.1.100.4334 10.1.1.5.3348 10.1.1.39.3374 10.1.1.56.4713 10.1.1.61.2065 10.1.1.27.3015 10.1.1.1.5960 10.1.1.67.5433 10.1.1.31.8109 10.1.1.68.4062 10.1.1.49.3986 10.1.1.122.9331 10.1.1.46.8283 10.1.1.54.5230 10.1.1.16.2055 10.1.1.137.5180 10.1.1.43.5722 10.1.1.68.2105 10.1.1.35.1247 10.1.1.30.1415 10.1.1.7.5014 10.1.1.102.3946 10.1.1.105.6469 10.1.1.26.223 10.1.1.26.8645 10.1.1.35.4104 10.1.1.39.6986 10.1.1.41.7822 10.1.1.42.9056 10.1.1.53.9325 10.1.1.71.1802 10.1.1.76.6993 10.1.1.89.9613 10.1.1.121.5599 10.1.1.122.3737 10.1.1.127.1894 10.1.1.55.5674 10.1.1.37.8260 10.1.1.2.2077 10.1.1.24.5782 10.1.1.19.780 10.1.1.2.4148 10.1.1.2.4173 10.1.1.131.902 10.1.1.30.2927 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+5:books/acm/kim95/DayalHW95:Active Database Systems.:Umeshwar Dayal Eric N. Hanson Jennifer Widom:2002-01-03 434-456 1995 Modern Database Systems db/books/collections/kim95.html#DayalHW95;1.0;98:oai CiteSeerXPSU 10.1.1.49.2910:Active Database Systems:Umeshwar Dayal Eric N. Hanson Jennifer Widom:2009-04-12 In Won Kim editor Modern Database Systems The Object Model Integrating a production rules facility into a database system provides a uniform mechanism for a number of advanced database features including integrity constraint enforcement, derived data maintenance, triggers, alerters, protection, version control, and others. In addition, a database system with rule processing capabilities provides a useful platform for large and efficient knowledge-base and expert systems. Database systems with production rules are referred to as active database systems, and the field of active database systems has indeed been active. This chapter summarizes current work in active database systems topics covered include active database rule models and languages, rule execution semantics, and implementation issues. 1 Introduction Conventional database systems are passive they only execute queries or transactions explicitly submitted by a user or an application program. For many applications, however, it is important to monitor situations of interest, and to ... 
CiteSeerX ACM Press 2009-04-12 2007-11-22 1994 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.49.2910 http //www-db.stanford.edu/pub/papers/book-chapter.ps en 10.1.1.17.1323 10.1.1.143.7196 10.1.1.50.3821 10.1.1.51.9946 10.1.1.41.2030 10.1.1.46.2504 10.1.1.52.4421 10.1.1.38.2083 10.1.1.34.661 10.1.1.103.7630 10.1.1.100.9015 10.1.1.97.1699 10.1.1.107.4220 10.1.1.47.9217 10.1.1.133.7157 10.1.1.101.5051 10.1.1.30.9989 10.1.1.53.6941 10.1.1.50.8529 10.1.1.133.4287 10.1.1.50.7278 10.1.1.10.1688 10.1.1.19.8669 10.1.1.44.7600 10.1.1.144.376 10.1.1.44.1348 10.1.1.47.9998 10.1.1.90.4428 10.1.1.108.344 10.1.1.48.9470 10.1.1.53.5472 10.1.1.52.4872 10.1.1.144.4965 10.1.1.31.7578 10.1.1.32.6426 10.1.1.58.6335 10.1.1.85.8052 10.1.1.93.1931 10.1.1.55.4610 10.1.1.21.3821 10.1.1.26.9208 10.1.1.31.4869 10.1.1.48.1833 10.1.1.83.8628 10.1.1.87.9318 10.1.1.90.2195 10.1.1.36.5184 10.1.1.21.1704 10.1.1.53.1733 10.1.1.90.3181 10.1.1.53.6783 10.1.1.52.6151 10.1.1.104.6911 10.1.1.105.1691 10.1.1.21.1984 10.1.1.23.2775 10.1.1.62.5556 10.1.1.68.9063 10.1.1.74.4746 10.1.1.78.5097 10.1.1.84.743 10.1.1.84.904 10.1.1.87.6019 10.1.1.88.3907 10.1.1.89.9631 10.1.1.90.4147 10.1.1.92.365 10.1.1.100.2747 10.1.1.98.5083 10.1.1.98.6663 10.1.1.99.1894 10.1.1.99.8174 10.1.1.133.8073 10.1.1.52.7823 10.1.1.39.5341 10.1.1.35.3458 10.1.1.26.4620 10.1.1.18.8936 10.1.1.19.3694 10.1.1.12.631 10.1.1.48.6394 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
diff --git a/asterix-fuzzyjoin/data/pub-small.expected/ridpairs-000/expected.txt b/asterix-fuzzyjoin/data/pub-small.expected/ridpairs-000/expected.txt
new file mode 100644
index 0000000..0153590
--- /dev/null
+++ b/asterix-fuzzyjoin/data/pub-small.expected/ridpairs-000/expected.txt
@@ -0,0 +1,6 @@
+1 1 0.78571427
+25 88 0.90909094
+3 85 0.6
+3 86 0.6
+54 91 1.0
+5 98 1.0
diff --git a/asterix-fuzzyjoin/data/pub-small.expected/tokens-000/expected.txt b/asterix-fuzzyjoin/data/pub-small.expected/tokens-000/expected.txt
new file mode 100644
index 0000000..d8e0af0
--- /dev/null
+++ b/asterix-fuzzyjoin/data/pub-small.expected/tokens-000/expected.txt
@@ -0,0 +1,597 @@
+¾_1
+1_1
+2_1
+3_1
+3_2
+4_1
+5_1
+6_1
+80_1
+a_1
+a_2
+abiteboul_1
+abraham_1
+active_1
+adds_1
+ahad_1
+ahmed_1
+aho_1
+albert_1
+alfons_1
+alfred_1
+algorithm_1
+algorithms_1
+alistair_1
+all_1
+allen_1
+amelia_1
+amihai_1
+amit_1
+an_1
+an_2
+analysis_1
+and_1
+and_2
+anders_1
+andrea_1
+andreas_1
+andrew_1
+andrews_1
+angelika_1
+anne_1
+annevelink_1
+application_1
+applications_1
+approach_1
+approaches_1
+architecture_1
+aref_1
+arthur_1
+as_1
+aspnes_1
+authorization_1
+awk_1
+b_1
+bala_1
+ballou_1
+based_1
+beech_1
+benchmark_1
+bernstein_1
+better_1
+beyond_1
+bindings_1
+björnerstedt_1
+blakeley_1
+bloniarz_1
+breitbart_1
+bretl_1
+brian_1
+brom_1
+brozos_1
+bruce_1
+c_1
+c_2
+cad_1
+canonical_1
+capability_1
+carey_1
+carlson_1
+cat_1
+catalyst_1
+chang_1
+changing_1
+chien_1
+choi_1
+chou_1
+chow_1
+chris_1
+christer_1
+christodoulakis_1
+christoph_1
+clarence_1
+classes_1
+claus_1
+clement_1
+commercial_1
+common_1
+comparison_1
+compilers_1
+compressed_1
+computer_1
+concepts_1
+concurrency_1
+concurrent_1
+connors_1
+consensus_1
+contents_1
+control_1
+cooperative_1
+cost_1
+cover_1
+craig_1
+critical_1
+critique_1
+d_1
+dale_1
+dan_1
+dan_2
+daniel_1
+darrell_1
+data_1
+database_1
+databases_1
+david_1
+davis_1
+dayal_1
+dbms_1
+dbmss_1
+declarative_1
+decouchant_1
+dennis_1
+design_1
+deterministic_1
+developing_1
+dewitt_1
+diederich_1
+directions_1
+distributed_1
+dittrich_1
+dittrich_2
+do_1
+dominique_1
+donald_1
+du_1
+e_1
+ealities_1
+early_1
+eda_1
+eduardo_1
+edward_1
+eliot_1
+ellis_1
+engine_1
+engineers_1
+enough_1
+environments_1
+epilogue_1
+eric_1
+eugene_1
+éva_1
+execution_1
+exodus_1
+expected_1
+experiences_1
+extended_1
+extending_1
+f_1
+fast_1
+features_1
+fishman_1
+flow_1
+for_1
+for_2
+form_1
+formal_1
+foundations_1
+frame_1
+frank_1
+fred_1
+frederick_1
+from_1
+future_1
+g_1
+gail_1
+gala_1
+galil_1
+garcia_1
+garcía_1
+garza_1
+gemstone_1
+generation_1
+genome_1
+geometric_1
+geometry_1
+gibbs_1
+gilkey_1
+goodman_1
+graham_1
+graphics_1
+graphs_1
+guide_1
+guido_1
+h_1
+h_2
+hanan_1
+hanson_1
+hardware_1
+harold_1
+hasan_1
+hector_1
+henry_1
+heterogeneity_1
+heterogeneous_1
+heytens_1
+hitting_1
+hoch_1
+hoffmann_1
+hong_1
+hopcroft_1
+hsu_1
+hull_1
+hulten_1
+ibarra_1
+implementation_1
+in_1
+indexing_1
+inegration_1
+inequalities_1
+inference_1
+information_1
+ingram_1
+ingres_1
+injun_1
+integrated_1
+interfaces_1
+interoperability_1
+interoperating_1
+introduction_1
+iris_1
+is_1
+isomorphism_1
+issues_1
+its_1
+j_1
+j_2
+j_3
+jack_1
+jacob_1
+james_1
+jason_1
+java_1
+jeffrey_1
+jennifer_1
+jiang_1
+jik_1
+jim_1
+joel_1
+john_1
+jordan_1
+jorge_1
+josé_1
+jurgen_1
+k_1
+kaiser_1
+kelley_1
+kemper_1
+kent_1
+kernighan_1
+kevin_1
+kifer_1
+kim_1
+kim_2
+king_1
+klaus_1
+knuth_1
+kotz_1
+koveos_1
+kowalski_1
+krieger_1
+kyung_1
+l_1
+lamport_1
+landscape_1
+language_1
+languages_1
+las_1
+latex_1
+laue_1
+lawrence_1
+legacy_1
+leichner_1
+lelescu_1
+leonidas_1
+leslie_1
+lewis_1
+lieberman_1
+lindholm_1
+linear_1
+linear_2
+list_1
+lochovsky_1
+log_1
+log_2
+log²_1
+lorenzo_1
+luks_1
+lunt_1
+lyngbæk_1
+lynn_1
+m_1
+m_2
+machine_1
+mahbod_1
+maier_1
+majorization_1
+making_1
+management_1
+manager_1
+managing_1
+manifolds_1
+manual_1
+mapping_1
+marek_1
+marie_1
+mark_1
+marshall_1
+megiddo_1
+meichun_1
+mellender_1
+meng_1
+message_1
+messages_1
+michael_1
+miguel_1
+milton_1
+min_1
+ming_1
+model_1
+models_1
+modern_1
+moerkotte_1
+moffat_1
+molina_1
+monty_1
+moon_1
+moss_1
+motro_1
+multidatabase_1
+multimedia_1
+multiuser_1
+my_1
+n_1
+n_2
+n²_1
+n_3
+n³_1
+n³log_1
+n_4
+nat_1
+nathan_1
+nc_1
+nc¹_1
+neimat_1
+next_1
+nierstrasz_1
+nikcevic_1
+nimrod_1
+nong_1
+nonmonotonic_1
+o_1
+o_2
+object_1
+object_2
+objects_1
+of_1
+of_2
+office_1
+olkin_1
+omg_1
+omiecinski_1
+on_1
+operations_1
+optimizing_1
+oql_1
+orientation_1
+oriented_1
+orion_1
+orlando_1
+orli_1
+oscar_1
+otis_1
+overview_1
+oz_1
+özsu_1
+p_1
+pairs_1
+parallel_1
+part_1
+path_1
+pegasus_1
+penney_1
+per_1
+performance_1
+peter_1
+philip_1
+physical_1
+pogo_1
+polytopes_1
+posc_1
+possibilities_1
+practice_1
+preface_1
+princiles_1
+probabilistic_1
+problems_1
+processing_1
+processor_1
+program_1
+programming_1
+promises_1
+proposal_1
+proteus_1
+query_1
+r_1
+r³_1
+rafi_1
+rafiul_1
+ralph_1
+rámon_1
+randomized_1
+ravi_1
+ravikumar_1
+reality_1
+reference_1
+references_1
+related_1
+relational_1
+representation_1
+requirements_1
+research_1
+resolving_1
+retrieval_1
+reyes_1
+richard_1
+richardson_1
+riegel_1
+río_1
+risch_1
+robert_1
+roger_1
+rowe_1
+rules_1
+running_1
+rusinkiewicz_1
+russinoff_1
+s_1
+samet_1
+scheevel_1
+schema_1
+schematic_1
+schnorr_1
+schonfeld_1
+schuchardt_1
+search_1
+section_1
+sedgewick_1
+semantics_1
+serge_1
+set_1
+sethi_1
+sets_1
+shan_1
+shared_1
+sharing_1
+shekita_1
+sheth_1
+shortest_1
+should_1
+silberschatz_1
+simon_1
+skarra_1
+smalltalk_1
+snodgrass_1
+software_1
+soley_1
+solution_1
+some_1
+sören_1
+spatial_1
+specification_1
+specifying_1
+sql_1
+stana_1
+standard_1
+standards_1
+stanley_1
+stavros_1
+stein_1
+stephen_1
+steve_1
+steven_1
+stonebraker_1
+storage_1
+story_1
+stout_1
+straw_1
+structures_1
+studies_1
+sunit_1
+support_1
+supporting_1
+survey_1
+system_1
+systems_1
+t_1
+tadao_1
+tai_1
+takaoka_1
+tamer_1
+tao_1
+tardos_1
+tarlton_1
+tarlton_2
+techniques_1
+technology_1
+temporal_1
+teresa_1
+test_1
+tex_1
+the_1
+theory_1
+thompson_1
+tim_1
+time_1
+title_1
+tla_1
+to_1
+tom_1
+tomlinson_1
+tools_1
+tore_1
+tracking_1
+transaction_1
+transactional_1
+transactions_1
+treaty_1
+trivalent_1
+tsichritzis_1
+ullman_1
+umeshwar_1
+uncerainty_1
+ungar_1
+unisql_1
+user_1
+v_1
+vázquez_1
+vázquez_2
+vegs_1
+version_1
+vianu_1
+victor_1
+video_1
+view_1
+vincent_1
+virtual_1
+visual_1
+vortex_1
+w_1
+w_2
+waarts_1
+walid_1
+walker_1
+wand_1
+waqar_1
+war_1
+weber_1
+weimin_1
+weinberger_1
+weintraub_1
+weiser_1
+weiyi_1
+where_1
+widom_1
+wilkinson_1
+william_1
+williams_1
+williams_2
+with_1
+woelk_1
+won_1
+workflows_1
+yair_1
+yellin_1
+yu_1
+yuri_1
+zdonik_1
+zvi_1
diff --git a/asterix-fuzzyjoin/data/pub-small/csx-small-id.txt b/asterix-fuzzyjoin/data/pub-small/csx-small-id.txt
new file mode 100644
index 0000000..78fb816
--- /dev/null
+++ b/asterix-fuzzyjoin/data/pub-small/csx-small-id.txt
@@ -0,0 +1,100 @@
+1:oai CiteSeerXPSU 10.1.1.39.1830:Object SQL - A Language for the Design and Implementation of Object Databases:Jurgen Annevelink Rafiul Ahad Amelia Carlson Dan Fishman Mike Heytens William Kent:2009-04-13 ly, a function application expression consists of two expressions a function reference (labelled func_ref in Figure 3 line 2), and an argument (labelled arg). The func_ref expression evaluates to a (generic or specific) function identifier, which may be the same as the function that the expression is a part of, thus allowing recursive function invocations. The expression labelled arg evaluates to an arbitrary object or aggregate object. The semantics of evaluating function applications was discussed in detail in section 2. For example, to set the name of a person, we evaluate the following expression FunAssign(function name.person) (p1,'John') In this example, the first expression is itself a function call, applying the function FunAssign to the function name.person (an example of a specific function reference). This returns the oid of the function that sets a person's name, which is subsequently applied to a tuple of two elements, the oid of the person and the new name (a string o... CiteSeerX ACM Press 2009-04-13 2007-11-22 1994 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.39.1830 http //www.tu-chemnitz.de/~igrdb/docs/OpenODB/osql.ps.gz en 10.1.1.31.2534 10.1.1.28.4658 10.1.1.44.5947 10.1.1.39.199 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+2:oai CiteSeerXPSU 10.1.1.1.1483:Candidate high myopia loci on chromosomes 18p and 12q do not play a major role in susceptibility to common myopia:Grace Ibay Betty Doan Lauren Reider Debra Dana Melissa Schlifka Heping Hu Taura Holmes Jennifer O'Neill Robert Owens Elise Ciner Joan Bailey-Wilson Dwight Stambolian:2009-05-24 Background To determine whether previously reported loci predisposing to nonsyndromic high myopia show linkage to common myopia in pedigrees from two ethnic groups Ashkenazi Jewish and Amish. We hypothesized that these high myopia loci might exhibit allelic heterogeneity and be responsible for moderate /mild or common myopia. Methods Cycloplegic and manifest refraction were performed on 38 Jewish and 40 Amish families. Individuals with at least -1.00 D in each meridian of both eyes were classified as myopic. Genomic DNA was genotyped with 12 markers on chromosomes 12q21-23 and 18p11.3. Parametric and nonparametric linkage analyses were conducted to determine whether susceptibility alleles at these loci are important in families with less severe, clinical forms of myopia. Results There was no strong evidence of linkage of common myopia to these candidate regions all two-point and multipoint heterogeneity LOD scores were < 1.0 and non-parametric linkage p-values were > 0.01. However, one Amish family showed slight evidence of linkage (LOD>1.0) on 12q another 3 Amish families each gave LOD >1.0 on 18p and 3 Jewish families each gave LOD >1.0 on 12q. Conclusions Significant evidence of linkage (LOD> 3) of myopia was not found on chromosome 18p or 12q loci in these families. These results suggest that these loci do not play a major role in the causation of common myopia in our families studied. 
CiteSeerX 2009-05-24 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1483 http //www.biomedcentral.com/content/pdf/1471-2350-5-20.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+3:oai CiteSeerXPSU 10.1.1.1.1484:Winner-Take-All Network Utilising Pseudoinverse Reconstruction Subnets Demonstrates Robustness on the Handprinted Character Recognition Problem:J. Körmendy-rácz S. Szabó J. Lörincz G. Antal G. Kovács A. Lörincz:2009-05-24 Correspondence and offprint requests to J. Kormendy-Rácz Wittmeyer’s pseudoinverse iterative algorithm is formulated as a dynamic connectionist Data Compression and Reconstruction (DCR) network, and subnets of this type are supplemented by the winner-take-all paradigm. The winner is selected upon the goodness-of-fit of the input reconstruction. The network can be characterised as a competitive-cooperative-competitive architecture by virtue of the contrast enhancing properties of the pseudoinverse subnets. The network is capable of fast learning. The adopted learning method gives rise to increased sampling in the vicinity of dubious boundary regions that resembles the phenomenon of categorical perception. The generalising abilities of the scheme allow one to utilise single bit connection strengths. The network is robust against input noise and contrast levels, shows little sensitivity to imprecise connection strengths, and is promising for mixed VLSI implementation with on-chip learning properties. The features of the DCR network are demonstrated on the NIST database of handprinted characters. CiteSeerX Springer 2009-05-24 2007-11-19 1999 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1484 http //people.inf.elte.hu/lorincz/Files/publications/WTA_NCA.pdf en 10.1.1.134.6077 10.1.1.65.2144 10.1.1.54.7277 10.1.1.48.5282 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+4:oai CiteSeerXPSU 10.1.1.1.1485:DEEM a Tool for the Dependability Modeling and Evaluation:A. Bondavalli I. Mura S. Chiaradonna S. Poli F. Sandrini:2009-05-24 Processes Multiple-Phased Systems, whose operational life can be partitioned in a set of disjoint periods, called ¿phases¿ include several classes of systems such as Phased Mission Systems and Scheduled Maintenance Systems. Because of their deployment in critical applications, the dependability modeling and analysis of Multiple-Phased Systems is a task of primary relevance. However, the phased behavior makes the analysis of Multiple-Phased Systems extremely complex. This paper is centered on the description and application of DEEM, a dependability modeling and evaluation tool for Multiple Phased Systems. DEEM supports a powerful and efficient methodology for the analytical dependability modeling and evaluation of Multiple Phased Systems, based on Deterministic and Stochastic Petri Nets and on Markov Regenerative Processes. CiteSeerX IEEE Computer Society 2009-05-24 2007-11-19 2000 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1485 http //bonda.cnuce.cnr.it/Documentation/Papers/file-BMCFPS00-DSN2000-76.pdf en 10.1.1.47.2594 10.1.1.58.2039 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+5:oai CiteSeerXPSU 10.1.1.1.1486:Dynamical networks in function dynamics:Naoto Kataoka Kunihiko Kaneko:2009-05-24 Function dynamics Iterated map Self-reference Dynamical network As a first step toward realizing a dynamical system that evolves while spontaneously determining its own rule for time evolution, function dynamics (FD) is analyzed. FD consists of a functional equation with a self-referential term, given as a dynamical system of a one-dimensional map. Through the time evolution of this system, a dynamical graph (a network) emerges. This graph has three interesting properties (i) vertices appear as stable elements, (ii) the terminals of directed edges change in time, and (iii) some vertices determine the dynamics of edges, and edges determine the stability of the vertices, complementarily. Two aspects of FD are studied, the generation of a graph (network) structure and the dynamics of this graph (network) in the system. CiteSeerX 2009-05-24 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1486 http //chaos.c.u-tokyo.ac.jp/others/kataoka03.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+6:oai CiteSeerXPSU 10.1.1.1.1487:Simulation Prototyping:Ingolf Ståhl:2009-04-24 A simulation model is successful if it leads to policy action, i.e., if it is implemented. Studies show that for a model to be implemented, it must have good correspondence with the mental model of the system held by the user of the model. The user must feel confident that the simulation model corresponds to this mental model. An understanding of how the model works is required. Simulation models for implementation must be developed step by step, starting with a simple model, the simulation prototype. After this has been explained to the user, a more detailed model can be developed on the basis of feedback from the user. Software for simulation prototyping is discussed, e.g., with regard to the ease with which models and output can be explained and the speed with which small models can be written. CiteSeerX 2009-04-24 2007-11-19 2002 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1487 http //www.informs-cs.org/wsc02papers/073.pdf en 10.1.1.17.7647 10.1.1.134.3230 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+7:oai CiteSeerXPSU 10.1.1.1.1488:Hedging beyond duration and convexity:Jian Chen Michael C. Fu:2009-05-24 Hedging of fixed income securities remains one of the most challenging problems faced by financial institutions. The predominantly used measures of duration and convexity do not completely capture the interest rate risks borne by the holder of these securities. Using historical data for the entire yield curve, we perform a principal components analysis and find that the first four factors capture over 99.99% of the yield curve variation. Incorporating these factors into the pricing of arbitrary fixed income securities via Monte Carlo simulation, we derive perturbation analysis (PA) estimators for the price sensitivities with respect to the factors. Computational results for mortgage-backed securities (MBS) indicate that using these sensitivity measures in hedging provides far more protection against interest risk exposure than the conventional measures of duration and convexity. CiteSeerX 2009-05-24 2007-11-19 2002 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1488 http //www.informs-cs.org/wsc02papers/218.pdf en 10.1.1.113.9305 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+8:oai CiteSeerXPSU 10.1.1.1.1489:Designing for social friction Exploring ubiquitous computing as means of cultural interventions in urban space:Rune Huvendick Jensen Tau Ulv Lenskjold:2009-05-24 everyday life urban space Situationism As ubiquitous computing emerges in our lives and cities new opportunities for artistic and otherwise cultural interventions in urban space follow, but so far not much work has been done in order to articulate the socio-cultural significance of these new opportunities. This paper is part of a general attempt to develop a coherent understanding of the implications and potentials of ubiquitous computing in the context of everyday city life. On a more specific level the paper examines how the notion of social friction can be helpful in the development and analysis of ubiquitous computing in relation to art and design. Social friction is articulated as a critical position, which could be applied as a strategy for design. Our approach consists of a theoretical analysis and precedes concrete development and real-life experiments. As such the paper aims to establish a steppingstone from which to launch actual digital designs. We argue that by designing for the social friction, which is an intrinsic characteristic of everyday life, new forms of social and cultural potentials can be released. By means of discussing CityNova, a vision for a possible use of ubiquitous computing in urban space, we explore how this approach might lead to systems that create new ways of experiencing the city. CiteSeerX 2009-05-24 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1489 http //asp.cbs.dk/cade2004/proceedings/fullpapers/7_jensen_final_fullpaper.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+9:oai CiteSeerXPSU 10.1.1.1.1490:Optimal Combination of Number of Taps and Coefficient Bit-Width for Low Power FIR Filter Realization:João Portela Eduardo Costa José Monteiro:2009-05-24 This paper addresses the optimization of FIR filters for low power. We propose a search algorithm to find the combination of the number of taps and coe#cient bit-width that leads to the minimum number of total partial sums, and hence to the least power consumption. We show that the minimum number of taps does not necessarily lead to the least power consumption in fully parallel FIR filter architectures. This is particularly true if the reduction of the bit-width of the coe#cients is taken into account. We show that power is directly related to the total number of partial sums in the FIR filter, which in turn is determined by the number of bits set to 1 in the coe#cients. We have developed a search algorithm that achieves up to 36% less power consumption when compared to an implementation using the minimum number of taps. CiteSeerX 2009-05-24 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1490 http //tahoe.inesc-id.pt/pt/Ficheiros/1188.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+10:oai CiteSeerXPSU 10.1.1.1.1491:The Influence of a Course on Direct and Activating Instruction upon Student Teachers' Classroom Practice:Simon Veenman Eddie Denessen Ingrid Van Den Oord Ferdy Naafs:2009-05-24 Educational research has highlighted the importance of maintaining an orderly classroom environment and providing both clear and well-organized instruction tailored to the needs of individual students. Time spent on direct instruction and particularly the direct instruction of basic skills is associated with school learning (Wang, Haertel & Walberg, 1993). With the increased interest in constructivistic conceptions of learning and teaching today, educators with constructivistic orientations contend that various forms of knowledge and skills are applied more generally when constructed by the learners themselves as opposed to explicitly taught "knowledge is made, not acquired" (Phillips, 2000, p. 7). Such a view nevertheless often leads to an inclination to reject direct instruction by the teacher (see, for example, Brooks & Brooks, 1993). It should be noted, however, that many of the discussions of constructivistic orientations to learning and instruction are at the level of slogan and cliché (Duffy & Cunningham, 1996 Finn & Ravitch, 1996 Kozloff, 1998). In addition, the term constructivism has come to serve as an umbrella term for a diversity of views (Phillips, 1995 2000). CiteSeerX 2009-05-24 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1491 http //www.socsci.kun.nl/ped/owk/onderwijs/cursussen/io242/papers/earli2003_direct_instruction.pdf en 10.1.1.29.1993 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+11:oai CiteSeerXPSU 10.1.1.1.1492:Multiplanar Applications and Multimodal Networks:S. Keshav:2009-05-24 Future applications network architecture We believe that a broad class of future applications will span both the Internet and the telephone network because such multiplanar applications have several economic and architectural advantages over conventional ones. We also envision the close interlinking of the telephone network and the Internet to form a multimodal network. In this paper, we describe these applications and networks, outline their architecture, and present our experiences in constructing a prototype multiplanar application. CiteSeerX 2009-05-24 2007-11-19 1999 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1492 http //www.cs.cornell.edu/skeshav/papers/openarch99.pdf en 10.1.1.17.5614 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+12:oai CiteSeerXPSU 10.1.1.1.1493:Free-Riding and Whitewashing in Peer-to-Peer Systems:Michal Feldman Christos Papadimitriou John Chuang Ion Stoica:2009-05-24 We devise a simple model to study the phenomenon of free-riding and the effect of free identities on user behavior in peer-to-peer systems. At the heart of our model is a strategic user of a certain type, an intrinsic and private parameter that reflects the user's generosity. The user decides whether to contribute or free-ride based on how the current burden of contributing in the system compares to her type. We derive the emerging cooperation level in equilibrium and quantify the effect of providing free-riders with degraded service on the emerging cooperation. We find that this penalty mechanism is beneficial mostly when the "generosity level" of the society (i.e., the average type) is low. To quantify the social cost of free identities, we extend the model to account for dynamic scenarios with turnover (users joining and leaving) and with whitewashers users who strategically leave the system and re-join with a new identity. We find that the imposition of penalty on all legitimate newcomers incurs a significant social loss only under high turnover rates in conjunction with intermediate societal generosity levels. 
CiteSeerX 2009-05-24 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1493 http //www.dtc.umn.edu/weis2004/feldman.pdf en 10.1.1.18.6611 10.1.1.12.2253 10.1.1.12.6173 10.1.1.11.4846 10.1.1.127.6039 10.1.1.130.2771 10.1.1.30.6376 10.1.1.1.3415 10.1.1.42.1211 10.1.1.15.3743 10.1.1.11.6993 10.1.1.13.9315 10.1.1.12.9564 10.1.1.6.1046 10.1.1.101.3865 10.1.1.112.397 10.1.1.77.6233 10.1.1.76.9348 10.1.1.60.8693 10.1.1.1.3899 10.1.1.84.7582 10.1.1.116.3542 10.1.1.59.9845 10.1.1.119.8132 10.1.1.132.4448 10.1.1.143.2268 10.1.1.60.4396 10.1.1.87.4710 10.1.1.137.696 10.1.1.60.8756 10.1.1.97.1922 10.1.1.105.4069 10.1.1.76.3341 10.1.1.80.5437 10.1.1.93.1336 10.1.1.131.1666 10.1.1.102.2386 10.1.1.108.4119 10.1.1.108.5840 10.1.1.109.7946 10.1.1.66.8436 10.1.1.73.5608 10.1.1.73.5797 10.1.1.75.4852 10.1.1.83.2020 10.1.1.85.8881 10.1.1.87.5160 10.1.1.92.3905 10.1.1.92.5572 10.1.1.95.1197 10.1.1.95.6876 10.1.1.110.4873 10.1.1.111.8663 10.1.1.112.1051 10.1.1.113.5715 10.1.1.113.7254 10.1.1.115.348 10.1.1.120.8054 10.1.1.123.6169 10.1.1.128.7663 10.1.1.129.3166 10.1.1.134.3014 10.1.1.135.98 10.1.1.59.9602 10.1.1.129.9940 10.1.1.141.1032 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+13:oai CiteSeerXPSU 10.1.1.1.1494:A Simple Algorithm for Complete Motion Planning of Translating Polyhedral Robots:Gokul Varadhan Shankar Krishnan T. V. N. Sriram Dinesh Manocha:2009-05-24 We present an algorithm for complete path planning for translating polyhedral robots in 3D. Instead of exactly computing an explicit representation of the free space, we compute a roadmap that captures its connectivity. This representation encodes the complete connectivity of free space and allows us to perform exact path planning. We construct the roadmap by computing deterministic samples in free space that lie on an adaptive volumetric grid. Our algorithm is simple to implement and uses two tests a complex cell test and a star-shaped test. These tests can be efficiently performed on polyhedral objects using max-norm distance computation and linear programming. The complexity of our algorithm varies as a function of the size of narrow passages in the configuration space. We demonstrate the performance of our algorithm on environments with very small narrow passages or no collision-free paths. CiteSeerX Sage Publications, Inc. 2009-05-24 2007-11-19 2005 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1494 http //www.cs.unc.edu/~varadhan/papers/motion.pdf en 10.1.1.19.3462 10.1.1.20.8539 10.1.1.52.7808 10.1.1.31.1678 10.1.1.34.1071 10.1.1.88.5053 10.1.1.1.3224 10.1.1.66.385 10.1.1.58.2466 10.1.1.76.8798 10.1.1.88.305 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+14:oai CiteSeerXPSU 10.1.1.1.1495:Modeling Ship Arrivals in Ports:Eelco van Asperen Rommert Dekker Mark Polman Henk de Swaan Arons:2009-04-29 The model used in this report focuses on the analysis of ship waiting statistics and stock fluctuations under different arrival processes. However, the basic outline is the same central to both models are a jetty and accompanying tankfarm facilities belonging to a new chemical plant in the Port of Rotterdam. Both the supply of raw materials and the export of finished products occur through ships loading and unloading at the jetty. Since disruptions in the plants production process are very expensive, buffer stock is needed to allow for variations in ship arrivals and overseas exports through large ships. Ports provide jetty facilities for ships to load and unload their cargo. Since ship delays are costly, terminal operators attempt to minimize their number and duration. Here, simulation has proved to be a very suitable tool. However, in port simulation models, the impact of the arrival process of ships on the model outcomes tends to be underestimated. This article considers three arrival processes stock-controlled, equidistant per ship type, and Poisson. We assess how their deployment in a port simulation model, based on data from a real case study, affects the efficiency of the loading and unloading process. Poisson, which is the chosen arrival process in many client-oriented simulations, actually performs worst in terms of both ship delays and required storage capacity. Stock-controlled arrivals perform best with regard to ship delays and required storage capacity. In the case study two types of arrival processes were considered. The first type are the so-called stock-controlled arrivals, i.e., ship arrivals are scheduled in such a way, that a base stock level is maintained in the tanks. Given a base stock level of a raw material or ... 
CiteSeerX 2009-04-29 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1495 http //www.informs-cs.org/wsc03papers/222.pdf en 10.1.1.17.1837 10.1.1.120.9692 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+15:oai CiteSeerXPSU 10.1.1.1.1496:In Proceedings of the16th IFAC Symposium on Automatic Control in Aerospace, Elsevier Science Ltd, Oxford,:Uk Cognitive Tools Donald Sofge Dennis Perzanowski Marjorie Skubic Magdalena Bugajska J. Gregory Trafton Nicholas Cassimatis Derek Brock William Adams Alan Schultz:2009-04-19 Cognitive Systems Co-operative Control Speech Recognition Natural Language Human-Machine Interface Autonomous Mobile Robots The effective use of humanoid robots in space will depend upon the efficacy of interaction between humans and robots. The key to achieving this interaction is to provide the robot with sufficient skills for natural communication with humans so that humans can interact with the robot almost as though it were another human. This requires that a number of basic capabilities be incorporated into the robot, including voice recognition, natural language, and cognitive tools on-board the robot to facilitate interaction between humans and robots through use of common representations and shared humanlike behaviors. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1496 http //www.aic.nrl.navy.mil/~dennisp/ifac.aca.2004.pdf en 10.1.1.13.8248 10.1.1.101.9124 10.1.1.58.5211 10.1.1.99.4007 10.1.1.58.4797 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+16:oai CiteSeerXPSU 10.1.1.1.1497:Exploiting Computer Automation to Improve the Interview Process and Increase Survey Cooperation:Jeffrey C. Moore Jeffrey C. Moore:2009-04-19 burden conversational norms efficiency flow nonresponse/attrition questionnaire design respondent-friendly I. In Couper (2002) outlines the "challenges and opportunities" of recent and stillemerging technological developments on the conduct of survey research. This paper focuses on one such development -- the use of computer-assisted survey instruments in place of paper-andpencil questionnaires -- and it focuses on one particular opportunity which this development presents the ability to improve efficiency, "flow," and naturalness, and in general make the interview experience a more pleasant one for all participants, while still controlling question wording and sequencing. Moral arguments can be raised in defense of such efforts the potential for important practical benefits, including improved survey cooperation, lends more mundane but perhaps more potent support. Although the research literature is surprisingly scant, there is some evidence that improved instrument design can reduce nonresponse. A recent effort by the U.S. Census Bureau to redesign the core instrument for the Survey of Income and Program Participation (SIPP) offers additional support. Motivated in large measure by evidence of increasing unit nonresponse and attrition, the primary goal of the SIPP redesign effort was to improve the interview process, and in particular to seek ways to avoid violations of conversational norms (e.g., Grice, 1975). A great many of the SIPP interview process improvements would not have been feasible without the computerization of the survey instrument. 
This paper briefly summarizes many of the technology-based changes implemented in the SIPP instrument, and briefly describes a set of field experiments used to develop and refine the new procedures and to evaluate their success in achieving SIPP's redesign goals. Keywords burden, conversational norms, efficiency, flow, nonresponse/... CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1497 http //www.census.gov/srd/papers/pdf/rsm2004-01.pdf en 10.1.1.131.9305 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+17:oai CiteSeerXPSU 10.1.1.1.1499:Performance and Design Evaluation:Of The Raid-Ii Peter M. Chen Edward K. Lee Ann L. Drapeau Ethan L. Miller Srinivasan Seshan Ken Shirriff David A. Patterson Y H. Katz:2009-04-19 RAID-II is a high-bandwidth, networkattached storage server designed and implemented at the University of California at Berkeley. In this paper, we measure the performance of RAID-II and evaluate various architectural decisions made during the design process. We first measure the end-to-end performance of the system to be approximately 20 MB/s for both disk array reads and writes. We then perform a bottleneck analysis by examining the performance of each individual subsystem and conclude that the disk subsystem limits performance. By adding a custom interconnect board with a high-speed memory and bus system and parity engine, we are able to achieve a performance speedup of 8 to 15 over a comparative system using only off-theshelf hardware. CiteSeerX 2009-04-19 2007-11-19 1994 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1499 http //ssrc.cse.ucsc.edu/~elm/Papers/ipps93.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+18:oai CiteSeerXPSU 10.1.1.1.1500:The Marinov Motor, Notional Induction:Without Magnetic Field J. P Wesley:2009-04-19 Introduction The force of induction F on a charge q is given by FA=-qtcdd,(1) where A is the usual magnetic vector potential defined by A r rJr rrc - s ,(2) where J is the current density. Slowly varying effects are assumed here, where the basic theory may be given as a true relativity theory, involving the separation distance between two charges and its time derivatives. This force of induction, Eq. (1), yields Faraday's law of electromagnetic induction for the special case of an electromotive force (emf) around a fixed closed loop. In particular, emf d d d d d ' & ( 0 ) =- =- =- s s s sF q s tc tc an tc an A B ,(3) where F is the magnetic flux through the loop. It is observed in the laboratory that an emf is also induced when =A tc 0 , and the magnetic flux through the loop is changed by moving the loop, so Faraday's law becomes emf = - .-(4) Francisco Mller's (1987) experiments show that induction occurs locally and that the force CiteSeerX 2009-04-19 2007-11-19 2001 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1500 http //redshift.vif.com/JournalFiles/Pre2001/V05NO3PDF/v05n3wes.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+19:oai CiteSeerXPSU 10.1.1.1.1501:Biometric Verification Based on Grip-Pattern Recognition:Raymond Veldhuis Asker Bazen Joost Kauffman Pieter Hartel:2009-04-19 Biometric verification likelihood ratio smart gun grip-pattern recognition This paper describes the design, implementation and evaluation of a user-verification system for a smart gun, which is based on grip-pattern recognition. An existing pressure sensor consisting of an array of 44 44 piezoresistive elements is used to measure the grip pattern. An interface has been developed to acquire pressure images from the sensor. The values of the pixels in the pressure-pattern images are used as inputs for a verification algorithm, which is currently implemented in software on a PC. The verification algorithm is based on a likelihoodratio classifier for Gaussian probability densities. First results indicate that it is feasible to use grip-pattern recognition for biometric verification. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1501 http //www.ub.utwente.nl/webdocs/ctit/1/000000f5.pdf en 10.1.1.9.5838 10.1.1.101.5555 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+20:oai CiteSeerXPSU 10.1.1.1.1502:Relativistic Doppler Effect and the Principle of Relativity:W. Engelhardt:2009-04-19 Relativity Doppler Effect Aberration a private address Fasaneriestrasse 8 D-80636 München The frequency shifts predicted by the `relativistic' Doppler e#ect are derived in the photon picture of light. It turns out that, in general, the results do not depend exclusively on the relative velocity between observer and light source. CiteSeerX 2009-04-19 2007-11-19 2003 text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1502 http //redshift.vif.com/JournalFiles/V10NO4PDF/V10N4ENG.PDF en 10.1.1.58.3335 10.1.1.140.9931 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+21:oai CiteSeerXPSU 10.1.1.1.1503:Differential Association Rule Mining for the Study of Protein-Protein Interaction Networks:Christopher Besemann Anne Denton Ajay Yekkirala Ron Hutchison Marc Anderson:2009-04-19 Protein-protein interactions are of great interest to biologists. A variety of high-throughput techniques have been devised, each of which leads to a separate definition of an interaction network. The concept of differential association rule mining is introduced to study the annotations of proteins in the context of one or more interaction networks. Differences among items across edges of a network are explicitly targeted. As a second step we identify differences between networks that are separately defined on the same set of nodes. The technique of differential association rule mining is applied to the comparison of protein annotations within an interaction network and between different interaction networks. In both cases we were able to find rules that explain known properties of protein interaction networks as well as rules that show promise for advanced study. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1503 http //www.cs.ndsu.nodak.edu/%7Eadenton/publications/BIOKDD040712a.pdf en 10.1.1.40.6984 10.1.1.40.9892 10.1.1.12.6495 10.1.1.13.6963 10.1.1.113.6042 10.1.1.36.2485 10.1.1.10.7611 10.1.1.18.8344 10.1.1.12.7211 10.1.1.32.7066 10.1.1.12.3538 10.1.1.56.7889 10.1.1.12.8995 10.1.1.11.2425 10.1.1.27.9671 10.1.1.21.3747 10.1.1.1.5073 10.1.1.66.4476 10.1.1.67.617 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+22:oai CiteSeerXPSU 10.1.1.1.1504:Combining Adaptive and Deterministic Routing Evaluation of a Hybrid Router:Dianne Kumar And Dianne Kumar Walid A. Najjar:2009-04-19 This paper reports on the implementation and evaluation ofahybrid routing scheme that combines the advantages of deterministic and adaptive routing. An expanded version of this paper can be found in #1# In the deterministic, or dimension-ordered, routing algorithm a message is routed along decreasing dimensions with a dimension decrease occurring only when zero hops remain in all higher dimensions. Virtual channels #VCs# are included in the router to avoid deadlock #6#. Deterministic routing can su#er from congestion since only a single path between source and destination can be used CiteSeerX Springer 2009-04-19 2007-11-19 1999 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1504 http //www.cs.ucr.edu/%7Enajjar/papers/canpc99.pdf en 10.1.1.117.7403 10.1.1.80.5595 10.1.1.44.6553 10.1.1.53.4729 10.1.1.22.6577 10.1.1.24.4663 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+23:oai CiteSeerXPSU 10.1.1.1.1505:Towards a More Complete Model of Role:Adrian Baldwin Cheh Goh Cheh Goh:2009-04-19 In order to manage the use of roles for the purpose of access control, it is important to look at attributes beyond the consideration of capability assignment. Fundamentally, a generic attribute description using a constraint-based approach will allow many of the important aspects of role, such as scope, activation and deactivation, to be included. Furthermore, the commonly accepted concept of role hierarchy is challenged from the point of view of subsidiarity in real organisations, with the suggestion that role hierarchy has limited usefulness that does not seem to apply widely. CiteSeerX 2009-04-19 2007-11-19 1998 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1505 http //www.cs.kun.nl/is/Library/./Data/1998/Goh/Towards/1998-Goh-Towards.pdf en 10.1.1.47.3914 10.1.1.26.2311 10.1.1.45.1616 10.1.1.18.5632 10.1.1.103.8527 10.1.1.11.1495 10.1.1.21.1056 10.1.1.18.6290 10.1.1.88.1656 10.1.1.130.4738 10.1.1.57.6574 10.1.1.10.7333 10.1.1.21.3059 10.1.1.76.1573 10.1.1.85.2890 10.1.1.14.6686 10.1.1.59.2363 10.1.1.93.2667 10.1.1.100.3563 10.1.1.105.1864 10.1.1.88.5400 10.1.1.88.7033 10.1.1.90.1790 10.1.1.90.2382 10.1.1.90.3968 10.1.1.91.1743 10.1.1.94.4357 10.1.1.124.6837 10.1.1.130.3601 10.1.1.18.2266 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+24:oai CiteSeerXPSU 10.1.1.1.1506:Target Tracking with Distributed Sensors The Focus of Attention Problem:V. Isler Sanjeev Khanna J. Spletzer C. J. Taylor Volkan Isler A Camillo J. Taylor A:2009-04-19 In this paper, we consider the problem of assigning sensors to track targets so as to minimize the expected error in the resulting estimation for target locations. Specifically, we are interested in how disjoint pairs of bearing or range sensors can be best assigned to targets in order to minimize the expected error in the estimates. We refer to this as the focus of attention (FOA) problem. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1506 http //www.cis.upenn.edu/%7Eisleri/research/papers/foaCVIU.pdf en 10.1.1.36.8357 10.1.1.18.8488 10.1.1.19.8778 10.1.1.14.3443 10.1.1.58.6802 10.1.1.49.9816 10.1.1.84.4195 10.1.1.144.2859 10.1.1.71.7438 10.1.1.78.7656 10.1.1.126.3811 10.1.1.130.1224 10.1.1.132.8302 10.1.1.58.5357 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+25:oai CiteSeerXPSU 10.1.1.1.1507:Creating an Integrated Computer Assisted Learning and Assessment Experience in the School of European Languages and Cultures at the University of Edinburgh:John Hobbs John Hobbs Marcus Duran Marcus Duran Eh Jx:2009-04-19 In the field of Computer-Aided anything, acronyms abound. They are, after all, useful tools. However, there is a risk that we become constrained by them and, as a result, fail to see beyond them. CiteSeerX 2009-04-19 2007-11-19 2002 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1507 http //www.lboro.ac.uk/service/ltd/flicaa/conf2002/pdfs/hobbs_jm.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+26:oai CiteSeerXPSU 10.1.1.1.1508:Web Structure Analysis for Information Mining:Lakshmi Vijjappu Ah-Hwee Ah-hwee Tan Chew-lim Tan:2009-04-19 Our approach to extracting information from the web analyzes the structural content of web pages through exploiting the latent information given by HTML tags. For each specific extraction task, an object model is created consisting of the salient fields to be extracted and the corresponding extraction rules based on a library of HTML parsing functions. We derive extraction rules for both single-slot and multiple-slot extraction tasks which we illustrate through two sample domains. CiteSeerX 2009-04-19 2007-11-19 2001 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1508 http //www.csc.liv.ac.uk/~wda2001/Papers/18_lakshmi_wda2001.pdf en 10.1.1.46.6008 10.1.1.32.8501 10.1.1.51.8159 10.1.1.54.3298 10.1.1.20.8120 10.1.1.36.6286 10.1.1.47.6312 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+27:oai CiteSeerXPSU 10.1.1.1.1509:Bayesian Inference for Transductive Learning of Kernel Matrix Using the Tanner-Wong Data Augmentation Algorithm:Zhihua Zhang Dit-Yan Yeung James T. Kwok:2009-04-19 In kernel methods, an interesting recent development seeks to learn a good kernel from empirical data automatically. In this paper, by regarding the transductive learning of the kernel matrix as a missing data problem, we propose a Bayesian hierarchical model for the problem and devise the Tanner-Wong data augmentation algorithm for making inference on the model. The Tanner-Wong algorithm is closely related to Gibbs sampling, and it also bears a strong resemblance to the expectation-maximization (EM) algorithm. For an e#cient implementation, we propose a simplified Bayesian hierarchical model and the corresponding TannerWong algorithm. We express the relationship between the kernel on the input space and the kernel on the output space as a symmetric-definite generalized eigenproblem. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1509 http //www.cs.ust.hk/~zhzhang/papers/icml04tw.pdf en 10.1.1.133.4884 10.1.1.16.1922 10.1.1.23.6757 10.1.1.37.8662 10.1.1.72.509 10.1.1.71.5318 10.1.1.94.7695 10.1.1.119.4637 10.1.1.102.9977 10.1.1.73.7176 10.1.1.77.7873 10.1.1.112.9663 10.1.1.116.111 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+28:oai CiteSeerXPSU 10.1.1.1.1510:Genetic Improvisation Model - a framework for real-time performance environments:Paul Nemirovsky Richard Watson:2009-04-19 This paper presents the current state in an ongoing development of the Genetic Improvisation Model (GIM) a framework for the design of real-time improvisational systems. The aesthetic rationale for the model is presented, followed by a discussion of its general principles. A discussion of the Emonic Environment, a networked system for audiovisual creation built on GIM's principles, follows CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1510 http //www.media.mit.edu/~pauln/research/emonic/docs/evomusart2003.pdf en 10.1.1.46.6615 10.1.1.58.3628 10.1.1.1.4031 10.1.1.57.9915 10.1.1.59.1804 10.1.1.69.8249 10.1.1.90.5797 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+29:oai CiteSeerXPSU 10.1.1.1.1511:Mean-Variance Hedging under Additional:Market Information Frank Frank Thierbach Frank Thierbach:2009-04-19 JEL classification G11 G12 In this paper we analyse the mean-variance hedging approach in an incomplete market under the assumption of additional market information, which is represented by a given, finite set of observed prices of non-attainable contingent claims. Due to no-arbitrage arguments, our set of investment opportunities increases and the set of possible equivalent martingale measures shrinks. Therefore, we obtain a modified mean-variance hedging problem, which takes into account the observed additional market information. Solving this by means of the techniques developed by Gourieroux, Laurent and Pham (1998), we obtain an explicit description of the optimal hedging strategy and an admissible, constrained variance-optimal signed martingale measure, that generates both the approximation price and the observed option prices. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1511 http //www.finasto.uni-bonn.de/papers/mvhedging.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+30:oai CiteSeerXPSU 10.1.1.1.1512:Sis-Prueba:Tool For Rapid Pedro Concejero Cerezo Juan José Rodríguez Soler Daniel Tapias Merino Telefónica Móviles España:2009-04-19 SIS PRUEBA is a software tool to integrate usability and user-centred design principles in the development process of services within Telefnica Mviles Espaa (TME), the largest mobile telecommunications operator in Spain. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1512 http //sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS//Vol-103/concejero-et-al.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+31:oai CiteSeerXPSU 10.1.1.1.1513:Reducing the Computational Load of Energy Evaluations for:Protein Folding Eunice Eunice E. Santos Eugene Santos:2009-04-19 protein folding triangular lattice HP energy model caching reuse evolutionary Predicting the native conformation using computational protein models requires a large number of energy evaluations even with simplified models such as hydrophobic-hydrophilic (HP) models. Clearly, energy evaluations constitute a significant portion of computational time. We hypothesize that given the structured nature of algorithms that search for candidate conformations such as stochastic methods, energy evaluation computations can be cached and reused, thus saving computational time and e#ort. In this paper, we present a caching approach and apply it to 2D triangular HP lattice model. We provide theoretical analysis and prediction of the expected savings from caching as applied this model. We conduct experiments using a sophisticated evolutionary algorithm that contains elements of local search, memetic algorithms, diversity replacement, etc. in order to verify our hypothesis and demonstrate a significant level 1 of savings in computational e#ort and time that caching can provide. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1513 http //excalibur.brc.uconn.edu/Papers/Conference/bibe04-submit.pdf en 10.1.1.53.7409 10.1.1.46.770 10.1.1.10.5827 10.1.1.54.8912 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+32:oai CiteSeerXPSU 10.1.1.1.1514:Human-Robot Interactions in Active Sensor Networks:Alexei Makarenko Tobias Kaupp Ben Grocholsky Hugh Durrant-whyte:2009-04-19 This paper considers the modes of interaction between one or several human operators and an active sensor network -- a fully decentralized network of sensors some or all of which have actuators and are in that sense active. The primary goal of this study is to investigate the conditions under which the human involvement will not jeopardize scalability of the overall system. Two aspects of human-robot interaction are considered the ways in which the global view of the system may be conveyed to the operators, and how the operators may influence the behavior of the system during the course of its operation. The results of analysis favor peer-topeer information-based interactions between the operators and the network whereby the humans act as extended sensors and communication nodes of the network itself. Experiments on an indoor active sensor network are described. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1514 http //www.grasp.upenn.edu/~bpg/makarenko03_hum_rob_in_asn.pdf en 10.1.1.110.6935 10.1.1.18.2772 10.1.1.70.4661 10.1.1.42.1968 10.1.1.14.7286 10.1.1.123.9637 10.1.1.58.5090 10.1.1.11.6081 10.1.1.58.5988 10.1.1.4.8345 10.1.1.58.4968 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+33:oai CiteSeerXPSU 10.1.1.1.1515:Disjoint Sum of Product Minimization by Evolutionary Algorithms:Nicole Drechsler Mario Hilgemeier Görschwin Fey Rolf Drechsler:2009-04-19 Recently, an approach has been presented to minimize Disjoint Sumof -Products (DSOPs) based on Binary Decision Diagrams (BDDs). Due to the symbolic representation of cubes for large problem instances, the method is orders of magnitude faster than previous enumerative techniques. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1515 http //www.informatik.uni-bremen.de/agra/doc/work/evohot04.pdf en 10.1.1.1.5124 10.1.1.30.5588 10.1.1.29.2722 10.1.1.12.7617 10.1.1.125.1065 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+34:oai CiteSeerXPSU 10.1.1.1.1516:Acta Cryst. (2004). B60, 481489 DOI 10.1107/S0108768104013564 481 Acta Crystallographica Section B:Structural Science Issn Valeria Ferretti A Paola Gilli A Pier Andrea Borea B A Centro Di:2009-04-19 this paper are available from the IUCr electronic archives (Reference NA5019). Services for accessing these data are described at the back of the journal CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1516 http //journals.iucr.org/b/issues/2004/04/00/na5019/na5019.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+35:oai CiteSeerXPSU 10.1.1.1.1517:The Lattice-Boltzmann Method for Simulating Gaseous Phenomena:Xiaoming Wei Student Member Wei Li Klaus Mueller Arie E. Kaufman:2009-04-19 We present a physically-based, yet fast and simple method to simulate gaseous phenomena. In our approach, the incompressible Navier-Stokes (NS) equations governing fluid motion have been modeled in a novel way to achieve a realistic animation. We introduce the Lattice Boltzmann Model (LBM), which simulates the microscopic movement of fluid particles by linear and local rules on a grid of cells so that the macroscopic averaged properties obey the desired NS equations. The LBM is defined on a 2D or 3D discrete lattice, which is used to solve fluid animation based on different boundary conditions. The LBM simulation generates, in real-time, an accurate velocity field and can incorporate an optional temperature field to account for the buoyancy force of hot gas. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1517 http //www.cs.sunysb.edu/%7Emueller/papers/smokeTVCG04.pdf en 10.1.1.15.9203 10.1.1.25.5737 10.1.1.17.1966 10.1.1.131.9652 10.1.1.38.7021 10.1.1.23.4545 10.1.1.84.7193 10.1.1.23.5064 10.1.1.6.4632 10.1.1.15.6033 10.1.1.61.3740 10.1.1.104.6696 10.1.1.136.5635 10.1.1.1.2520 10.1.1.72.4834 10.1.1.83.4851 10.1.1.95.5556 10.1.1.110.7589 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+36:oai CiteSeerXPSU 10.1.1.1.1518:Peer-to-Peer Human-Robot Interaction for Space Exploration:Terrence Fong And Terrence Fong Illah Nourbakhsh:2009-04-19 NASA has embarked on a long-term program to develop human-robot systems for sustained, affordable space exploration. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1518 http //www.ri.cmu.edu/pub_files/pub4/fong_terrence_w_2004_1/fong_terrence_w_2004_1.pdf en 10.1.1.36.6789 10.1.1.2.8285 10.1.1.16.516 10.1.1.6.118 10.1.1.4.2304 10.1.1.99.2775 10.1.1.61.1527 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+37:oai CiteSeerXPSU 10.1.1.1.1519:Survey Taxonomy of Packet Classification Techniques:David Taylor Sa Da Prot Dp Flowid Pt:2009-04-19 Packet classification is an enabling function for a variety of Internet applications including Quality of Service, security, monitoring, and multimedia communications. In order to classify a packet as belonging to a particular flow or set of flows, network nodes must perform a search over a set of filters using multiple fields of the packet as the search key. In general, there have been two major threads of research addressing packet classification algorithmic and architectural. A few pioneering groups of researchers posed the problem, provided complexity bounds, and offered a collection of algorithmic solutions. Subsequently, the design space has been vigorously explored by many offering new algorithms and improvements upon existing algorithms. Given the inability of early algorithms to meet performance constraints imposed by high speed links, researchers in industry and academia devised architectural solutions to the problem. This thread of research produced the most widely-used packet classification device technology, Ternary Content Addressable Memory (TCAM). New architectural research combines intelligent algorithms and novel architectures to eliminate many of the unfavorable characteristics of current TCAMs. We observe that the community appears to be converging on a combined algorithmic and architectural approach to the problem. Using a taxonomy based on the high-level approach to the problem and a minimal set of running examples, we provide a survey of the seminal and recent solutions to the problem. It is our hope to foster a deeper understanding of the various packet classification techniques while providing a useful framework for discerning relationships and distinctions. 
CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1519 http //www.arl.wustl.edu/Publications/2000-04/wucse-2004-24.pdf en 10.1.1.137.3147 10.1.1.121.1309 10.1.1.13.9939 10.1.1.39.697 10.1.1.24.3532 10.1.1.29.4777 10.1.1.12.3539 10.1.1.112.1058 10.1.1.12.5688 10.1.1.41.4744 10.1.1.41.9413 10.1.1.3.5167 10.1.1.32.9914 10.1.1.105.3710 10.1.1.58.2312 10.1.1.58.5079 10.1.1.13.3703 10.1.1.28.9719 10.1.1.135.9578 10.1.1.6.107 10.1.1.121.8780 10.1.1.133.2753 10.1.1.78.9584 10.1.1.97.442 10.1.1.86.5588 10.1.1.104.4868 10.1.1.108.4619 10.1.1.65.3134 10.1.1.76.3971 10.1.1.77.8580 10.1.1.83.3090 10.1.1.85.8699 10.1.1.89.6993 10.1.1.89.7016 10.1.1.91.9738 10.1.1.94.8479 10.1.1.97.6162 10.1.1.72.6318 10.1.1.126.1241 10.1.1.61.5847 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+38:oai CiteSeerXPSU 10.1.1.1.1520:Friscof Risco:Framework Of Information Eckhard D. Falkenberg Wolfgang Hesse Paul Lindgreen Björn E. Nilsson J. L. Han Oei Colette Rolland Ronald K. Stamper Frans J. M. Van Assche Alexander A. Verrijn-stuart Klaus Voss:2009-04-19 this report, Paul Lindgreen as secretary and as editor of the interim report [Lin90a] CiteSeerX 2009-04-19 2007-11-19 1998 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1520 http //www.cs.kun.nl/is/Library/./Data/1998/Lindgreen/FRISCO/1998-Lindgreen-FRISCO.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+39:oai CiteSeerXPSU 10.1.1.1.1521:Average performance of quasi Monte Carlo methods for global optimization:Hisham A. Al-Mharmah:2009-04-24 In this paper we compare the average performance of one class of low-discrepancy quasi-Monte Carlo sequences for global optimization. Weiner measure is assumed as the probability prior on all optimized functions. We show how to construct van der Corput sequences and we prove their consistency. Numerical experimentation shows that the van der Corput sequence in base 2 has a better average performance. CiteSeerX 2009-04-24 2007-11-19 1998 text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1521 http //www.informs-cs.org/wsc98papers/083.PDF en 10.1.1.22.679 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+40:oai CiteSeerXPSU 10.1.1.1.1522:The Virtual Ms Lyceum A Consortium For Modeling And Simulation Technology:D. J. Medeiros E. F. Watson J. S. Carson M. S. Manivannan Steven D. Farr Alex F. Sisti:2009-04-19 This paper addresses the opportunity to put into place a virtual consortium for modeling and simulation. While periodic conferences such as the Winter Simulation Conference are tremendously vital to the continued growth of modeling and simulation research, they do not offer the day-to-day technical exchange that can now be made possible with matured collaborative technologies. CiteSeerX 2009-04-19 2007-11-19 1998 text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1522 http //www.informs-cs.org/wsc98papers/228.PDF en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+41:oai CiteSeerXPSU 10.1.1.1.1523:Classification And Regression Trees, Cart - A User Manual For Identifying Indicators Of Vulnerability To Famine And Chronic Food Insecurity:Auser Manualfor Yisehac Yohannes Patrick Webb:2009-04-19 FAMINE AND CHRONIC FOOD INSECURITY YISE HAC YO HAN NES ue, the Netherlands, Norway, the Philippines, the Rockefeller Foundation, the Rural Industries Research and Development Corporation (Australia), South Africa, the Southern African Development Bank, Spain, Sweden, Switzerland, the United Kingdom, the United Nations Children's Fund, the United States, and the World Bank. CLASSIFIC ATION AND REGRESSION TREES, CART^TM A USER MANUAL FOR IDENTIFYING INDIC A TORS OF VULNERABILITY TO FAMINE AND CHRONIC FOOD INSECURITY YISEHAC YOHANNES PATRICK WEBB MICROCOMPUTERS IN POLICY RESEARCH INTERNATIONAL FOOD POLICY RESEARCH INSTITUTE CART is a registered trademark of California Statistical Software, Inc. Copyright 1999 by the International Food Policy Research Institute 2033 K Street, N.W. Washington, D.C. 20006-1002 U.S.A. Library of Congress Cataloging-in-Publication Data available Yohannes, Yisehac Classification and Regression Trees, Cart^TM A User Manual for Identifying Indicators of Vulnerability to Famine and Chronic Food Insecurity / Yise CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1523 http //www.ifpri.org/pubs/microcom/micro3.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+42:oai CiteSeerXPSU 10.1.1.1.1524:An Approach for Locating Segmentation Points of Handwritten Digit Strings:Using Neural Network:2009-04-19 An approach for segmentation of handwritten touching numeral strings is presented in this paper. A neural network has been designed to deal with various types of touching observed frequently in numeral strings. A numeral string image is split into a number of line segments while stroke extraction is being performed and the segments are represented with straight lines. Four types of primitive are defined based on the lines and used for representing the numeral string in more abstractive way and extracting clues on touching information from the string. Potential segmentation points are located using the neural network by active interpretation of the features collected from the primitives. Also, the run-length coding scheme is employed for efficient representation and manipulation of images. On a test set collected from real mail pieces, the segmentation accuracy of 89.1% was achieved, in image level, in a preliminary experiment. 1. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1524 http //www.csc.liv.ac.uk/%7Eprima/ICDAR2003/Papers/0025_697_kim_g.pdf en 10.1.1.35.2218 10.1.1.44.7527 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+43:oai CiteSeerXPSU 10.1.1.1.1525:An Overview of JML Tools and Applications:Lilian Burdy Yoonsik Cheon David Cok Michael D. Ernst Joe Kiniry Gary T. Leavens K. Rustan M. Leino Erik Poll:2009-04-19 formal specification Java runtime assertion checking static checking The Java Modeling Language (JML) can be used to specify the detailed design of Java classes and interfaces by adding annotations to Java source files. The aim of JML is to provide a specification language that is easy to use for Java programmers and that is supported by a wide range of tools for specification type-checking, runtime debugging, static analysis, and verification. CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1525 http //www.gemplus.com/smart/r_d/publications/pdf/BCC_03jm.pdf en 10.1.1.137.4260 10.1.1.39.1223 10.1.1.36.9943 10.1.1.29.6183 10.1.1.70.1745 10.1.1.11.2133 10.1.1.17.3839 10.1.1.24.2555 10.1.1.34.8403 10.1.1.52.3873 10.1.1.10.4654 10.1.1.1.6063 10.1.1.16.1895 10.1.1.117.5270 10.1.1.132.7016 10.1.1.1.6522 10.1.1.2.5030 10.1.1.16.800 10.1.1.10.547 10.1.1.13.5473 10.1.1.15.9976 10.1.1.120.795 10.1.1.26.1982 10.1.1.17.1067 10.1.1.2.1207 10.1.1.25.9636 10.1.1.5.8315 10.1.1.1.9075 10.1.1.39.2890 10.1.1.128.9986 10.1.1.1.3304 10.1.1.10.8374 10.1.1.12.442 10.1.1.57.6725 10.1.1.29.9417 10.1.1.5.9229 10.1.1.26.3231 10.1.1.20.6902 10.1.1.17.9620 10.1.1.72.3429 10.1.1.11.8032 10.1.1.11.1854 10.1.1.19.7736 10.1.1.59.4118 10.1.1.11.2494 10.1.1.13.4051 10.1.1.16.1105 10.1.1.19.2169 10.1.1.91.8343 10.1.1.85.6366 10.1.1.103.1977 10.1.1.19.6416 10.1.1.81.714 10.1.1.4.6241 10.1.1.11.2133 10.1.1.10.4654 10.1.1.115.5693 10.1.1.7.4458 10.1.1.5.8315 10.1.1.61.5186 10.1.1.73.5717 10.1.1.57.6725 10.1.1.11.1838 10.1.1.142.2782 10.1.1.137.316 10.1.1.129.1678 10.1.1.126.8052 10.1.1.131.2147 10.1.1.94.1164 10.1.1.83.3189 10.1.1.11.2494 10.1.1.1.6054 10.1.1.142.6301 10.1.1.86.6061 10.1.1.94.7598 10.1.1.122.2974 10.1.1.10.187 10.1.1.126.4427 
10.1.1.128.5240 10.1.1.67.8455 10.1.1.131.6019 10.1.1.10.3303 10.1.1.102.4611 10.1.1.59.4566 10.1.1.94.5189 10.1.1.7.2188 10.1.1.143.5200 10.1.1.100.3930 10.1.1.111.4391 10.1.1.68.2636 10.1.1.79.7758 10.1.1.98.2224 10.1.1.101.9229 10.1.1.59.4403 10.1.1.66.8607 10.1.1.71.6156 10.1.1.71.8962 10.1.1.84.1342 10.1.1.89.8541 10.1.1.118.8269 10.1.1.107.6026 10.1.1.4.6869 10.1.1.63.4449 10.1.1.64.1790 10.1.1.80.5390 10.1.1.90.757 10.1.1.98.8827 10.1.1.127.2428 10.1.1.127.6051 10.1.1.138.5310 10.1.1.2.5369 10.1.1.4.3348 10.1.1.61.7073 10.1.1.74.7926 10.1.1.76.6474 10.1.1.91.9916 10.1.1.97.8034 10.1.1.98.2120 10.1.1.110.687 10.1.1.124.6567 10.1.1.142.3205 10.1.1.100.4344 10.1.1.100.8097 10.1.1.100.9852 10.1.1.101.6814 10.1.1.102.5622 10.1.1.104.861 10.1.1.105.5824 10.1.1.107.5138 10.1.1.59.6327 10.1.1.63.562 10.1.1.63.5758 10.1.1.63.7483 10.1.1.64.6885 10.1.1.64.9362 10.1.1.62.3908 10.1.1.119.5350 10.1.1.67.1499 10.1.1.67.5887 10.1.1.67.8572 10.1.1.69.2171 10.1.1.70.1165 10.1.1.70.6538 10.1.1.71.1298 10.1.1.71.698 10.1.1.71.769 10.1.1.71.962 10.1.1.73.1567 10.1.1.74.4934 10.1.1.74.7928 10.1.1.122.2332 10.1.1.76.3519 10.1.1.77.1867 10.1.1.77.2580 10.1.1.77.4182 10.1.1.125.1768 10.1.1.78.7630 10.1.1.62.2614 10.1.1.81.8303 10.1.1.84.3469 10.1.1.84.3675 10.1.1.84.6502 10.1.1.85.2476 10.1.1.85.4887 10.1.1.87.5805 10.1.1.87.9527 10.1.1.89.2433 10.1.1.89.3328 10.1.1.90.1517 10.1.1.90.2534 10.1.1.91.1298 10.1.1.92.1775 10.1.1.93.3743 10.1.1.94.2013 10.1.1.94.7198 10.1.1.95.1658 10.1.1.95.2688 10.1.1.95.3548 10.1.1.97.5430 10.1.1.98.6399 10.1.1.99.8561 10.1.1.111.4564 10.1.1.112.7809 10.1.1.113.6155 10.1.1.113.7814 10.1.1.115.3770 10.1.1.116.5172 10.1.1.117.7484 10.1.1.118.3171 10.1.1.118.3882 10.1.1.124.2718 10.1.1.124.8466 10.1.1.124.8516 10.1.1.126.2574 10.1.1.126.3474 10.1.1.128.5756 10.1.1.130.5902 10.1.1.130.7155 10.1.1.132.319 10.1.1.133.4597 10.1.1.135.7996 10.1.1.138.529 10.1.1.139.275 10.1.1.139.4030 10.1.1.5.4720 10.1.1.58.8470 10.1.1.59.3381 10.1.1.61.4532 10.1.1.140.1484 
10.1.1.141.3512 10.1.1.142.4289 10.1.1.142.6329 10.1.1.108.5722 10.1.1.144.1222 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+44:oai CiteSeerXPSU 10.1.1.1.1526:Triage Performance Isolation and Differentiation for Storage Systems:Magnus Karlsson Christos Karamanolis Xiaoyun Zhu:2009-04-19 Ensuring performance isolation and differentiation among workloads that share a storage infrastructure is a basic requirement in consolidated data centers. Existing management tools rely on resource provisioning to meet performance goals they require detailed knowledge of the system characteristics and the workloads. Provisioning is inherently slow to react to system and workload dynamics, and in the general case, it is impossible to provision for the worst case. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1526 http //www.hpl.hp.com/research/ssp/papers/2004-06-iwqos-triage.pdf en 10.1.1.16.8551 10.1.1.114.7135 10.1.1.141.1 10.1.1.17.502 10.1.1.28.2128 10.1.1.26.662 10.1.1.13.8238 10.1.1.23.3111 10.1.1.16.7583 10.1.1.17.2232 10.1.1.64.986 10.1.1.65.8781 10.1.1.4.6982 10.1.1.19.1470 10.1.1.1.1904 10.1.1.14.8619 10.1.1.4.4818 10.1.1.75.84 10.1.1.65.3456 10.1.1.129.3204 10.1.1.109.630 10.1.1.113.223 10.1.1.72.2528 10.1.1.59.317 10.1.1.121.3572 10.1.1.119.1641 10.1.1.72.3158 10.1.1.74.8799 10.1.1.79.9021 10.1.1.85.8116 10.1.1.135.7692 10.1.1.104.267 10.1.1.107.2911 10.1.1.62.6629 10.1.1.64.5770 10.1.1.64.9860 10.1.1.65.1125 10.1.1.67.1517 10.1.1.67.2395 10.1.1.72.374 10.1.1.79.5247 10.1.1.79.748 10.1.1.81.5717 10.1.1.83.4762 10.1.1.84.3590 10.1.1.85.6390 10.1.1.89.1736 10.1.1.89.2790 10.1.1.93.7577 10.1.1.94.3072 10.1.1.94.5062 10.1.1.111.7201 10.1.1.113.4918 10.1.1.118.881 10.1.1.123.8174 10.1.1.133.38 10.1.1.134.9068 10.1.1.136.8533 10.1.1.130.7318 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+45:oai CiteSeerXPSU 10.1.1.1.1527:On-line Handwritten Japanese Text Recognition free from Constrains on Line:Direction And Character Masaki Nakagawa Motoki Onuma:2009-04-19 This paper describes an on-line handwritten Japanese text recognition method that is liberated from constraints on writing direction (line direction) and character orientation. This method estimates the line direction and character orientation using the time sequence information of pen-tip coordinates and employs writingbox -free recognition with context processing combined. The method can cope with a mixture of vertical, horizontal and skewed lines with arbitrary character orientations. It is expected useful for tablet PC's, interactive electronic whiteboards and so on. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1527 http //www.csc.liv.ac.uk/%7Eprima/ICDAR2003/Papers/0095_627_masaki_n.pdf en 10.1.1.103.5812 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+46:oai CiteSeerXPSU 10.1.1.1.1528:Data Transformation for Warehousing Web Data:Yan Zhu Christof Yan Zhu Christof Bornhövd Alejandro P. Buchmann:2009-04-19 In order to analyze market trends and make reasonable business plans, a company's local data is not sufficient. Decision making must also be based on information from suppliers, partners and competitors. This external data can be obtained from the Web in many cases, but must be integrated with the company's own data, for example, in a data warehouse. To this end, Web data has to be mapped to the star schema of the warehouse. In this paper we propose a semi-automatic approach to support this transformation process. Our approach is based on the use a rooted labeled tree representation of Web data and the existing warehouse schema. Based on this common view we can compare source and target schemata to identify correspondences. We show how the correspondences guide the transformation to be accomplished automatically. We also explain the meaning of recursion and restructuring in mapping rules, which are the core of the transformation algorithm. CiteSeerX 2009-04-19 2007-11-19 2001 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1528 http //www.cs.kun.nl/is/Library/./Data/2001/Zhu/Data/2001-Zhu-Data.pdf en 10.1.1.122.4181 10.1.1.33.3465 10.1.1.25.1724 10.1.1.24.9229 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+47:oai CiteSeerXPSU 10.1.1.1.1529:Discriminant Projections Embedding for Nearest Neighbor Classification.:Petia Radeva And Petia Radeva Jordi Vitrià:2009-04-19 In this paper we introduce a new embedding technique to linearly project labeled data samples into a new space where the performance of a Nearest Neighbor classifier is improved. The approach is based on considering a large set of simple discriminant projections and finding the subset with higher classification performance. In order to implement the feature selection process we propose the use of the adaboost algorithm. The performance of this technique is tested in a multiclass classification problem related to the production of cork stoppers for wine bottles. CiteSeerX Springer Verlag 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1529 http //www.cvc.uab.es/~jordi/ciarp2004.pdf en 10.1.1.99.3419 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+48:oai CiteSeerXPSU 10.1.1.1.1530:Vowel - Zero Alternations in Czech Prefixes:Tobias Scheer Tobias Scheer Clite E -e:2009-04-19 e inchoative, "up" p#ed 16 48 "before, in front of" roz 80 295 inch., "disperse/ break into pieces" nad 5 33 "over" pod 26 74 "under" od 41 253 distantiational movement sum 195 762 TOTAL 957 (6) the secret must be found in the different status of stem-initial CC-clusters. (7) stem-initial CCs observed with a. prefixal-V only +e b. prefixal - only -e c. both mix +e only 17 CCs -e only 38 CCs ct, dn, d#, jm, lstn, mk, pn, ps, rv, #v, sch, sr, v, tn, v#, z#, #r bl, b#, cl, cv, #l, f#, fr, hl, hm, hv, chl, chrchl, km, kr, k#, kv, m#, mr, pl, pt, sh, sv, k, n, p, r, tl, tr, tv, vd, vr, zbr, zp, zt, #h, #m, ##, #v mix 35 CCs br, #t, dm, dr, dv, hn, hr, h#, chv, jd, kd, kl, ml, mn, pj, pr, p#, sk, sl, sm, sn, sp, st, l, t, t#, v#, vl, v#, v, vz, zd, zl, zn, zv TOTAL nb CC 90 (8) A given root belongs to one and only one of these three groups. (9) CC mix represented by how many it CiteSeerX 2009-04-19 2007-11-19 1998 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1530 http //www.unice.fr/dsl/tobweb/papers/ScheerHdtSzeged98.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+49:oai CiteSeerXPSU 10.1.1.1.1531:Automatic Construction of Navigable Concept Networks Characterizing Text Databases:Claudio Carpineto Giovanni Romano Fondazione Ugo Bordoni:2009-04-19 In this paper we present a comprehensive approach to conceptual structuring and intelligent navigation of text databases. Given any collection of texts, we first automatically extract a set of index terms describing each text. Next, we use a particular lattice conceptual clustering method to build a network of clustered texts whose nodes are described using the index terms. We argue that the resulting network supports an hybrid navigational approach to text retrieval - implemented into an actual user interface - that combines browsing potentials with good retrieval performance. We present the results of an experiment on subject searching where this approach outperformed a conventional Boolean retrieval system. CiteSeerX Springer-Verlag 2009-04-19 2007-11-19 1995 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1531 http //search.fub.it/claudio/pdf/AIIA1995.pdf en 10.1.1.21.1806 10.1.1.64.7424 10.1.1.14.7549 10.1.1.26.1391 10.1.1.122.5391 10.1.1.1.4542 10.1.1.50.9283 10.1.1.140.4388 10.1.1.70.980 10.1.1.72.9991 10.1.1.60.2145 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+50:oai CiteSeerXPSU 10.1.1.1.1532:Lawrence S. Brakmo, Deborah A. Wallach, Marc A. Viredaz:Mobile And Media Lawrence S. Brakmo Lawrence S. Brakmo Deborah A. Wallach Deborah A. Wallach Marc A. Viredaz Marc A. Viredaz:2009-04-19 Energy management has become one of the great challenges in portable computing. This is the result of the increasing energy requirements of modern portable devices without a corresponding increase in battery technology. Sleep is a new energy reduction technique for handheld devices that is most effective when the handheld's processor is lightly loaded, such as when the user is reading a document or looking at a web page. When possible, rather than using the processor's idle mode, Sleep tries to put the processor in sleep mode for short periods (less than one second) without affecting the user's experience. To enhance the perception that the system is on, an image is maintained on the display and activity is resumed as a result of external events such as touch-screen and button activity. We have implemented Sleep on a prototype pocket computer, where it has reduced energy consumption by up to 60%. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1532 http //www.hpl.hp.com/techreports/2004/HPL-2004-11.pdf en 10.1.1.143.7417 10.1.1.39.3266 10.1.1.121.5295 10.1.1.29.6746 10.1.1.31.4277 10.1.1.4.1582 10.1.1.108.8205 10.1.1.36.2109 10.1.1.111.4810 10.1.1.134.8329 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+51:oai CiteSeerXPSU 10.1.1.1.1533:Answers to the Top Ten Input Modeling Questions:Bahar Biller Barry L. Nelson:2009-08-25 In this tutorial we provide answers to the top ten inputmodeling questions that new simulation users ask, point out common mistakes that occur and give relevant references. We assume that commercial input-modeling software will be used when possible, and only suggest non-commercial options when there is little else available. Detailed examples will be provided in the tutorial presentation. CiteSeerX 2009-08-25 2007-11-19 2002 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1533 http //www.informs-cs.org/wsc02papers/005.pdf en 10.1.1.58.5325 10.1.1.105.5693 10.1.1.107.780 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+52:oai CiteSeerXPSU 10.1.1.100.8780:Jaguar Java in Next-Generation Database Systems:Johannes Gehrke Www Page:2008-07-01 • Title Jaguar Java in Next-Generation Database Systems Keywords Extensibility query optimization heterogeneous environments database compression. Project Summary This project explores fundamental systems issues in query processing performance. We investigate this problem from three different directions client-server processing, heterogeneous environments, and database compression. First, we devised new query processing strategies than push processing capabilities into the client, and we devised query execution plans that can span server and clients. This allows us to trade resource usage between client, server and the interconnection network. We then extended this work to parallel query processing in heterogeneous environments we are currently implementing a parallel dataflow engine that adapts naturally to resource imbalances at the hardware components. Last, we are investigating the use of compression in database systems. We devised a new framework for database compression and new query processing and query optimization strategies to integrate compression into a modern query processor. All our techniques have been implemented in the NSF-funded Cornell Predator object-relational database system. We extended the system with several ways to store compressed relations, and we implemented a fully compression-aware query optimizer. To best of our knowledge, our work is the first result on compression-aware query optimization. Publications and Products � Project homepage CiteSeerX 2008-07-01 2008-04-02 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.100.8780 http //itlab.uta.edu/idm01/FinalReports/reports/IDM01R048.pdf en 10.1.1.20.9548 10.1.1.26.9191 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+53:oai CiteSeerXPSU 10.1.1.106.4689:Patterns for Next-Generation Database Systems IST-2001-33058 Recent Advances on Pattern Representation and Management:I. Ntoutsi (cti/piraeus A. Pikrakis G. Tsatsaronis (aueb E. Vrachnos Michalis Vazirgiannis Maria Halkidi Daniel A. Keim Irene Ntoutsi Aggelos Pikrakis Sergios Theodoridis Yannis Theodoridis George Tsatsaronis Euripides Vrachnos:2008-07-01 patterns data mining pattern modeling pattern-bases information retrieval Pattern Base Management Systems Research supported by the Commission of the European Communities under the Information CiteSeerX 2008-07-01 2008-04-03 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.106.4689 http //www.db-net.aueb.gr/gbt/publications/PANDA_TR-2003-04.pdf en 10.1.1.40.6757 10.1.1.108.8490 10.1.1.33.3138 10.1.1.144.4956 10.1.1.42.3240 10.1.1.56.8772 10.1.1.32.9565 10.1.1.50.5717 10.1.1.41.4883 10.1.1.105.8622 10.1.1.102.5562 10.1.1.16.976 10.1.1.34.2745 10.1.1.7.6588 10.1.1.44.8451 10.1.1.5.6904 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+54:oai CiteSeerXPSU 10.1.1.122.192:Query by Templates Using the Shape of Information to Search Next-Generation Databases:Arijit Sengupta Andrew Dillon:2008-12-04 Abstract—We present a user-centered database query language called QBT (Query By Templates) for user communication with databases containing complex structured data, such as data stored in the Extensible Markup Language (XML). XML has revolutionized data storage as well as representation and transfer methods in today’s internet applications. The growing popularity of XML as a language for the representation of data has enabled its use for several applications involving storage, interchange, and retrieval of data. Several textual query languages have been proposed for XML retrieval, including the World Wide Web Consortium’s (W3C) recommendation of XQuery. Native XML database systems have been implemented, all of which provide methods for user communication with the database, although most communication methods use text-based query languages or form-based interfaces. QBT, the language presented here, is one of the first attempts toward a generalized alternative language that is based on human factors of familiarity. It is ideal for documents with a simple yet highly recognizable layout (e.g., poems, dictionaries, journals, etc.). We present the QBT language and report results from an initial usability test that shows promise for this type of an interface as a generalized user–database communication method. Index Terms—Complex structured data, Extensible Markup Language (XML), information shape, query evaluation, query languages, query processing, visual languages, XQuery. 
CiteSeerX 2008-12-04 2008-12-03 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.122.192 http //www.ischool.utexas.edu/~adillon/Journals/IEEEJit.pdf en 10.1.1.109.4049 10.1.1.11.6264 10.1.1.22.7172 10.1.1.33.1762 10.1.1.102.1564 10.1.1.35.4300 10.1.1.20.7529 10.1.1.17.933 10.1.1.57.2983 10.1.1.17.4528 10.1.1.92.5486 10.1.1.110.6779 10.1.1.28.2863 10.1.1.105.3351 10.1.1.2.8978 10.1.1.104.2288 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+55:oai CiteSeerXPSU 10.1.1.52.456:Security in Next-Generation Databases:Chris Strahorn:2009-04-12 this paper, a summary of the various models presented for securing next-generation databases will be given. Additionally, an overview of the security features in commercial next-generation databases is also given in order to show the need for further work in this field. 1 Introduction CiteSeerX 2009-04-12 2007-11-22 1998 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.52.456 http //www.db.cs.ucdavis.edu/teaching/289F/papers/chris.ps en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+56:oai CiteSeerXPSU 10.1.1.66.4692:Security in Next-Generation Databases:Chris Strahorn:2008-07-01 In the past several years, several new types of databases have moved out of the academic world and have been released as commercial products. These new types of databases are commonly referred to as next-generation databases and include object-oriented, object-relational, active, and deductive databases. Each of these types of database offer an extended set of features when compared to a traditional relational database. In turn, these new features require new methods in order to secure the data held within. In this paper, a summary of the various models presented for securing next-generation databases will be given. Additionally, an overview of the security features in commercial next-generation databases is also given in order to show the need for further work in this field. 1 CiteSeerX 2008-07-01 2008-02-06 1998 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.66.4692 http //www.db.cs.ucdavis.edu/teaching/289F/papers/chris.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+57:oai CiteSeerXPSU 10.1.1.78.1427:Java in Next-Generation Database Systems::2008-07-01 applications, including database applications. CiteSeerX 2008-07-01 2008-02-07 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.78.1427 http //www.cs.wisc.edu/~cao/WISP98/final-versions/praveen.ps en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+58:oai CiteSeerXPSU 10.1.1.1.1534:Proceedings of the 2002 Winter Simulation Conference:Ycesan Chen Snowdon E. Yücesan C. -h. Chen J. L. Snowdon J. M. Charnes Sang D. Choi Anil R. Kumar:2009-04-19 This paper discusses the initial efforts to implement simulation modeling as a visual management and analysis tool at an automotive foundry plant manufacturing engine blocks. The foundry process was modeled using Pro Model to identify bottlenecks and evaluate machine performance, cycle times and production data (total parts, rejects, throughput, products/hr) essential for efficient production control. Results from the current system identified assembly machine work area as the bottleneck (although utilization was greater than 95% for two assembly machines) resulting in high work-in-process (WIP) inventory level, low resource and machine utilization. Based on these results, optimum numbers were identified through use of scenarios by varying the number of assembly machines and processing time of each machine. In addition to these scenarios, strategies for production control involving buffer sizes were also made. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1534 http //www.informs-cs.org/wsc02papers/138.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+59:oai CiteSeerXPSU 10.1.1.1.1535:A Sub-Quadratic Algorithm for Conjunctive and Disjunctive BESs:Jan Friso Groote Misa Keinänen:2009-04-19 We present an algorithm for conjunctive and disjunctive Boolean equation systems (BESs), which arise frequently in the verification and analysis of finite state concurrent systems. In contrast to the previously best known O(e ) time solutions, our algorithm computes the solution of such a fixpoint equation system with size e and alternation depth d in O(e log d) time. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1535 http //www.win.tue.nl/~jfg/articles/CSR-04-13.pdf en 10.1.1.58.4882 10.1.1.81.9591 10.1.1.108.4288 10.1.1.140.2376 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+60:oai CiteSeerXPSU 10.1.1.1.1536:Innovations of the NetSolve Grid Computing System:Dorian C. Arnold Henri Casanova Jack Dongarra:2009-04-19 KEY WORDS Grid computing distributed computing heterogeneous network computing client--server This article is meant to provide the reader with details regarding the present state of the project, describing the current architecture of the system, its latest innovations and other systems 10 that make use of the NetSolve infrastructure. Copyright # 2002 John Wiley & Sons, Ltd CiteSeerX 2009-04-19 2007-11-19 2002 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1536 http //icl.cs.utk.edu/news_pub/submissions/cpe678.pdf en 10.1.1.25.8254 10.1.1.49.8881 10.1.1.32.6963 10.1.1.46.3287 10.1.1.15.9060 10.1.1.43.1259 10.1.1.30.5246 10.1.1.27.3632 10.1.1.115.1390 10.1.1.107.4174 10.1.1.65.4741 10.1.1.3.4994 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+61:oai CiteSeerXPSU 10.1.1.1.1537:The InfoVis Toolkist:Jean-daniel Fekete Jean-daniel Fekete Projet In-situ:2009-04-19 This report presents the InfoVis Toolkit, designed to support the creation, extension and integration of advanced 2D Information Visualization components into interactive Java Swing applications. The InfoVis Toolkit provides specific data structures to achieve a fast action/feedback loop required by dynamic queries. It comes with a large set of components such as range sliders and tailored control panels required to control and configure the visualizations. These components are integrated into a coherent framework that simplifies the management of rich data structures and the design and extension of visualizations. Supported data structures currently include tables, trees and graphs. Supported visualizations include scatter plots, time series, Treemaps, node-link diagrams for trees and graphs and adjacency matrix for graphs. All visualizations can use fisheye lenses and dynamic labeling. The InfoVis Toolkit supports hardware acceleration when available through Agile2D, an implementation of the Java Graphics API based on OpenGL, achieving speedups of 10 to 60 times. CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1537 ftp //ftp.inria.fr/INRIA/publication/publi-pdf/RR/RR-4818.pdf en 10.1.1.123.805 10.1.1.43.631 10.1.1.41.1810 10.1.1.56.8772 10.1.1.13.6859 10.1.1.20.9570 10.1.1.58.2019 10.1.1.25.5975 10.1.1.111.2892 10.1.1.18.1023 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+62:oai CiteSeerXPSU 10.1.1.1.1538:Complex Systems Modeling:Christophe Lecerf Thi:2009-04-19 This paper addresses the simulation of the dynamics of complex systems by using hierarchical graph and multi-agent system. A complex system is composed of numerous interacting parts that can be described recursively. First we summarize the hierarchical aspect of the complex system. We then present a description of hierarchical graph as a data structure for structural modeling in parallel with dynamics simulation by agents. This method can be used by physiological modelers, ecological modelers, etc as well as in other domains that are considered as complex systems. An example issued from physiology will illustrate this approach. CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1538 http //e-ifi.org/rivf/2003/proceedings/p93-98.pdf en 10.1.1.28.9248 10.1.1.4.6467 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+63:oai CiteSeerXPSU 10.1.1.1.1539:Proceedings of the 2003 Winter Simulation Conference:Chick Snchez Ferrin S. Chick P. J. Sánchez D. Ferrin D. J. Morrice Gary Tan Na Zhao:2009-04-19 uses to deliver value to its customers. In today's competitive environment, the globalization of markets has rapidly substituted the traditional integrated business. The competitive success of an organization no longer depends only on its own efforts, but relies on the efficiency of the entire supply chain. Therefore, building an effective supply chain is fast becoming paramount in today's marketplace. Distributed Supply Chain (DSC) Simulation has been identified as one of the best means to test and analyze the performance of supply chains. The Generic Runtime Infrastructure for Distributed Simulation (GRIDS) is a middleware that supports the reuse and interoperation of DSC simulations. This paper reports the experience on employing the GRIDS to support the distributed collaboration of an automobile manufacture supply chain simulation. Several advantages of GRIDS are also discussed here which make it an ideal middleware for DSC simulations. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1539 http //www.informs-cs.org/wsc03papers/142.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+64:oai CiteSeerXPSU 10.1.1.1.1540:Multi-dimensional Visual Representations for Underwater Environmental Uncertainty:Greg S. Schmidt Sue-Ling Chen Greg S. Schmidt Sue-ling Chen Aaron N. Bryden Mark A. Livingston Bryan R. Osborn Lawrence J. Rosenblum:2009-04-19 this paper) and (2) develop a visual method for each characterization. The mariner community needs enhanced characterizations of environmental uncertainty now, but the accuracy of the characterizations is still not sufficient enough and therefore formal user evaluations cannot take place at this point in development. We received feedback on the applicability of our techniques from domain experts. We used this in conjunction with previous results to compile a set of development guidelines (some obvious, others not) CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1540 http //www.ait.nrl.navy.mil/vrlab/pages/../papers/j_IEEECGA04.pdf en 10.1.1.109.7470 10.1.1.60.7349 10.1.1.125.6248 10.1.1.144.7725 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+65:oai CiteSeerXPSU 10.1.1.1.1541:InstantGrid A Framework for On-Demand Grid:Point Construction Roy Roy S. C. Ho K. K. Yin David C. M. Lee Daniel H. F. Hung Cho-li Wang Francis C. M. Lau:2009-04-19 This paper proposes the InstantGrid framework for on-demand construction of grid points. In contrast to traditional approaches, InstantGrid is designed to substantially simplify software management in grid systems, and is able to instantly turn any computer into a grid-ready platform with the desired execution environment. Experimental results demonstrate that a 256-node grid point with commodity grid middleware can be constructed in five minutes from scratch. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1541 http //www.cs.hku.hk/~clwang/papers/InstantGrid-gcc2004-camera.pdf en 10.1.1.114.2815 10.1.1.118.9332 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+66:oai CiteSeerXPSU 10.1.1.1.1542:Prototyping Proof Carrying Code:Martin Wildmoser Tobias Nipkow Gerwin Klein Sebastian Nanz:2009-04-19 We introduce a generic framework for proof carrying code, developed and mechanically verified in Isabelle/HOL. The framework defines and proves sound a verification condition generator with minimal assumptions on the underlying programming language, safety policy, and safety logic. We demonstrate its usability for prototyping proof carrying code systems by instantiating it to a simple assembly language with procedures and a safety policy for arithmetic overflow. CiteSeerX Kluwer 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1542 http //www.doc.ic.ac.uk/~nanz/publications/./ppcc_tcs04.pdf en 10.1.1.40.2507 10.1.1.24.6526 10.1.1.29.2076 10.1.1.40.7179 10.1.1.42.4453 10.1.1.43.6143 10.1.1.103.6797 10.1.1.113.4649 10.1.1.11.9523 10.1.1.10.8649 10.1.1.84.1258 10.1.1.35.532 10.1.1.129.5517 10.1.1.86.3296 10.1.1.7.139 10.1.1.83.9822 10.1.1.103.4133 10.1.1.106.1397 10.1.1.65.5537 10.1.1.71.2795 10.1.1.123.3289 10.1.1.113.9340 10.1.1.132.1879 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+67:oai CiteSeerXPSU 10.1.1.1.1543:Proceedings of the Block Island Workshop on Cooperative Control,:Springer-Verlag Series Lecture Wei Ren Al W. Beard Timothy W. Mclain:2009-04-19 this paper. Ref [15] addresses the knowledge consensus problem when teams of agents only have local communication between nearest neighbors. Since the set of nearest neighbors is constantly changing, the overall system becomes a hybrid system. The paper shows that if the union over all bidirectional communication graphs is connected for finite periods of time, then consensus is achieved. While the results in this paper are not as strong, only unidirectional communication links are assumed CiteSeerX Springer-Verlag 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1543 http //www.et.byu.edu/~wr25/./papers/preprints/bookchapters/RenBeardMcLain03.pdf en 10.1.1.28.2247 10.1.1.10.4292 10.1.1.1.3664 10.1.1.72.7624 10.1.1.2.7148 10.1.1.32.8694 10.1.1.4.8605 10.1.1.15.1267 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+68:oai CiteSeerXPSU 10.1.1.1.1544:Hidden-Action in Multi-Hop Routing:Michal Feldman John Chuang:2009-04-19 In any multi-hop routing scheme, cooperation by the intermediate nodes are essential for the succesful delivery of traffic. However, the effort exerted by the intermediate nodes are often unobservable by the source and/or destination nodes. We show it is possible to overcome this problem of hidden action by designing contracts, in the form of payments, to induce cooperation from the intermediate nodes. Interestingly, the ability to monitor per-hop or per-path outcomes, even if costless to implement, may not improve the welfare of the participants or the performance of the network. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1544 http //www.eecs.harvard.edu/p2pecon/confman/papers/s3p1.pdf en 10.1.1.105.3673 10.1.1.19.8434 10.1.1.28.5987 10.1.1.21.4823 10.1.1.42.559 10.1.1.132.4609 10.1.1.10.8652 10.1.1.11.4819 10.1.1.11.9831 10.1.1.19.1750 10.1.1.34.2032 10.1.1.113.7948 10.1.1.11.8397 10.1.1.60.1810 10.1.1.4.1706 10.1.1.13.9006 10.1.1.119.7102 10.1.1.14.1149 10.1.1.14.6620 10.1.1.10.7859 10.1.1.107.3061 10.1.1.119.8132 10.1.1.106.6176 10.1.1.122.7182 10.1.1.123.5145 10.1.1.131.5662 10.1.1.135.2383 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+69:oai CiteSeerXPSU 10.1.1.1.1545:EPTD DISCUSSION PAPER NO. 83 HOW AGRICULTURAL RESEARCH AFFECTS URBAN POVERTY IN DEVELOPING COUNTRIES THE CASE OF CHINA:Shenggen Fan Cheng Fang Xiaobo Zhang:2009-04-19 developing countries China agricultural research urban poverty i ACKNOWLEDGMENTS This paper develops a framework to measure the impact of agricultural research on urban poverty. Increased investments in agricultural R&D can lower food prices by increasing food production, and lower food prices benefit the urban poor because they often spend more than 60% of their income on food. Application of the framework to China shows that these food price effects are large and that the benefits for the urban poor have been about as large as the benefits for the rural poor. KEYWORDS developing countries, China, agricultural research, urban, poverty ii ACKNOWLEDGMENTS The authors are grateful for helpful comments received from Peter Hazell, Robert Evanson and participants in a session at the American Agricultural Economics Association annual meeting in Chicago, August 5-8, 2001. iii TABLE OF CONTENTS 1. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1545 http //www.ifpri.org/divs/eptd/dp/papers/eptdp83.pdf en 10.1.1.144.9394 10.1.1.1.3288 10.1.1.58.6199 10.1.1.58.3593 10.1.1.31.1619 10.1.1.58.2714 10.1.1.58.2531 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+70:oai CiteSeerXPSU 10.1.1.1.1546:Ontology-Based Query Refinement:For Multimedia Meta Sonja Zillner Werner Winiwarter:2009-04-19 To enable e#cient access to multimedia content, the media data has to be augmented by semantic metadata and functionality. The semantic representation has to be integrated with domain ontologies to fully exploit domain-specific knowledge. This knowledge can be used for refining ambiguous user queries by closing the conceptual gap between the user and the information to be retrieved. In our previous research, we have introduced Enhanced Multimedia Meta Objects (EMMOs) as a new approach for semantic multimedia meta modeling, as well as the query algebra EMMA, which is adequate and complete with regard to the EMMO model. This paper focuses on the refinement of EMMA queries by incorporating ontological knowledge. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1546 http //www.ifs.univie.ac.at/~ww/iiwas04a.pdf en 10.1.1.44.6030 10.1.1.93.962 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+71:oai CiteSeerXPSU 10.1.1.1.1547:Computerising Natural History Card Archives:Downton Lucas And A. C. Downton S. M. Lucas G. Patoulas:2009-04-19 This paper summarises the achievements of a multidisciplinary Bioinformatics project which has the objective of providing a general mechanism for efficient computerisation of typewritten/hand-annotated archive card indexes, of the type found in most museums, archives and libraries. In addition to efficiently scanning, recognising and databasing the content of the cards, the original card images must be maintained as the ultimate source record, and a flexible database structure is required to allow taxonomists to reorganise and update the resulting online archive. Implementation mechanisms for each part of the overall system are described, and conversion performance for a demonstrator database of 27,578 Pyralid moth archive cards is reported. The system is currently being used to convert the full NHM archive of Lepidoptera totalling 290,886 cards. CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1547 http //www.csc.liv.ac.uk/%7Eprima/ICDAR2003/Papers/0064_562_downton_a.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+72:oai CiteSeerXPSU 10.1.1.1.1548:Genome-Wide Detection of Alternative Splicing in Expressed Sequences Using Partial Order Multiple Sequence Alignment Graphs:C. Grasso B. Modrek Y. Xing C. Lee:2009-04-19 this paper we present a detailed examination of the technical problems we have encountered in undertaking high-throughput analyses of alternative splicing over the last four years, and the specific solutions we have developed for these problems, in seeking to minimize both false positive and false negative errors CiteSeerX 2009-04-19 2007-11-19 2001 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1548 http //www.cs.mun.ca/~harold/BDG/grasso.pdf en 10.1.1.109.6075 10.1.1.115.1836 10.1.1.3.1101 10.1.1.104.1301 10.1.1.20.6089 10.1.1.108.6744 10.1.1.92.2847 10.1.1.102.5592 10.1.1.58.4273 10.1.1.83.8143 10.1.1.123.8933 10.1.1.100.2820 10.1.1.100.7691 10.1.1.101.5916 10.1.1.101.9727 10.1.1.102.7070 10.1.1.103.1344 10.1.1.104.9576 10.1.1.108.8802 10.1.1.112.9910 10.1.1.67.4233 10.1.1.74.9803 10.1.1.83.5745 10.1.1.83.7724 10.1.1.84.5073 10.1.1.84.6844 10.1.1.86.4181 10.1.1.87.3649 10.1.1.87.8082 10.1.1.90.2517 10.1.1.91.5647 10.1.1.92.8657 10.1.1.95.7769 10.1.1.95.7882 10.1.1.96.8415 10.1.1.99.6833 10.1.1.99.8018 10.1.1.100.4446 10.1.1.114.4132 10.1.1.115.3917 10.1.1.117.404 10.1.1.121.5264 10.1.1.122.809 10.1.1.125.5346 10.1.1.112.430 10.1.1.127.9023 10.1.1.128.4690 10.1.1.130.1155 10.1.1.130.5318 10.1.1.131.5021 10.1.1.137.3169 10.1.1.14.3143 10.1.1.1.5756 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+73:oai CiteSeerXPSU 10.1.1.1.1549:Adaptive Sampling for Environmental Robotics:Mohammad Rahimi Richard Pon William J. Kaiser Gaurav S. Sukhatme Deborah Estrin Mani Srivastava:2009-04-19 this paper we describe ######################################## a new distributed, robotic sensor methodology developed for applications including characterization of environmental structure and phenomena. NIMS exploits deployed infrastructure that provides the benefits of precise motion, aerial suspension, and low energy sustainable operations in complex environments. NIMS nodes may explore a three-dimensional environment and enable the deployment of sensor nodes at diverse locations and viewing perspectives. NIMS characterization of phenomena in a three dimensional space must now consider the selection of sensor sampling points in both time and space. Thus, we introduce a new approach of mobile node adaptive sampling with the objective of minimizing error between the actual and reconstructed spatiotemporal behavior of environmental variables while minimizing required motion. In this approach, the NIMS node first explores as an agent, gathering a statistical description of phenomena using a ##################################approach. By iteratively increasing sampling resolution, guided adaptively by the measurement results themselves, this NIMS sampling enables reconstruction of phenomena with a systematic method for balancing accuracy with sampling resource cost in time and motion. This adaptive sampling method is described analytically and also tested with simulated environmental data. Experimental evaluations of adaptive sampling algorithms have also been completed. Specifically, NIMS experimental systems have been developed for monitoring of spatiotemporal variation of atmospheric climate phenomena. A NIMS system has been deployed at a field biology station to map phenomena in a 50m width and 50m span transect in a forest environme... 
CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1549 http //cres.usc.edu/pubdb_html/files_upload/400.pdf en 10.1.1.131.2084 10.1.1.18.1128 10.1.1.20.7017 10.1.1.8.1672 10.1.1.92.7418 10.1.1.93.7998 10.1.1.71.4666 10.1.1.132.6943 10.1.1.100.8264 10.1.1.64.4012 10.1.1.79.7058 10.1.1.110.501 10.1.1.128.7519 10.1.1.106.8162 10.1.1.63.3553 10.1.1.66.1967 10.1.1.69.4362 10.1.1.72.4649 10.1.1.84.7790 10.1.1.91.7021 10.1.1.93.6045 10.1.1.97.2624 10.1.1.99.4191 10.1.1.111.6699 10.1.1.115.2562 10.1.1.129.5023 10.1.1.135.5385 10.1.1.138.7945 10.1.1.139.8283 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+74:oai CiteSeerXPSU 10.1.1.1.1550:Enhanced Expressiveness in Scripting Using AnimalScript V2:Guido Rößling Felix Gliesche Thomas Jajeh Thomas Widjaja:2009-04-19 this paper) is scripting. Here, the user provides a simple ASCII file containing commands that steer the visualization. Typically, the commands are held in plain English to make using the underlying scripting language easier. Typical examples for scripting-driven AV systems include JAWAA (Akingbade et al., 2003), JSamba (Stasko, 1998), JHAV E (Naps et al., 2000) and Animal (Roling and Freisleben, 2002) CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1550 http //nibbler.tk.informatik.tu-darmstadt.de/Publications/2004/pvw12.pdf en 10.1.1.19.5818 10.1.1.28.9812 10.1.1.103.9289 10.1.1.78.7343 10.1.1.138.1175 10.1.1.140.5036 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+75:oai CiteSeerXPSU 10.1.1.1.1551:Comparison of Clustering Algorithms in Speaker Identification:Tomi Kinnunen Teemu Kilpelinen Pasi FrÄnti:2009-04-19 Speech processing speaker identification vector In speaker identification, we match a given (unkown) speaker to the set of known speakers in a database. The database is constructed from the speech samples of each known speaker. Feature vectors are extracted from the samples by short-term spectral analysis, and processed further by vector quantization for locating the clusters in the feature space. We study the role of the vector quantization in the speaker identification system. We compare the performance of different clustering algorithms, and the influence of the codebook size. We want to find out, which method provides the best clustering result, and whether the difference in quality contribute to improvement in recognition accuracy of the system. CiteSeerX 2009-04-19 2007-11-19 0 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1551 http //cs.joensuu.fi/pages/tkinnu/research/pdf/ComparisonClusteringAlgsSpeakerRec.pdf en 10.1.1.58.5875 10.1.1.1.5615 10.1.1.58.2111 10.1.1.58.3968 10.1.1.125.5073 10.1.1.104.7507 10.1.1.78.6536 10.1.1.81.4597 10.1.1.89.5956 10.1.1.143.9804 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+76:oai CiteSeerXPSU 10.1.1.1.1552:Towards a Generic Talking Head:Brar Bailly Chabanas:2009-04-19 MPEG-4 Facial Animation Parameters and Facial Definition Parameters We present here a framework for developing a generic talking head capable of reproducing the anatomy and the facial deformations induced by speech movements with a set of a few parameters. We will show that the speaker-specific articulatory movements can be straightforward encoded into the normalized MPEG-4 Facial Animation Parameters and Facial Definition Parameters. CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1552 http //www.icp.inpg.fr/ICP/publis/synthese/_mb/clonegen_mb_ISSP03.pdf en 10.1.1.128.4967 10.1.1.30.2775 10.1.1.130.9240 10.1.1.15.8634 10.1.1.31.3794 10.1.1.142.982 10.1.1.32.7228 10.1.1.42.3468 10.1.1.52.5953 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+77:oai CiteSeerXPSU 10.1.1.1.1553:Language Policy and Localization in Pakistan Proposal for a Paradigmatic Shift:Tariq Rahman:2009-04-19 This paper examines the present language policy of Pakistan and its consequences for the indigenous languages of the country. It then relates this to efforts at localization--- creating computer software in the languages of the country---and argues that all such efforts have been power-oriented. This means that only those languages have been selected for localization which are used in the domains of power---government, bureaucracy, judiciary, military, commerce, media, education, research etc---thus further strengthening them vis a vis the marginalized languages of the people. It is therefore argued that the efforts at localization should be rights-based i.e. all language communities should be considered equal and their languages should be localized not because of their present use in the domains of power but because they too should be strengthened by being put to such use. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1553 http //www.elda.fr/en/proj/scalla/SCALLA2004/rahman.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+78:oai CiteSeerXPSU 10.1.1.1.1554:BioMed Central:Bmc Medical Informatics Biomed Central Cynthia S Gadd Open Access Kathleen Ann Mckibbon Kathleen Ann Mckibbon Cynthia S Gadd:2009-04-19 Background Quantitative studies are becoming more recognized as important to understanding health care with all of its richness and complexities. The purpose of this descriptive survey was to provide a quantitative evaluation of the qualitative studies published in 170 core clinical journals for 2000. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1554 http //www.biomedcentral.com/content/pdf/1472-6947-4-11.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+79:oai CiteSeerXPSU 10.1.1.1.1555:Object-Relational Management of Multiply Represented Geographic Entities:Anders Friis-Christensen National Anders Friis-christensen:2009-04-19 Multiple representation occurs when information about the same geographic entity is represented electronically more than once. This occurs frequently in practice, and it invariably results in the occurrence of inconsistencies among the different representations. We propose to resolve this situation by introducing a multiple representation management system (MRMS), the schema of which includes rules that specify how to identify representations of the same entity, rules that specify consistency requirements, and rules used to restore consistency when necessary. In this paper, we demonstrate by means of a prototype and a realworld case study that it is possible to implement a multiple representation schema language on top of an objectrelational database management system. Specifically, it is demonstrated how it is possible to map the constructs of the language used for specifying the multiple representation schema to functionality available in Oracle. Though some limitations exist, Oracle has proven to be a suitable platform for implementing an MRMS. CiteSeerX IEEE Computer Society 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1555 http //www.cs.auc.dk/~csj/Papers/Files/2003_friis-christensenSSDM.pdf en 10.1.1.118.6768 10.1.1.101.5051 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+80:oai CiteSeerXPSU 10.1.1.1.1556:Netview Application Software for Constructing and Visually Exploring Phylogenetic Networks:Kirill Kryukov Naruya Saitou:2009-04-19 phylogenetic network neighbor-joining method Introduction Reconstructing evolutionary history of a group of species is a major task in biological study. Many methods exist for reconstructing such history, or phylogeny, but most of them are based on an assumption that evolution of given gene family can be represented as a tree. However some families of genes may have alternative historical structure that cannot be represented as a tree. Such history can result from events such as recombination, gene conversion and horizontal gene transfer, and require not tree but network for accurate representation. Parallel substitutions are also sources of non-tree networks. For the purpose of understanding such history we developed a program Netview, which enables constructing phylogenetic network based on the sequence data. 2 Netview Netview uses following approach to construct a network First, it constructs a NJ-tree [1] for provided set of sequences, using nucleotide di#erence distance matrix (Fig. 1). Then every nucleotide site positio CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1556 http //hc.ims.u-tokyo.ac.jp/JSBi/journal/GIW03/GIW03SS06.pdf en 10.1.1.85.2481 10.1.1.134.5815 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+81:oai CiteSeerXPSU 10.1.1.113.6268:Multiple Dynamic View Support for Cooperative Work:Yusuke Yokota Tatsuya Nakamura Hiroyuki Tarumi Yahiko Kambayashi:2008-08-14 VIEW Media is a cooperative hypermedia system, which supports cooperative work utilizing hypermedia documents. It provides fundamental functions for cooperative work support system and enables developers to construct various groupware such as distributed presentation systems, education systems, conference systems, and so forth. This system provides a powerful and flexible mechanism of customization. The mechanism can alter presentation, structure, behavior and authority of hypermedia documents and workspace, which supports dynamic change of roles of users. This paper describes the purpose of VIEW Media, its basic model and implementation, and a user interface which supports conferences among users who have different access rights to shared documents. 1. CiteSeerX 2008-08-14 2008-08-14 1999 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.113.6268 http //ieeexplore.ieee.org/iel5/6197/16540/00765767.pdf en 10.1.1.139.1894 10.1.1.21.4748 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+82:oai CiteSeerXPSU 10.1.1.21.7175:Adding View Support to ODMG-93:M. Dobrovnik J. Eder:2009-04-15 A concept to introduce external models in object oriented databases is presented, such that application programs do no longer interface directly the whole conceptual schema, but work against external schemas specifically designed for the applications requirements. There are virtually no restrictions for such applications, since the interaction with the database takes place via updateable views. The data model is a somewhat simplified form of ODMG-93 [4], where we incorporated the additional constructs we need for the external schema definition. The approach makes a clear distinction between types and classes, and also separates the type and class hierarchies of the conceptual schema from the external type and class hierarchies. With type derivation, we provide a powerful type restructuring mechanism, which allows to define an external type which is based on a conceptual type. In the derivation process, one can omit conceptual components and methods or redefine their types. Additional methods can be defined for external types as well. 1 To appear in "Advances in Databases and Information Systems ADBIS '94" , Proc. of the Intl. Workshop of the Moscow ACM SIGMOD Chapter By defining well formed external schemas via constraints and schema invariants, we are able to guarantee unambiguous method resolution, steadiness of method resolution and compliance with the covariant subtyping principle. The full semantics of the conceptual schema are preserved. The designer of the external schema can make use of all information contained in the conceptual schema, in particular conceptual methods can be called from externally defined ones. In this paper, we concentrate on the area of type derivation and method resolution. 
1 CiteSeerX 2009-04-15 2007-11-21 1994 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.7175 http //www.ifi.uni-klu.ac.at/Publications/pubfiles/psfiles/1994-0001-DoEd.ps en 10.1.1.49.5388 10.1.1.45.855 10.1.1.45.224 10.1.1.50.3649 10.1.1.92.5802 10.1.1.25.3017 10.1.1.96.1587 10.1.1.42.7156 10.1.1.17.6038 10.1.1.105.8423 10.1.1.85.8868 10.1.1.86.6171 10.1.1.89.4262 10.1.1.39.5730 10.1.1.33.2495 10.1.1.28.4618 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+83:oai CiteSeerXPSU 10.1.1.31.2167:Study of the Charge Spectra Generated By Photomultipliers.:Cavasinni Cologna B. Di Girolamo G. Renzoni:2009-04-12 INTRODUCTION In this note we discuss the measurements of light yield from scintillator and wavelenght-shifting fibres (WLS) performed through the analysis of the charge distribution provided by a photomultiplier. This procedure was used in ref [1]. The analysis procedures were verified by a simple montecarlo program which generates the photomultiplier spectra. 2 THE EXPERIMENTAL APPARATUS These measurements have been carried out in the optoelectronic laboratory in Pisa with two different setups. GPIB-CAMAC interface PM 1 Fic system OS/9 Computer -fic monitor 12345... -off-line analisys Fibre Fibre Scintillator Support Dark Room Up View Support Source Digital Multimeter Figure 1 Experimental apparatus used for the measurements of charge spectra In the first case, as shown in the figure 1, the fibre is placed on proper supports at about 10 cm from the surface of an optical bench. The fibre, coupled to a small scintillat CiteSeerX 2009-04-12 2007-11-22 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.2167 http //atlasinfo.cern.ch/Atlas/SUB_DETECTORS/TILE/tileref/note117/fnote.ps.Z en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+84:oai CiteSeerXPSU 10.1.1.49.2435:TIGUKAT An Object Model for Query and View Support in Object Database Systems:Randal J. Peters M. Tamer Özsu Duane Szafron:2009-04-12 Object-oriented computing is influencing many areas of computer science including software engineering, user interfaces, operating systems, programming languages and database systems. The appeal of object-orientation is attributed to its higher levels of abstraction for modeling real world concepts, its support for incremental development and its potential for interoperability. Despite many advances, object-oriented computing is still in its infancy and a universally acceptable definition of an object-oriented data model is virtually nonexistent, although some standardization efforts are underway. This report presents the TIGUKAT 1 object model definition that is the result of an investigation of object-oriented modeling features which are common among earlier proposals, along with some distinctive qualities that extend the power and expressibility of this model beyond others. The literature recognizes two perspectives of an object model the structural view and the behavioral view. ... CiteSeerX 2009-04-12 2007-11-22 1992 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.49.2435 ftp //menaik.cs.ualberta.ca/pub/TechReports/1992/TR92-14/TR92-14.ps.Z en 10.1.1.123.1085 10.1.1.116.1298 10.1.1.48.1422 10.1.1.27.6071 10.1.1.84.9469 10.1.1.97.9167 10.1.1.11.8792 10.1.1.48.8578 10.1.1.31.7627 10.1.1.42.7725 10.1.1.48.2199 10.1.1.45.224 10.1.1.44.4971 10.1.1.34.624 10.1.1.86.4262 10.1.1.31.7627 10.1.1.32.4444 10.1.1.51.5051 10.1.1.32.4382 10.1.1.51.9134 10.1.1.52.9346 10.1.1.56.204 10.1.1.21.1999 10.1.1.45.4879 10.1.1.105.8443 10.1.1.53.2111 10.1.1.53.4468 10.1.1.29.8669 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+85:oai CiteSeerXPSU 10.1.1.37.8818:Overview of Multidatabase Transaction Management:Yuri Breitbart Hector Garcia-Molina Avi Silberschatz:2009-06-22 A multidatabase system (MDBS) is a facility that allows users access to data located in multiple autonomous database management systems (DBMSs). In such a system, global transactions are executed under the control of the MDBS. Independently, local transactions are executed under the control of the local DBMSs. Each local DBMS integrated by the MDBS may employ a different transaction management scheme. In addition, each local DBMS has complete control over all transactions (global and local) executing at its site, including the ability to abort at any point any of the transactions executing at its site. Typically, no design or internal DBMS structure changes are allowed in order to accommodate the MDBS. Furthermore, the local DBMSs may not be aware of each other, and, as a consequence, cannot coordinate their actions. Thus, traditional techniques for ensuring transaction atomicity and consistency in homogeneous distributed database systems may not be appropriate for an MDBS environment.... 
CiteSeerX 2009-06-22 2007-11-22 1992 text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.8818 ftp //ftp.cs.utexas.edu/pub/avi/UT-CS-TR-92-21.PS.Z en 10.1.1.101.8988 10.1.1.130.1772 10.1.1.38.6210 10.1.1.34.3768 10.1.1.36.1275 10.1.1.104.3430 10.1.1.112.244 10.1.1.94.9106 10.1.1.41.4043 10.1.1.49.5143 10.1.1.59.2034 10.1.1.53.875 10.1.1.137.5642 10.1.1.41.8832 10.1.1.21.1100 10.1.1.105.3626 10.1.1.44.773 10.1.1.21.2576 10.1.1.40.6484 10.1.1.144.2713 10.1.1.48.6718 10.1.1.16.6166 10.1.1.40.832 10.1.1.36.2660 10.1.1.30.3087 10.1.1.47.322 10.1.1.17.6532 10.1.1.33.2301 10.1.1.20.4306 10.1.1.47.6258 10.1.1.39.9212 10.1.1.46.4334 10.1.1.71.485 10.1.1.43.1405 10.1.1.49.1308 10.1.1.35.6530 10.1.1.42.5177 10.1.1.54.4068 10.1.1.133.3692 10.1.1.40.4220 10.1.1.48.7743 10.1.1.26.575 10.1.1.107.596 10.1.1.116.3495 10.1.1.33.2074 10.1.1.38.7229 10.1.1.59.4464 10.1.1.103.9562 10.1.1.36.5887 10.1.1.40.9658 10.1.1.53.6783 10.1.1.29.5010 10.1.1.107.876 10.1.1.46.2273 10.1.1.46.3657 10.1.1.49.5281 10.1.1.50.4114 10.1.1.63.3234 10.1.1.79.9607 10.1.1.83.4819 10.1.1.83.4980 10.1.1.84.8136 10.1.1.90.953 10.1.1.90.9785 10.1.1.92.2397 10.1.1.93.8911 10.1.1.94.3702 10.1.1.97.672 10.1.1.98.4604 10.1.1.117.6190 10.1.1.118.4814 10.1.1.130.880 10.1.1.137.1167 10.1.1.51.5111 10.1.1.45.2774 10.1.1.45.9165 10.1.1.40.4684 10.1.1.35.5866 10.1.1.38.3606 10.1.1.29.9166 10.1.1.31.3667 10.1.1.21.7181 10.1.1.33.2343 10.1.1.23.3117 10.1.1.24.7879 10.1.1.18.8936 10.1.1.19.3770 10.1.1.19.5246 10.1.1.12.3293 10.1.1.2.2325 10.1.1.60.116 10.1.1.140.5244 10.1.1.143.3448 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+86:oai CiteSeerXPSU 10.1.1.54.6302:Overview of Multidatabase Transaction Management:Yuri Breitbart Hector Garcia-molina Avi Silberschatz:2009-04-12 A multidatabase system (MDBS) is a facility that allows users access to data located in multiple autonomous database management systems (DBMSs). In such a system, global transactions are executed under the control of the MDBS. Independently, local transactions are executed under the control of the local DBMSs. Each local DBMS integrated by the MDBS may employ a different transaction management scheme. In addition, each local DBMS has complete control over all transactions (global and local) executing at its site, including the ability to abort at any point any of the transactions executing at its site. Typically, no design or internal DBMS structure changes are allowed in order to accommodate the MDBS. Furthermore, the local DBMSs may not be aware of each other, and, as a consequence, cannot coordinate their actions. Thus, traditional techniques for ensuring transaction atomicity and consistency in homogeneous distributed database systems may not be appropriate for an MDBS environment.... 
CiteSeerX 2009-04-12 2007-11-22 1992 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.6302 http //www-db.stanford.edu/pub/papers/multidatabase.ps en 10.1.1.101.8988 10.1.1.130.1772 10.1.1.38.6210 10.1.1.34.3768 10.1.1.36.1275 10.1.1.104.3430 10.1.1.112.244 10.1.1.94.9106 10.1.1.41.4043 10.1.1.49.5143 10.1.1.59.2034 10.1.1.53.875 10.1.1.137.5642 10.1.1.41.8832 10.1.1.21.1100 10.1.1.105.3626 10.1.1.44.773 10.1.1.21.2576 10.1.1.40.6484 10.1.1.144.2713 10.1.1.48.6718 10.1.1.16.6166 10.1.1.40.832 10.1.1.36.2660 10.1.1.30.3087 10.1.1.47.322 10.1.1.17.6532 10.1.1.33.2301 10.1.1.20.4306 10.1.1.47.6258 10.1.1.39.9212 10.1.1.46.4334 10.1.1.71.485 10.1.1.43.1405 10.1.1.49.1308 10.1.1.35.6530 10.1.1.42.5177 10.1.1.54.4068 10.1.1.133.3692 10.1.1.40.4220 10.1.1.48.7743 10.1.1.26.575 10.1.1.107.596 10.1.1.116.3495 10.1.1.33.2074 10.1.1.38.7229 10.1.1.59.4464 10.1.1.103.9562 10.1.1.36.5887 10.1.1.40.9658 10.1.1.53.6783 10.1.1.29.5010 10.1.1.107.876 10.1.1.46.2273 10.1.1.46.3657 10.1.1.49.5281 10.1.1.50.4114 10.1.1.63.3234 10.1.1.79.9607 10.1.1.83.4819 10.1.1.83.4980 10.1.1.84.8136 10.1.1.90.953 10.1.1.90.9785 10.1.1.92.2397 10.1.1.93.8911 10.1.1.94.3702 10.1.1.97.672 10.1.1.98.4604 10.1.1.117.6190 10.1.1.118.4814 10.1.1.130.880 10.1.1.137.1167 10.1.1.51.5111 10.1.1.45.2774 10.1.1.45.9165 10.1.1.40.4684 10.1.1.35.5866 10.1.1.38.3606 10.1.1.29.9166 10.1.1.31.3667 10.1.1.21.7181 10.1.1.33.2343 10.1.1.23.3117 10.1.1.24.7879 10.1.1.18.8936 10.1.1.19.3770 10.1.1.19.5246 10.1.1.12.3293 10.1.1.2.2325 10.1.1.60.116 10.1.1.140.5244 10.1.1.143.3448 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+87:oai CiteSeerXPSU 10.1.1.59.5918:Coupling OWL with MPEG-7 and TV-Anytime for Domain-specific Multimedia Information Integration and Retrieval:Chrisa Tsinaraki Panagiotis Polydoros Nektarios Moumoutzis Stavros Christodoulakis:2009-04-19 The success of the Web is due to a large extent in the development of standards that allow interoperability in open environments. Future work in the field will have greater impact if it is based and built on existing standards. Well-accepted international standards for multimedia content descriptions are MPEG-7 and TV-Anytime. However, these standards do not propose a concrete methodology and language for the integration of domainspecific knowledge for the multimedia content. Moreover, domain-specific knowledge for a specific domain related to the content of a video may be described in a well-accepted ontology description language such as OWL, which is independent of MPEG-7 and TV-Anytime. CiteSeerX 2009-04-19 2008-02-05 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.59.5918 http //www.riao.org/Proceedings-2004/papers/1330.pdf en 10.1.1.103.1819 10.1.1.104.7893 10.1.1.106.3954 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+88:oai CiteSeerXPSU 10.1.1.43.3839:Specification and Execution of Transactional Workflows:Marek Rusinkiewicz Amit Sheth:2009-04-13 The basic transaction model has evolved over time to incorporate more complex transaction structures and to selectively modify the atomicity and isolation properties. In this chapter we discuss the application of transaction concepts to activities that involve coordinated execution of multiple tasks (possibly of different types) over different processing entities. Such applications are referred to as transactional workflows. In this chapter we discuss the specification of such workflows and the issues involved in their execution. 1 What is a Workflow? Workflows are activities involving the coordinated execution of multiple tasks performed by different processing entities. A task defines some work to be done and can be specified in a number of ways, including a textual description in a file or an email, a form, a message, or a computer program. A processing entity that performs the tasks may be a person or a software system (e.g., a mailer, an application program, a database mana... 
CiteSeerX ACM Press 2009-04-13 2007-11-22 1995 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.43.3839 http //lsdis.cs.uga.edu/lib/././download/RS93.ps en 10.1.1.17.1323 10.1.1.59.5051 10.1.1.38.6210 10.1.1.68.7445 10.1.1.109.5175 10.1.1.17.7962 10.1.1.44.7778 10.1.1.112.244 10.1.1.13.7602 10.1.1.102.7874 10.1.1.41.4043 10.1.1.49.5143 10.1.1.41.7252 10.1.1.17.3225 10.1.1.54.7761 10.1.1.55.5255 10.1.1.108.958 10.1.1.35.7733 10.1.1.52.3682 10.1.1.36.1618 10.1.1.45.6317 10.1.1.43.3180 10.1.1.35.8718 10.1.1.44.6365 10.1.1.51.2883 10.1.1.50.9206 10.1.1.6.9085 10.1.1.30.1707 10.1.1.80.6634 10.1.1.49.355 10.1.1.127.3550 10.1.1.35.3562 10.1.1.137.8832 10.1.1.49.4085 10.1.1.41.5506 10.1.1.40.4657 10.1.1.43.2369 10.1.1.40.832 10.1.1.74.5411 10.1.1.90.4428 10.1.1.110.6967 10.1.1.27.2122 10.1.1.15.5605 10.1.1.54.727 10.1.1.49.7512 10.1.1.45.8796 10.1.1.50.5984 10.1.1.53.137 10.1.1.30.3262 10.1.1.28.1680 10.1.1.21.7110 10.1.1.29.3148 10.1.1.57.687 10.1.1.59.5924 10.1.1.46.2812 10.1.1.51.5552 10.1.1.17.7375 10.1.1.40.1598 10.1.1.52.9787 10.1.1.1.3496 10.1.1.50.6791 10.1.1.55.3358 10.1.1.137.7582 10.1.1.118.4127 10.1.1.49.3580 10.1.1.35.5825 10.1.1.46.9382 10.1.1.31.7411 10.1.1.48.5504 10.1.1.55.5163 10.1.1.18.1603 10.1.1.52.8129 10.1.1.1.9723 10.1.1.21.9113 10.1.1.49.7644 10.1.1.52.6646 10.1.1.75.3106 10.1.1.80.2072 10.1.1.55.8770 10.1.1.54.8188 10.1.1.101.7919 10.1.1.104.8176 10.1.1.24.5741 10.1.1.29.4667 10.1.1.4.1055 10.1.1.48.9175 10.1.1.56.792 10.1.1.65.3172 10.1.1.66.5947 10.1.1.73.8532 10.1.1.83.8299 10.1.1.86.8521 10.1.1.87.2402 10.1.1.87.4648 10.1.1.90.5638 10.1.1.91.1709 10.1.1.94.4248 10.1.1.114.511 10.1.1.119.5037 10.1.1.124.7957 10.1.1.49.215 10.1.1.53.7777 10.1.1.53.9711 10.1.1.45.9409 10.1.1.40.8789 10.1.1.43.4845 10.1.1.34.8273 10.1.1.35.4783 10.1.1.28.3176 10.1.1.16.8151 10.1.1.8.9117 10.1.1.58.3449 10.1.1.142.7041 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+89:oai CiteSeerXPSU 10.1.1.33.8596:Dynamic Query Optimization and Query Processing in Multidatabase Systems 1.:Henryk Josinski:2009-04-15 Introduction The multidatabase system (MDBS) approach, as a solution for integrated access to information distributed among diverse data sources, has gained a lot of attention in recent years. The multidatabase system is a database system which integrates pre--existing databases allowing the users to access simultaneously database systems (DBMSs) formulating a global query based on a global schema. The component DBMSs are assumed to be heterogeneous and autonomous. Heterogeneity refers to different user interfaces, data models, query languages, and query optimization strategies [5]. Local autonomy means that each DBMS retains complete control over local data and processing. As result of this, its cost model may not be available to the global query optimizer. When a global query is submitted, it is decomposed into two types of queries [1] -- subqueries, operating on sharable data items from local databases, -- assembling queries, consisting of, CiteSeerX 2009-04-15 2007-11-22 2000 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.33.8596 http //www.edbt2000.uni-konstanz.de/phd-workshop/papers/Josinski.pdf en 10.1.1.27.4704 10.1.1.51.8352 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+90:oai CiteSeerXPSU 10.1.1.55.2467:Using Reasoning of Description Logics for Query Processing in Multidatabase Systems:Alfredo Goñi Jesús Bermúdez José M. Blanco Arantza Illarramendi:2009-04-12 . Nowadays, the interest to work simultaneously with data stored in several databases is growing. Multidatabase Systems (MDBS) have been proposed as a solution to work with different pre-existing autonomous databases. Federated Database Systems (FDBS) are a special type of MDBS where an integrated schema is provided. This integrated schema is the result of an integration process among the schemata of the pre-existing autonomous databases. In our case we have built a FDBS that integrates several heterogeneous relational databases by using a particular type of Knowledge Representation system based on Description Logics (DL system) . The integrated schema is represented as a terminology formed by a set of classes and attributes. Although there has been a lot of research about the problems of translation and integration of schemata to obtain integrated ones, the problem of query processing against these integrated schemata has not been treated so much. In this paper we present an overview ... CiteSeerX 2009-04-12 2007-11-22 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.55.2467 http //sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-4/goni.ps en 10.1.1.32.6380 10.1.1.51.9343 10.1.1.47.9649 10.1.1.126.7225 10.1.1.7.9769 10.1.1.65.4869 10.1.1.32.7239 10.1.1.16.6464 10.1.1.50.3099 10.1.1.1.7655 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+91:oai CiteSeerXPSU 10.1.1.55.482:A Shared View of Sharing The Treaty of Orlando:Lynn Andrea Stein Henry Lieberman David Ungar:2009-04-12 Introduction For the past few years, researchers have been debating the relative merits of object-oriented languages with classes and inheritance as opposed to those with prototypes and delegation. It has become clear that the object-oriented programming language design space is not a dichotomy. Instead, we have identified two fundamental mechanisms---templates and empathy---and several different independent degrees of freedom for each. Templates create new objects in their own image, providing guarantees about the similarity of group members. Empathy allows an object to act as if it were some other object, thus providing sharing of state and behavior. The Smalltalk-80 TM language, 1 Actors, Lieberman's Delegation system, Self, and Hybrid each take differing stands on the forms of templates 1 Smalltalk-80 TM is a trademark of Par CiteSeerX ACM Press 2009-04-12 2007-11-22 1989 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.55.482 http //lcs.www.media.mit.edu/people/lieber/Lieberary/OOP/Treaty/Treaty.ps en 10.1.1.26.9545 10.1.1.118.6579 10.1.1.48.69 10.1.1.57.5195 10.1.1.9.570 10.1.1.47.511 10.1.1.127.5320 10.1.1.100.4334 10.1.1.5.3348 10.1.1.39.3374 10.1.1.56.4713 10.1.1.61.2065 10.1.1.27.3015 10.1.1.1.5960 10.1.1.67.5433 10.1.1.31.8109 10.1.1.68.4062 10.1.1.49.3986 10.1.1.122.9331 10.1.1.46.8283 10.1.1.54.5230 10.1.1.16.2055 10.1.1.137.5180 10.1.1.43.5722 10.1.1.68.2105 10.1.1.35.1247 10.1.1.30.1415 10.1.1.7.5014 10.1.1.102.3946 10.1.1.105.6469 10.1.1.26.223 10.1.1.26.8645 10.1.1.35.4104 10.1.1.39.6986 10.1.1.41.7822 10.1.1.42.9056 10.1.1.53.9325 10.1.1.71.1802 10.1.1.76.6993 10.1.1.89.9613 10.1.1.121.5599 10.1.1.122.3737 10.1.1.127.1894 10.1.1.55.5674 10.1.1.37.8260 10.1.1.2.2077 10.1.1.24.5782 10.1.1.19.780 10.1.1.2.4148 10.1.1.2.4173 10.1.1.131.902 10.1.1.30.2927 Metadata may be used 
without restrictions as long as the oai identifier remains attached to it.
+92:oai CiteSeerXPSU 10.1.1.13.2374:Integrated Office Systems:O. M. Nierstrasz D. C. Tsichritzis:2009-04-17 Introduction New techniques are sorely needed to aid in the development and maintenance of large application systems. The problem with traditional approaches to software engineering is well in evidence in the field of o#ce information systems it is costly and di#cult to extend existing applications, and to get unrelated applications to "talk" to each other. The objectoriented approach is already being tentatively applied in the modeling of "o#ce objects" and in the presentation of these entities to users as such in "desktop" interfaces to o#ce software. In order to fully exploit the approach to achieve integrated o#ce systems, we need to use object-oriented programming languages, object-oriented run-time support, and object-oriented software engineering environments. We can view the fundamental idea behind the object-oriented approach as that of encapsulation object-oriented languages and systems exploit encapsulation in various ways in an attempt to enhance productivity through, f CiteSeerX 2009-04-17 2007-11-21 1988 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.13.2374 http //www.iam.unibe.ch/~scg/Archive/OSG/Nier89bIntegOfficeSystems.pdf en 10.1.1.26.9545 10.1.1.65.5865 10.1.1.34.624 10.1.1.12.8544 10.1.1.144.6983 10.1.1.26.6746 10.1.1.49.3064 10.1.1.30.4607 10.1.1.38.4894 10.1.1.20.8197 10.1.1.26.4381 10.1.1.29.1890 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+93:oai CiteSeerXPSU 10.1.1.42.9253:Integrated Office Systems:O. M. Nierstrasz D. C. Tsichritzis:2009-04-11 Introduction New techniques are sorely needed to aid in the development and maintenance of large application systems. The problem with traditional approaches to software engineering is well in evidence in the field of office information systems it is costly and difficult to extend existing applications, and to get unrelated applications to "talk" to each other. The objectoriented approach is already being tentatively applied in the modeling of "office objects" and in the presentation of these entities to users as such in "desktop" interfaces to office software. In order to fully exploit the approach to achieve integrated office systems, we need to use object-oriented programming languages, object-oriented run-time support, and object-oriented software engineering environments. We can view the fundamental idea behind the object-oriented approach as that of encapsulation object-oriented languages and systems exploit encapsulation in various ways in an attempt t CiteSeerX ACM Press and Addison-Wesley 2009-04-11 2007-11-22 1988 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.42.9253 ftp //ftp.iam.unibe.ch/pub/scg/Papers/integratedOfficeSystems.ps.gz en 10.1.1.26.9545 10.1.1.65.5865 10.1.1.34.624 10.1.1.12.8544 10.1.1.144.6983 10.1.1.26.6746 10.1.1.49.3064 10.1.1.30.4607 10.1.1.38.4894 10.1.1.20.8197 10.1.1.26.4381 10.1.1.29.1890 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+94:oai CiteSeerXPSU 10.1.1.86.3568:Topics Selected Reference References The following books are useful references.:Alfred V. Aho John E. Hopcroft:2008-07-01 but it lacks topics in network flows and linear programming, as well as more recent algorithms. It is amazing that after more than twenty years it remains an extremely valuable book. 2. Alfred V. Aho, John E. Hopcroft, and Jeffrey D. Ullman. Data Structures and Algorithms. Addison-Wesley, 1983. Revised and more elementary version of the first six chapters of The Design and Analysis of Computer Algorithms. CiteSeerX 2008-07-01 2008-04-01 2000 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.86.3568 http //graphics.stanford.edu/courses/cs161-00-winter/handouts/references.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+95:oai CiteSeerXPSU 10.1.1.100.9631:[3] S. Abiteboul, R. Hull, and V. Vianu. Foundations of Databases. Addison-:S. Acharya P. B. Gibbons V. Poosala S. Ramaswamy The Aqua:2008-07-01 [4] S. Abiteboul and P. Kanellakis. Object identity as a query language primitive. CiteSeerX 2008-07-01 2008-04-02 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.100.9631 http //www.ateneonline.it/rama/6114-6_biblio.pdf en 10.1.1.17.2504 10.1.1.29.4781 10.1.1.96.1350 10.1.1.19.1226 10.1.1.120.576 10.1.1.28.4924 10.1.1.43.2125 10.1.1.40.8642 10.1.1.28.7845 10.1.1.91.7258 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+96:oai CiteSeerXPSU 10.1.1.25.5524:Games and total Datalog ... Queries:Jörg Flum Max Kubierschky Bertram Ludäscher:2009-04-16 We show that the expressive power of Datalog @ programs under the well-founded semantics does not decrease when restricted to total programs thereby affirmatively answering an open question posed by Abiteboul et al. (Foundations of Databases, Addison-Wesley, Reading, MA, 1995). In particular, we show that for every such program there exists an equivalent total program whose only recursive rule is of the form win( # X ) # move( # X # Y ) @win( # Y ) where move is definable by a quantifier-free first-order formula. Also, for the non-inflationary semantics we derive a new normal form whose only recursive rule simulates a version of the game of life. CiteSeerX 2009-04-16 2007-11-21 2000 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.25.5524 http //www.sdsc.edu/~ludaesch/Paper/tcs2000.pdf en 10.1.1.53.8258 10.1.1.32.9446 10.1.1.144.1424 10.1.1.46.9441 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+97:oai CiteSeerXPSU 10.1.1.62.2095:<author><last-name>Abiteboul</last-name></author> <author><last-name>Hull</last-name></author> <author><last-name>Vianu</last-name></author> <title>Foundations of Databases</title>:Peter Wood Overview Of Xml:2008-07-01 magazine → title volume issue date • event-condition-action (ECA) rules for XML • analysing ECA rules • containment and equivalence of XPath queries • conclusions and future work CiteSeerX 2008-07-01 2008-02-06 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.62.2095 http //www.dcs.bbk.ac.uk/~ptw/analysis.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+98:oai CiteSeerXPSU 10.1.1.49.2910:Active Database Systems:Umeshwar Dayal Eric N. Hanson Jennifer Widom:2009-04-12 In Won Kim editor Modern Database Systems The Object Model Integrating a production rules facility into a database system provides a uniform mechanism for a number of advanced database features including integrity constraint enforcement, derived data maintenance, triggers, alerters, protection, version control, and others. In addition, a database system with rule processing capabilities provides a useful platform for large and efficient knowledge-base and expert systems. Database systems with production rules are referred to as active database systems, and the field of active database systems has indeed been active. This chapter summarizes current work in active database systems topics covered include active database rule models and languages, rule execution semantics, and implementation issues. 1 Introduction Conventional database systems are passive they only execute queries or transactions explicitly submitted by a user or an application program. For many applications, however, it is important to monitor situations of interest, and to ... 
CiteSeerX ACM Press 2009-04-12 2007-11-22 1994 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.49.2910 http //www-db.stanford.edu/pub/papers/book-chapter.ps en 10.1.1.17.1323 10.1.1.143.7196 10.1.1.50.3821 10.1.1.51.9946 10.1.1.41.2030 10.1.1.46.2504 10.1.1.52.4421 10.1.1.38.2083 10.1.1.34.661 10.1.1.103.7630 10.1.1.100.9015 10.1.1.97.1699 10.1.1.107.4220 10.1.1.47.9217 10.1.1.133.7157 10.1.1.101.5051 10.1.1.30.9989 10.1.1.53.6941 10.1.1.50.8529 10.1.1.133.4287 10.1.1.50.7278 10.1.1.10.1688 10.1.1.19.8669 10.1.1.44.7600 10.1.1.144.376 10.1.1.44.1348 10.1.1.47.9998 10.1.1.90.4428 10.1.1.108.344 10.1.1.48.9470 10.1.1.53.5472 10.1.1.52.4872 10.1.1.144.4965 10.1.1.31.7578 10.1.1.32.6426 10.1.1.58.6335 10.1.1.85.8052 10.1.1.93.1931 10.1.1.55.4610 10.1.1.21.3821 10.1.1.26.9208 10.1.1.31.4869 10.1.1.48.1833 10.1.1.83.8628 10.1.1.87.9318 10.1.1.90.2195 10.1.1.36.5184 10.1.1.21.1704 10.1.1.53.1733 10.1.1.90.3181 10.1.1.53.6783 10.1.1.52.6151 10.1.1.104.6911 10.1.1.105.1691 10.1.1.21.1984 10.1.1.23.2775 10.1.1.62.5556 10.1.1.68.9063 10.1.1.74.4746 10.1.1.78.5097 10.1.1.84.743 10.1.1.84.904 10.1.1.87.6019 10.1.1.88.3907 10.1.1.89.9631 10.1.1.90.4147 10.1.1.92.365 10.1.1.100.2747 10.1.1.98.5083 10.1.1.98.6663 10.1.1.99.1894 10.1.1.99.8174 10.1.1.133.8073 10.1.1.52.7823 10.1.1.39.5341 10.1.1.35.3458 10.1.1.26.4620 10.1.1.18.8936 10.1.1.19.3694 10.1.1.12.631 10.1.1.48.6394 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+99:oai CiteSeerXPSU 10.1.1.103.3060:AN OUT-OF-CORE SPARSE SYMMETRIC INDEFINITE FACTORIZATION METHOD:Omer Meshar Sivan Toledo:2008-07-01 Abstract. We present a new out-of-core sparse symmetric-indefinite factorization algorithm. The most significant innovation of the new algorithm is a dynamic partitioning method for the sparse factor. This partitioning method results in very low input-output traffic and allows the algorithm to run at high computational rates even though the factor is stored on a slow disk. Our implementation of the new code compares well with both high-performance incore sparse symmetric-indefinite codes and with a high-performance out-of-core sparse Cholesky code. More specifically, the new code provides a new capability that none of these existing codes has it can factor symmetric indefinite matrices whose factors are larger than main memory it is somewhat slower, but not by much. For example, it factors, on a conventional 32-bit workstation, an indefinite finite-element matrix whose factor size is about 10 GB in less than an hour. 1. CiteSeerX 2008-07-01 2008-04-03 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.103.3060 http //www.cs.tau.ac.il/~stoledo/Pubs/ooc-indef.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+100:oai CiteSeerXPSU 10.1.1.33.948:Model-Checking CSP-Z Strategy, Tool Support and Industrial Application:Alexandre Mota Augusto Sampaio:2009-04-15 Key words Model-Checking Linking Theories and Tools Industrial Case Study Formal Verication Concurrent and Model-Based Specications satellite Model-checking is now widely accepted as an ecient method for analysing computer system properties, such as deadlock-freedom. Its practical applicability is due to existing automatic tools which deal with tedious proofs. Another research area of increasing interest is formal language integration where the capabilities of each language are used to capture precisely some aspects of a system. In this paper we propose a general strategy for model-checking CSP-Z specications using as tool support the FDR model-checker. The CSP-Z language is a semantical integration of CSP and Z, such that CSP handles the concurrent aspects of a system, and Z the data structures part. We also present a modular approach for model-checking complex CSP-Z specications, specically to verify deadlock-freedom. Finally, we present a CSP-Z specication for a subset of a real Brazilian articial microssatellite, and apply the proposed strategy to prove that this specication is deadlock-free. Key words Model-Chec... CiteSeerX 2009-04-15 2007-11-22 2001 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.33.948 http //www.di.ufpe.br/~acm/Publications/Journal99.ps.gz en 10.1.1.139.4033 10.1.1.51.8952 10.1.1.49.7093 10.1.1.70.224 10.1.1.33.1213 10.1.1.29.1650 10.1.1.29.2879 10.1.1.26.8614 10.1.1.130.3103 10.1.1.4.3615 10.1.1.88.2106 10.1.1.107.9213 10.1.1.65.6682 10.1.1.71.769 10.1.1.98.2521 10.1.1.117.2016 10.1.1.58.1370 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
diff --git a/asterix-fuzzyjoin/data/pub-small/raw.csx-000/part-00000 b/asterix-fuzzyjoin/data/pub-small/raw.csx-000/part-00000
new file mode 100644
index 0000000..0813657
--- /dev/null
+++ b/asterix-fuzzyjoin/data/pub-small/raw.csx-000/part-00000
@@ -0,0 +1,100 @@
+oai CiteSeerXPSU 10.1.1.39.1830:Object SQL - A Language for the Design and Implementation of Object Databases:Jurgen Annevelink Rafiul Ahad Amelia Carlson Dan Fishman Mike Heytens William Kent:2009-04-13 ly, a function application expression consists of two expressions a function reference (labelled func_ref in Figure 3 line 2), and an argument (labelled arg). The func_ref expression evaluates to a (generic or specific) function identifier, which may be the same as the function that the expression is a part of, thus allowing recursive function invocations. The expression labelled arg evaluates to an arbitrary object or aggregate object. The semantics of evaluating function applications was discussed in detail in section 2. For example, to set the name of a person, we evaluate the following expression FunAssign(function name.person) (p1,'John') In this example, the first expression is itself a function call, applying the function FunAssign to the function name.person (an example of a specific function reference). This returns the oid of the function that sets a person's name, which is subsequently applied to a tuple of two elements, the oid of the person and the new name (a string o... CiteSeerX ACM Press 2009-04-13 2007-11-22 1994 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.39.1830 http //www.tu-chemnitz.de/~igrdb/docs/OpenODB/osql.ps.gz en 10.1.1.31.2534 10.1.1.28.4658 10.1.1.44.5947 10.1.1.39.199 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1483:Candidate high myopia loci on chromosomes 18p and 12q do not play a major role in susceptibility to common myopia:Grace Ibay Betty Doan Lauren Reider Debra Dana Melissa Schlifka Heping Hu Taura Holmes Jennifer O'Neill Robert Owens Elise Ciner Joan Bailey-Wilson Dwight Stambolian:2009-05-24 Background To determine whether previously reported loci predisposing to nonsyndromic high myopia show linkage to common myopia in pedigrees from two ethnic groups Ashkenazi Jewish and Amish. We hypothesized that these high myopia loci might exhibit allelic heterogeneity and be responsible for moderate /mild or common myopia. Methods Cycloplegic and manifest refraction were performed on 38 Jewish and 40 Amish families. Individuals with at least -1.00 D in each meridian of both eyes were classified as myopic. Genomic DNA was genotyped with 12 markers on chromosomes 12q21-23 and 18p11.3. Parametric and nonparametric linkage analyses were conducted to determine whether susceptibility alleles at these loci are important in families with less severe, clinical forms of myopia. Results There was no strong evidence of linkage of common myopia to these candidate regions all two-point and multipoint heterogeneity LOD scores were < 1.0 and non-parametric linkage p-values were > 0.01. However, one Amish family showed slight evidence of linkage (LOD>1.0) on 12q another 3 Amish families each gave LOD >1.0 on 18p and 3 Jewish families each gave LOD >1.0 on 12q. Conclusions Significant evidence of linkage (LOD> 3) of myopia was not found on chromosome 18p or 12q loci in these families. These results suggest that these loci do not play a major role in the causation of common myopia in our families studied. 
CiteSeerX 2009-05-24 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1483 http //www.biomedcentral.com/content/pdf/1471-2350-5-20.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1484:Winner-Take-All Network Utilising Pseudoinverse Reconstruction Subnets Demonstrates Robustness on the Handprinted Character Recognition Problem:J. Körmendy-rácz S. Szabó J. Lörincz G. Antal G. Kovács A. Lörincz:2009-05-24 Correspondence and offprint requests to J. Kormendy-Rácz Wittmeyer’s pseudoinverse iterative algorithm is formulated as a dynamic connectionist Data Compression and Reconstruction (DCR) network, and subnets of this type are supplemented by the winner-take-all paradigm. The winner is selected upon the goodness-of-fit of the input reconstruction. The network can be characterised as a competitive-cooperative-competitive architecture by virtue of the contrast enhancing properties of the pseudoinverse subnets. The network is capable of fast learning. The adopted learning method gives rise to increased sampling in the vicinity of dubious boundary regions that resembles the phenomenon of categorical perception. The generalising abilities of the scheme allow one to utilise single bit connection strengths. The network is robust against input noise and contrast levels, shows little sensitivity to imprecise connection strengths, and is promising for mixed VLSI implementation with on-chip learning properties. The features of the DCR network are demonstrated on the NIST database of handprinted characters. CiteSeerX Springer 2009-05-24 2007-11-19 1999 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1484 http //people.inf.elte.hu/lorincz/Files/publications/WTA_NCA.pdf en 10.1.1.134.6077 10.1.1.65.2144 10.1.1.54.7277 10.1.1.48.5282 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1485:DEEM a Tool for the Dependability Modeling and Evaluation:A. Bondavalli I. Mura S. Chiaradonna S. Poli F. Sandrini:2009-05-24 Processes Multiple-Phased Systems, whose operational life can be partitioned in a set of disjoint periods, called ¿phases¿ include several classes of systems such as Phased Mission Systems and Scheduled Maintenance Systems. Because of their deployment in critical applications, the dependability modeling and analysis of Multiple-Phased Systems is a task of primary relevance. However, the phased behavior makes the analysis of Multiple-Phased Systems extremely complex. This paper is centered on the description and application of DEEM, a dependability modeling and evaluation tool for Multiple Phased Systems. DEEM supports a powerful and efficient methodology for the analytical dependability modeling and evaluation of Multiple Phased Systems, based on Deterministic and Stochastic Petri Nets and on Markov Regenerative Processes. CiteSeerX IEEE Computer Society 2009-05-24 2007-11-19 2000 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1485 http //bonda.cnuce.cnr.it/Documentation/Papers/file-BMCFPS00-DSN2000-76.pdf en 10.1.1.47.2594 10.1.1.58.2039 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1486:Dynamical networks in function dynamics:Naoto Kataoka Kunihiko Kaneko:2009-05-24 Function dynamics Iterated map Self-reference Dynamical network As a first step toward realizing a dynamical system that evolves while spontaneously determining its own rule for time evolution, function dynamics (FD) is analyzed. FD consists of a functional equation with a self-referential term, given as a dynamical system of a one-dimensional map. Through the time evolution of this system, a dynamical graph (a network) emerges. This graph has three interesting properties (i) vertices appear as stable elements, (ii) the terminals of directed edges change in time, and (iii) some vertices determine the dynamics of edges, and edges determine the stability of the vertices, complementarily. Two aspects of FD are studied, the generation of a graph (network) structure and the dynamics of this graph (network) in the system. CiteSeerX 2009-05-24 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1486 http //chaos.c.u-tokyo.ac.jp/others/kataoka03.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1487:Simulation Prototyping:Ingolf Ståhl:2009-04-24 A simulation model is successful if it leads to policy action, i.e., if it is implemented. Studies show that for a model to be implemented, it must have good correspondence with the mental model of the system held by the user of the model. The user must feel confident that the simulation model corresponds to this mental model. An understanding of how the model works is required. Simulation models for implementation must be developed step by step, starting with a simple model, the simulation prototype. After this has been explained to the user, a more detailed model can be developed on the basis of feedback from the user. Software for simulation prototyping is discussed, e.g., with regard to the ease with which models and output can be explained and the speed with which small models can be written. CiteSeerX 2009-04-24 2007-11-19 2002 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1487 http //www.informs-cs.org/wsc02papers/073.pdf en 10.1.1.17.7647 10.1.1.134.3230 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1488:Hedging beyond duration and convexity:Jian Chen Michael C. Fu:2009-05-24 Hedging of fixed income securities remains one of the most challenging problems faced by financial institutions. The predominantly used measures of duration and convexity do not completely capture the interest rate risks borne by the holder of these securities. Using historical data for the entire yield curve, we perform a principal components analysis and find that the first four factors capture over 99.99% of the yield curve variation. Incorporating these factors into the pricing of arbitrary fixed income securities via Monte Carlo simulation, we derive perturbation analysis (PA) estimators for the price sensitivities with respect to the factors. Computational results for mortgage-backed securities (MBS) indicate that using these sensitivity measures in hedging provides far more protection against interest risk exposure than the conventional measures of duration and convexity. CiteSeerX 2009-05-24 2007-11-19 2002 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1488 http //www.informs-cs.org/wsc02papers/218.pdf en 10.1.1.113.9305 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1489:Designing for social friction Exploring ubiquitous computing as means of cultural interventions in urban space:Rune Huvendick Jensen Tau Ulv Lenskjold:2009-05-24 everyday life urban space Situationism As ubiquitous computing emerges in our lives and cities new opportunities for artistic and otherwise cultural interventions in urban space follow, but so far not much work has been done in order to articulate the socio-cultural significance of these new opportunities. This paper is part of a general attempt to develop a coherent understanding of the implications and potentials of ubiquitous computing in the context of everyday city life. On a more specific level the paper examines how the notion of social friction can be helpful in the development and analysis of ubiquitous computing in relation to art and design. Social friction is articulated as a critical position, which could be applied as a strategy for design. Our approach consists of a theoretical analysis and precedes concrete development and real-life experiments. As such the paper aims to establish a steppingstone from which to launch actual digital designs. We argue that by designing for the social friction, which is an intrinsic characteristic of everyday life, new forms of social and cultural potentials can be released. By means of discussing CityNova, a vision for a possible use of ubiquitous computing in urban space, we explore how this approach might lead to systems that create new ways of experiencing the city. CiteSeerX 2009-05-24 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1489 http //asp.cbs.dk/cade2004/proceedings/fullpapers/7_jensen_final_fullpaper.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1490:Optimal Combination of Number of Taps and Coefficient Bit-Width for Low Power FIR Filter Realization:João Portela Eduardo Costa José Monteiro:2009-05-24 This paper addresses the optimization of FIR filters for low power. We propose a search algorithm to find the combination of the number of taps and coe#cient bit-width that leads to the minimum number of total partial sums, and hence to the least power consumption. We show that the minimum number of taps does not necessarily lead to the least power consumption in fully parallel FIR filter architectures. This is particularly true if the reduction of the bit-width of the coe#cients is taken into account. We show that power is directly related to the total number of partial sums in the FIR filter, which in turn is determined by the number of bits set to 1 in the coe#cients. We have developed a search algorithm that achieves up to 36% less power consumption when compared to an implementation using the minimum number of taps. CiteSeerX 2009-05-24 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1490 http //tahoe.inesc-id.pt/pt/Ficheiros/1188.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1491:The Influence of a Course on Direct and Activating Instruction upon Student Teachers' Classroom Practice:Simon Veenman Eddie Denessen Ingrid Van Den Oord Ferdy Naafs:2009-05-24 Educational research has highlighted the importance of maintaining an orderly classroom environment and providing both clear and well-organized instruction tailored to the needs of individual students. Time spent on direct instruction and particularly the direct instruction of basic skills is associated with school learning (Wang, Haertel & Walberg, 1993). With the increased interest in constructivistic conceptions of learning and teaching today, educators with constructivistic orientations contend that various forms of knowledge and skills are applied more generally when constructed by the learners themselves as opposed to explicitly taught "knowledge is made, not acquired" (Phillips, 2000, p. 7). Such a view nevertheless often leads to an inclination to reject direct instruction by the teacher (see, for example, Brooks & Brooks, 1993). It should be noted, however, that many of the discussions of constructivistic orientations to learning and instruction are at the level of slogan and cliché (Duffy & Cunningham, 1996 Finn & Ravitch, 1996 Kozloff, 1998). In addition, the term constructivism has come to serve as an umbrella term for a diversity of views (Phillips, 1995 2000). CiteSeerX 2009-05-24 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1491 http //www.socsci.kun.nl/ped/owk/onderwijs/cursussen/io242/papers/earli2003_direct_instruction.pdf en 10.1.1.29.1993 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1492:Multiplanar Applications and Multimodal Networks:S. Keshav:2009-05-24 Future applications network architecture We believe that a broad class of future applications will span both the Internet and the telephone network because such multiplanar applications have several economic and architectural advantages over conventional ones. We also envision the close interlinking of the telephone network and the Internet to form a multimodal network. In this paper, we describe these applications and networks, outline their architecture, and present our experiences in constructing a prototype multiplanar application. CiteSeerX 2009-05-24 2007-11-19 1999 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1492 http //www.cs.cornell.edu/skeshav/papers/openarch99.pdf en 10.1.1.17.5614 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1493:Free-Riding and Whitewashing in Peer-to-Peer Systems:Michal Feldman Christos Papadimitriou John Chuang Ion Stoica:2009-05-24 We devise a simple model to study the phenomenon of free-riding and the effect of free identities on user behavior in peer-to-peer systems. At the heart of our model is a strategic user of a certain type, an intrinsic and private parameter that reflects the user's generosity. The user decides whether to contribute or free-ride based on how the current burden of contributing in the system compares to her type. We derive the emerging cooperation level in equilibrium and quantify the effect of providing free-riders with degraded service on the emerging cooperation. We find that this penalty mechanism is beneficial mostly when the "generosity level" of the society (i.e., the average type) is low. To quantify the social cost of free identities, we extend the model to account for dynamic scenarios with turnover (users joining and leaving) and with whitewashers users who strategically leave the system and re-join with a new identity. We find that the imposition of penalty on all legitimate newcomers incurs a significant social loss only under high turnover rates in conjunction with intermediate societal generosity levels. 
CiteSeerX 2009-05-24 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1493 http //www.dtc.umn.edu/weis2004/feldman.pdf en 10.1.1.18.6611 10.1.1.12.2253 10.1.1.12.6173 10.1.1.11.4846 10.1.1.127.6039 10.1.1.130.2771 10.1.1.30.6376 10.1.1.1.3415 10.1.1.42.1211 10.1.1.15.3743 10.1.1.11.6993 10.1.1.13.9315 10.1.1.12.9564 10.1.1.6.1046 10.1.1.101.3865 10.1.1.112.397 10.1.1.77.6233 10.1.1.76.9348 10.1.1.60.8693 10.1.1.1.3899 10.1.1.84.7582 10.1.1.116.3542 10.1.1.59.9845 10.1.1.119.8132 10.1.1.132.4448 10.1.1.143.2268 10.1.1.60.4396 10.1.1.87.4710 10.1.1.137.696 10.1.1.60.8756 10.1.1.97.1922 10.1.1.105.4069 10.1.1.76.3341 10.1.1.80.5437 10.1.1.93.1336 10.1.1.131.1666 10.1.1.102.2386 10.1.1.108.4119 10.1.1.108.5840 10.1.1.109.7946 10.1.1.66.8436 10.1.1.73.5608 10.1.1.73.5797 10.1.1.75.4852 10.1.1.83.2020 10.1.1.85.8881 10.1.1.87.5160 10.1.1.92.3905 10.1.1.92.5572 10.1.1.95.1197 10.1.1.95.6876 10.1.1.110.4873 10.1.1.111.8663 10.1.1.112.1051 10.1.1.113.5715 10.1.1.113.7254 10.1.1.115.348 10.1.1.120.8054 10.1.1.123.6169 10.1.1.128.7663 10.1.1.129.3166 10.1.1.134.3014 10.1.1.135.98 10.1.1.59.9602 10.1.1.129.9940 10.1.1.141.1032 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1494:A Simple Algorithm for Complete Motion Planning of Translating Polyhedral Robots:Gokul Varadhan Shankar Krishnan T. V. N. Sriram Dinesh Manocha:2009-05-24 We present an algorithm for complete path planning for translating polyhedral robots in 3D. Instead of exactly computing an explicit representation of the free space, we compute a roadmap that captures its connectivity. This representation encodes the complete connectivity of free space and allows us to perform exact path planning. We construct the roadmap by computing deterministic samples in free space that lie on an adaptive volumetric grid. Our algorithm is simple to implement and uses two tests a complex cell test and a star-shaped test. These tests can be efficiently performed on polyhedral objects using max-norm distance computation and linear programming. The complexity of our algorithm varies as a function of the size of narrow passages in the configuration space. We demonstrate the performance of our algorithm on environments with very small narrow passages or no collision-free paths. CiteSeerX Sage Publications, Inc. 2009-05-24 2007-11-19 2005 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1494 http //www.cs.unc.edu/~varadhan/papers/motion.pdf en 10.1.1.19.3462 10.1.1.20.8539 10.1.1.52.7808 10.1.1.31.1678 10.1.1.34.1071 10.1.1.88.5053 10.1.1.1.3224 10.1.1.66.385 10.1.1.58.2466 10.1.1.76.8798 10.1.1.88.305 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1495:Modeling Ship Arrivals in Ports:Eelco van Asperen Rommert Dekker Mark Polman Henk de Swaan Arons:2009-04-29 The model used in this report focuses on the analysis of ship waiting statistics and stock fluctuations under different arrival processes. However, the basic outline is the same central to both models are a jetty and accompanying tankfarm facilities belonging to a new chemical plant in the Port of Rotterdam. Both the supply of raw materials and the export of finished products occur through ships loading and unloading at the jetty. Since disruptions in the plants production process are very expensive, buffer stock is needed to allow for variations in ship arrivals and overseas exports through large ships. Ports provide jetty facilities for ships to load and unload their cargo. Since ship delays are costly, terminal operators attempt to minimize their number and duration. Here, simulation has proved to be a very suitable tool. However, in port simulation models, the impact of the arrival process of ships on the model outcomes tends to be underestimated. This article considers three arrival processes stock-controlled, equidistant per ship type, and Poisson. We assess how their deployment in a port simulation model, based on data from a real case study, affects the efficiency of the loading and unloading process. Poisson, which is the chosen arrival process in many client-oriented simulations, actually performs worst in terms of both ship delays and required storage capacity. Stock-controlled arrivals perform best with regard to ship delays and required storage capacity. In the case study two types of arrival processes were considered. The first type are the so-called stock-controlled arrivals, i.e., ship arrivals are scheduled in such a way, that a base stock level is maintained in the tanks. Given a base stock level of a raw material or ... 
CiteSeerX 2009-04-29 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1495 http //www.informs-cs.org/wsc03papers/222.pdf en 10.1.1.17.1837 10.1.1.120.9692 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1496:In Proceedings of the16th IFAC Symposium on Automatic Control in Aerospace, Elsevier Science Ltd, Oxford,:Uk Cognitive Tools Donald Sofge Dennis Perzanowski Marjorie Skubic Magdalena Bugajska J. Gregory Trafton Nicholas Cassimatis Derek Brock William Adams Alan Schultz:2009-04-19 Cognitive Systems Co-operative Control Speech Recognition Natural Language Human-Machine Interface Autonomous Mobile Robots The effective use of humanoid robots in space will depend upon the efficacy of interaction between humans and robots. The key to achieving this interaction is to provide the robot with sufficient skills for natural communication with humans so that humans can interact with the robot almost as though it were another human. This requires that a number of basic capabilities be incorporated into the robot, including voice recognition, natural language, and cognitive tools on-board the robot to facilitate interaction between humans and robots through use of common representations and shared humanlike behaviors. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1496 http //www.aic.nrl.navy.mil/~dennisp/ifac.aca.2004.pdf en 10.1.1.13.8248 10.1.1.101.9124 10.1.1.58.5211 10.1.1.99.4007 10.1.1.58.4797 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1497:Exploiting Computer Automation to Improve the Interview Process and Increase Survey Cooperation:Jeffrey C. Moore Jeffrey C. Moore:2009-04-19 burden conversational norms efficiency flow nonresponse/attrition questionnaire design respondent-friendly I. In Couper (2002) outlines the "challenges and opportunities" of recent and stillemerging technological developments on the conduct of survey research. This paper focuses on one such development -- the use of computer-assisted survey instruments in place of paper-andpencil questionnaires -- and it focuses on one particular opportunity which this development presents the ability to improve efficiency, "flow," and naturalness, and in general make the interview experience a more pleasant one for all participants, while still controlling question wording and sequencing. Moral arguments can be raised in defense of such efforts the potential for important practical benefits, including improved survey cooperation, lends more mundane but perhaps more potent support. Although the research literature is surprisingly scant, there is some evidence that improved instrument design can reduce nonresponse. A recent effort by the U.S. Census Bureau to redesign the core instrument for the Survey of Income and Program Participation (SIPP) offers additional support. Motivated in large measure by evidence of increasing unit nonresponse and attrition, the primary goal of the SIPP redesign effort was to improve the interview process, and in particular to seek ways to avoid violations of conversational norms (e.g., Grice, 1975). A great many of the SIPP interview process improvements would not have been feasible without the computerization of the survey instrument. 
This paper briefly summarizes many of the technology-based changes implemented in the SIPP instrument, and briefly describes a set of field experiments used to develop and refine the new procedures and to evaluate their success in achieving SIPP's redesign goals. Keywords burden, conversational norms, efficiency, flow, nonresponse/... CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1497 http //www.census.gov/srd/papers/pdf/rsm2004-01.pdf en 10.1.1.131.9305 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1499:Performance and Design Evaluation:Of The Raid-Ii Peter M. Chen Edward K. Lee Ann L. Drapeau Ethan L. Miller Srinivasan Seshan Ken Shirriff David A. Patterson Y H. Katz:2009-04-19 RAID-II is a high-bandwidth, networkattached storage server designed and implemented at the University of California at Berkeley. In this paper, we measure the performance of RAID-II and evaluate various architectural decisions made during the design process. We first measure the end-to-end performance of the system to be approximately 20 MB/s for both disk array reads and writes. We then perform a bottleneck analysis by examining the performance of each individual subsystem and conclude that the disk subsystem limits performance. By adding a custom interconnect board with a high-speed memory and bus system and parity engine, we are able to achieve a performance speedup of 8 to 15 over a comparative system using only off-theshelf hardware. CiteSeerX 2009-04-19 2007-11-19 1994 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1499 http //ssrc.cse.ucsc.edu/~elm/Papers/ipps93.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1500:The Marinov Motor, Notional Induction:Without Magnetic Field J. P Wesley:2009-04-19 Introduction The force of induction F on a charge q is given by FA=-qtcdd,(1) where A is the usual magnetic vector potential defined by A r rJr rrc - s ,(2) where J is the current density. Slowly varying effects are assumed here, where the basic theory may be given as a true relativity theory, involving the separation distance between two charges and its time derivatives. This force of induction, Eq. (1), yields Faraday's law of electromagnetic induction for the special case of an electromotive force (emf) around a fixed closed loop. In particular, emf d d d d d ' & ( 0 ) =- =- =- s s s sF q s tc tc an tc an A B ,(3) where F is the magnetic flux through the loop. It is observed in the laboratory that an emf is also induced when =A tc 0 , and the magnetic flux through the loop is changed by moving the loop, so Faraday's law becomes emf = - .-(4) Francisco Mller's (1987) experiments show that induction occurs locally and that the force CiteSeerX 2009-04-19 2007-11-19 2001 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1500 http //redshift.vif.com/JournalFiles/Pre2001/V05NO3PDF/v05n3wes.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1501:Biometric Verification Based on Grip-Pattern Recognition:Raymond Veldhuis Asker Bazen Joost Kauffman Pieter Hartel:2009-04-19 Biometric verification likelihood ratio smart gun grip-pattern recognition This paper describes the design, implementation and evaluation of a user-verification system for a smart gun, which is based on grip-pattern recognition. An existing pressure sensor consisting of an array of 44 44 piezoresistive elements is used to measure the grip pattern. An interface has been developed to acquire pressure images from the sensor. The values of the pixels in the pressure-pattern images are used as inputs for a verification algorithm, which is currently implemented in software on a PC. The verification algorithm is based on a likelihoodratio classifier for Gaussian probability densities. First results indicate that it is feasible to use grip-pattern recognition for biometric verification. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1501 http //www.ub.utwente.nl/webdocs/ctit/1/000000f5.pdf en 10.1.1.9.5838 10.1.1.101.5555 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1502:Relativistic Doppler Effect and the Principle of Relativity:W. Engelhardt:2009-04-19 Relativity Doppler Effect Aberration a private address Fasaneriestrasse 8 D-80636 München The frequency shifts predicted by the `relativistic' Doppler e#ect are derived in the photon picture of light. It turns out that, in general, the results do not depend exclusively on the relative velocity between observer and light source. CiteSeerX 2009-04-19 2007-11-19 2003 text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1502 http //redshift.vif.com/JournalFiles/V10NO4PDF/V10N4ENG.PDF en 10.1.1.58.3335 10.1.1.140.9931 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1503:Differential Association Rule Mining for the Study of Protein-Protein Interaction Networks:Christopher Besemann Anne Denton Ajay Yekkirala Ron Hutchison Marc Anderson:2009-04-19 Protein-protein interactions are of great interest to biologists. A variety of high-throughput techniques have been devised, each of which leads to a separate definition of an interaction network. The concept of differential association rule mining is introduced to study the annotations of proteins in the context of one or more interaction networks. Differences among items across edges of a network are explicitly targeted. As a second step we identify differences between networks that are separately defined on the same set of nodes. The technique of differential association rule mining is applied to the comparison of protein annotations within an interaction network and between different interaction networks. In both cases we were able to find rules that explain known properties of protein interaction networks as well as rules that show promise for advanced study. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1503 http //www.cs.ndsu.nodak.edu/%7Eadenton/publications/BIOKDD040712a.pdf en 10.1.1.40.6984 10.1.1.40.9892 10.1.1.12.6495 10.1.1.13.6963 10.1.1.113.6042 10.1.1.36.2485 10.1.1.10.7611 10.1.1.18.8344 10.1.1.12.7211 10.1.1.32.7066 10.1.1.12.3538 10.1.1.56.7889 10.1.1.12.8995 10.1.1.11.2425 10.1.1.27.9671 10.1.1.21.3747 10.1.1.1.5073 10.1.1.66.4476 10.1.1.67.617 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1504:Combining Adaptive and Deterministic Routing Evaluation of a Hybrid Router:Dianne Kumar And Dianne Kumar Walid A. Najjar:2009-04-19 This paper reports on the implementation and evaluation ofahybrid routing scheme that combines the advantages of deterministic and adaptive routing. An expanded version of this paper can be found in #1# In the deterministic, or dimension-ordered, routing algorithm a message is routed along decreasing dimensions with a dimension decrease occurring only when zero hops remain in all higher dimensions. Virtual channels #VCs# are included in the router to avoid deadlock #6#. Deterministic routing can su#er from congestion since only a single path between source and destination can be used CiteSeerX Springer 2009-04-19 2007-11-19 1999 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1504 http //www.cs.ucr.edu/%7Enajjar/papers/canpc99.pdf en 10.1.1.117.7403 10.1.1.80.5595 10.1.1.44.6553 10.1.1.53.4729 10.1.1.22.6577 10.1.1.24.4663 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1505:Towards a More Complete Model of Role:Adrian Baldwin Cheh Goh Cheh Goh:2009-04-19 In order to manage the use of roles for the purpose of access control, it is important to look at attributes beyond the consideration of capability assignment. Fundamentally, a generic attribute description using a constraint-based approach will allow many of the important aspects of role, such as scope, activation and deactivation, to be included. Furthermore, the commonly accepted concept of role hierarchy is challenged from the point of view of subsidiarity in real organisations, with the suggestion that role hierarchy has limited usefulness that does not seem to apply widely. CiteSeerX 2009-04-19 2007-11-19 1998 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1505 http //www.cs.kun.nl/is/Library/./Data/1998/Goh/Towards/1998-Goh-Towards.pdf en 10.1.1.47.3914 10.1.1.26.2311 10.1.1.45.1616 10.1.1.18.5632 10.1.1.103.8527 10.1.1.11.1495 10.1.1.21.1056 10.1.1.18.6290 10.1.1.88.1656 10.1.1.130.4738 10.1.1.57.6574 10.1.1.10.7333 10.1.1.21.3059 10.1.1.76.1573 10.1.1.85.2890 10.1.1.14.6686 10.1.1.59.2363 10.1.1.93.2667 10.1.1.100.3563 10.1.1.105.1864 10.1.1.88.5400 10.1.1.88.7033 10.1.1.90.1790 10.1.1.90.2382 10.1.1.90.3968 10.1.1.91.1743 10.1.1.94.4357 10.1.1.124.6837 10.1.1.130.3601 10.1.1.18.2266 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1506:Target Tracking with Distributed Sensors The Focus of Attention Problem:V. Isler Sanjeev Khanna J. Spletzer C. J. Taylor Volkan Isler A Camillo J. Taylor A:2009-04-19 In this paper, we consider the problem of assigning sensors to track targets so as to minimize the expected error in the resulting estimation for target locations. Specifically, we are interested in how disjoint pairs of bearing or range sensors can be best assigned to targets in order to minimize the expected error in the estimates. We refer to this as the focus of attention (FOA) problem. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1506 http //www.cis.upenn.edu/%7Eisleri/research/papers/foaCVIU.pdf en 10.1.1.36.8357 10.1.1.18.8488 10.1.1.19.8778 10.1.1.14.3443 10.1.1.58.6802 10.1.1.49.9816 10.1.1.84.4195 10.1.1.144.2859 10.1.1.71.7438 10.1.1.78.7656 10.1.1.126.3811 10.1.1.130.1224 10.1.1.132.8302 10.1.1.58.5357 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1507:Creating an Integrated Computer Assisted Learning and Assessment Experience in the School of European Languages and Cultures at the University of Edinburgh:John Hobbs John Hobbs Marcus Duran Marcus Duran Eh Jx:2009-04-19 In the field of Computer-Aided anything, acronyms abound. They are, after all, useful tools. However, there is a risk that we become constrained by them and, as a result, fail to see beyond them. CiteSeerX 2009-04-19 2007-11-19 2002 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1507 http //www.lboro.ac.uk/service/ltd/flicaa/conf2002/pdfs/hobbs_jm.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1508:Web Structure Analysis for Information Mining:Lakshmi Vijjappu Ah-Hwee Ah-hwee Tan Chew-lim Tan:2009-04-19 Our approach to extracting information from the web analyzes the structural content of web pages through exploiting the latent information given by HTML tags. For each specific extraction task, an object model is created consisting of the salient fields to be extracted and the corresponding extraction rules based on a library of HTML parsing functions. We derive extraction rules for both single-slot and multiple-slot extraction tasks which we illustrate through two sample domains. CiteSeerX 2009-04-19 2007-11-19 2001 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1508 http //www.csc.liv.ac.uk/~wda2001/Papers/18_lakshmi_wda2001.pdf en 10.1.1.46.6008 10.1.1.32.8501 10.1.1.51.8159 10.1.1.54.3298 10.1.1.20.8120 10.1.1.36.6286 10.1.1.47.6312 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1509:Bayesian Inference for Transductive Learning of Kernel Matrix Using the Tanner-Wong Data Augmentation Algorithm:Zhihua Zhang Dit-Yan Yeung James T. Kwok:2009-04-19 In kernel methods, an interesting recent development seeks to learn a good kernel from empirical data automatically. In this paper, by regarding the transductive learning of the kernel matrix as a missing data problem, we propose a Bayesian hierarchical model for the problem and devise the Tanner-Wong data augmentation algorithm for making inference on the model. The Tanner-Wong algorithm is closely related to Gibbs sampling, and it also bears a strong resemblance to the expectation-maximization (EM) algorithm. For an e#cient implementation, we propose a simplified Bayesian hierarchical model and the corresponding TannerWong algorithm. We express the relationship between the kernel on the input space and the kernel on the output space as a symmetric-definite generalized eigenproblem. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1509 http //www.cs.ust.hk/~zhzhang/papers/icml04tw.pdf en 10.1.1.133.4884 10.1.1.16.1922 10.1.1.23.6757 10.1.1.37.8662 10.1.1.72.509 10.1.1.71.5318 10.1.1.94.7695 10.1.1.119.4637 10.1.1.102.9977 10.1.1.73.7176 10.1.1.77.7873 10.1.1.112.9663 10.1.1.116.111 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1510:Genetic Improvisation Model - a framework for real-time performance environments:Paul Nemirovsky Richard Watson:2009-04-19 This paper presents the current state in an ongoing development of the Genetic Improvisation Model (GIM) a framework for the design of real-time improvisational systems. The aesthetic rationale for the model is presented, followed by a discussion of its general principles. A discussion of the Emonic Environment, a networked system for audiovisual creation built on GIM's principles, follows CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1510 http //www.media.mit.edu/~pauln/research/emonic/docs/evomusart2003.pdf en 10.1.1.46.6615 10.1.1.58.3628 10.1.1.1.4031 10.1.1.57.9915 10.1.1.59.1804 10.1.1.69.8249 10.1.1.90.5797 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1511:Mean-Variance Hedging under Additional:Market Information Frank Frank Thierbach Frank Thierbach:2009-04-19 JEL classification G11 G12 In this paper we analyse the mean-variance hedging approach in an incomplete market under the assumption of additional market information, which is represented by a given, finite set of observed prices of non-attainable contingent claims. Due to no-arbitrage arguments, our set of investment opportunities increases and the set of possible equivalent martingale measures shrinks. Therefore, we obtain a modified mean-variance hedging problem, which takes into account the observed additional market information. Solving this by means of the techniques developed by Gourieroux, Laurent and Pham (1998), we obtain an explicit description of the optimal hedging strategy and an admissible, constrained variance-optimal signed martingale measure, that generates both the approximation price and the observed option prices. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1511 http //www.finasto.uni-bonn.de/papers/mvhedging.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1512:Sis-Prueba:Tool For Rapid Pedro Concejero Cerezo Juan José Rodríguez Soler Daniel Tapias Merino Telefónica Móviles España:2009-04-19 SIS PRUEBA is a software tool to integrate usability and user-centred design principles in the development process of services within Telefnica Mviles Espaa (TME), the largest mobile telecommunications operator in Spain. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1512 http //sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS//Vol-103/concejero-et-al.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1513:Reducing the Computational Load of Energy Evaluations for:Protein Folding Eunice Eunice E. Santos Eugene Santos:2009-04-19 protein folding triangular lattice HP energy model caching reuse evolutionary Predicting the native conformation using computational protein models requires a large number of energy evaluations even with simplified models such as hydrophobic-hydrophilic (HP) models. Clearly, energy evaluations constitute a significant portion of computational time. We hypothesize that given the structured nature of algorithms that search for candidate conformations such as stochastic methods, energy evaluation computations can be cached and reused, thus saving computational time and e#ort. In this paper, we present a caching approach and apply it to 2D triangular HP lattice model. We provide theoretical analysis and prediction of the expected savings from caching as applied this model. We conduct experiments using a sophisticated evolutionary algorithm that contains elements of local search, memetic algorithms, diversity replacement, etc. in order to verify our hypothesis and demonstrate a significant level 1 of savings in computational e#ort and time that caching can provide. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1513 http //excalibur.brc.uconn.edu/Papers/Conference/bibe04-submit.pdf en 10.1.1.53.7409 10.1.1.46.770 10.1.1.10.5827 10.1.1.54.8912 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1514:Human-Robot Interactions in Active Sensor Networks:Alexei Makarenko Tobias Kaupp Ben Grocholsky Hugh Durrant-whyte:2009-04-19 This paper considers the modes of interaction between one or several human operators and an active sensor network -- a fully decentralized network of sensors some or all of which have actuators and are in that sense active. The primary goal of this study is to investigate the conditions under which the human involvement will not jeopardize scalability of the overall system. Two aspects of human-robot interaction are considered the ways in which the global view of the system may be conveyed to the operators, and how the operators may influence the behavior of the system during the course of its operation. The results of analysis favor peer-topeer information-based interactions between the operators and the network whereby the humans act as extended sensors and communication nodes of the network itself. Experiments on an indoor active sensor network are described. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1514 http //www.grasp.upenn.edu/~bpg/makarenko03_hum_rob_in_asn.pdf en 10.1.1.110.6935 10.1.1.18.2772 10.1.1.70.4661 10.1.1.42.1968 10.1.1.14.7286 10.1.1.123.9637 10.1.1.58.5090 10.1.1.11.6081 10.1.1.58.5988 10.1.1.4.8345 10.1.1.58.4968 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1515:Disjoint Sum of Product Minimization by Evolutionary Algorithms:Nicole Drechsler Mario Hilgemeier Görschwin Fey Rolf Drechsler:2009-04-19 Recently, an approach has been presented to minimize Disjoint Sumof -Products (DSOPs) based on Binary Decision Diagrams (BDDs). Due to the symbolic representation of cubes for large problem instances, the method is orders of magnitude faster than previous enumerative techniques. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1515 http //www.informatik.uni-bremen.de/agra/doc/work/evohot04.pdf en 10.1.1.1.5124 10.1.1.30.5588 10.1.1.29.2722 10.1.1.12.7617 10.1.1.125.1065 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1516:Acta Cryst. (2004). B60, 481489 DOI 10.1107/S0108768104013564 481 Acta Crystallographica Section B:Structural Science Issn Valeria Ferretti A Paola Gilli A Pier Andrea Borea B A Centro Di:2009-04-19 this paper are available from the IUCr electronic archives (Reference NA5019). Services for accessing these data are described at the back of the journal CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1516 http //journals.iucr.org/b/issues/2004/04/00/na5019/na5019.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1517:The Lattice-Boltzmann Method for Simulating Gaseous Phenomena:Xiaoming Wei Student Member Wei Li Klaus Mueller Arie E. Kaufman:2009-04-19 We present a physically-based, yet fast and simple method to simulate gaseous phenomena. In our approach, the incompressible Navier-Stokes (NS) equations governing fluid motion have been modeled in a novel way to achieve a realistic animation. We introduce the Lattice Boltzmann Model (LBM), which simulates the microscopic movement of fluid particles by linear and local rules on a grid of cells so that the macroscopic averaged properties obey the desired NS equations. The LBM is defined on a 2D or 3D discrete lattice, which is used to solve fluid animation based on different boundary conditions. The LBM simulation generates, in real-time, an accurate velocity field and can incorporate an optional temperature field to account for the buoyancy force of hot gas. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1517 http //www.cs.sunysb.edu/%7Emueller/papers/smokeTVCG04.pdf en 10.1.1.15.9203 10.1.1.25.5737 10.1.1.17.1966 10.1.1.131.9652 10.1.1.38.7021 10.1.1.23.4545 10.1.1.84.7193 10.1.1.23.5064 10.1.1.6.4632 10.1.1.15.6033 10.1.1.61.3740 10.1.1.104.6696 10.1.1.136.5635 10.1.1.1.2520 10.1.1.72.4834 10.1.1.83.4851 10.1.1.95.5556 10.1.1.110.7589 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1518:Peer-to-Peer Human-Robot Interaction for Space Exploration:Terrence Fong And Terrence Fong Illah Nourbakhsh:2009-04-19 NASA has embarked on a long-term program to develop human-robot systems for sustained, affordable space exploration. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1518 http //www.ri.cmu.edu/pub_files/pub4/fong_terrence_w_2004_1/fong_terrence_w_2004_1.pdf en 10.1.1.36.6789 10.1.1.2.8285 10.1.1.16.516 10.1.1.6.118 10.1.1.4.2304 10.1.1.99.2775 10.1.1.61.1527 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1519:Survey Taxonomy of Packet Classification Techniques:David Taylor Sa Da Prot Dp Flowid Pt:2009-04-19 Packet classification is an enabling function for a variety of Internet applications including Quality of Service, security, monitoring, and multimedia communications. In order to classify a packet as belonging to a particular flow or set of flows, network nodes must perform a search over a set of filters using multiple fields of the packet as the search key. In general, there have been two major threads of research addressing packet classification algorithmic and architectural. A few pioneering groups of researchers posed the problem, provided complexity bounds, and offered a collection of algorithmic solutions. Subsequently, the design space has been vigorously explored by many offering new algorithms and improvements upon existing algorithms. Given the inability of early algorithms to meet performance constraints imposed by high speed links, researchers in industry and academia devised architectural solutions to the problem. This thread of research produced the most widely-used packet classification device technology, Ternary Content Addressable Memory (TCAM). New architectural research combines intelligent algorithms and novel architectures to eliminate many of the unfavorable characteristics of current TCAMs. We observe that the community appears to be converging on a combined algorithmic and architectural approach to the problem. Using a taxonomy based on the high-level approach to the problem and a minimal set of running examples, we provide a survey of the seminal and recent solutions to the problem. It is our hope to foster a deeper understanding of the various packet classification techniques while providing a useful framework for discerning relationships and distinctions. 
CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1519 http //www.arl.wustl.edu/Publications/2000-04/wucse-2004-24.pdf en 10.1.1.137.3147 10.1.1.121.1309 10.1.1.13.9939 10.1.1.39.697 10.1.1.24.3532 10.1.1.29.4777 10.1.1.12.3539 10.1.1.112.1058 10.1.1.12.5688 10.1.1.41.4744 10.1.1.41.9413 10.1.1.3.5167 10.1.1.32.9914 10.1.1.105.3710 10.1.1.58.2312 10.1.1.58.5079 10.1.1.13.3703 10.1.1.28.9719 10.1.1.135.9578 10.1.1.6.107 10.1.1.121.8780 10.1.1.133.2753 10.1.1.78.9584 10.1.1.97.442 10.1.1.86.5588 10.1.1.104.4868 10.1.1.108.4619 10.1.1.65.3134 10.1.1.76.3971 10.1.1.77.8580 10.1.1.83.3090 10.1.1.85.8699 10.1.1.89.6993 10.1.1.89.7016 10.1.1.91.9738 10.1.1.94.8479 10.1.1.97.6162 10.1.1.72.6318 10.1.1.126.1241 10.1.1.61.5847 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1520:Friscof Risco:Framework Of Information Eckhard D. Falkenberg Wolfgang Hesse Paul Lindgreen Björn E. Nilsson J. L. Han Oei Colette Rolland Ronald K. Stamper Frans J. M. Van Assche Alexander A. Verrijn-stuart Klaus Voss:2009-04-19 this report, Paul Lindgreen as secretary and as editor of the interim report [Lin90a] CiteSeerX 2009-04-19 2007-11-19 1998 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1520 http //www.cs.kun.nl/is/Library/./Data/1998/Lindgreen/FRISCO/1998-Lindgreen-FRISCO.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1521:Average performance of quasi Monte Carlo methods for global optimization:Hisham A. Al-Mharmah:2009-04-24 In this paper we compare the average performance of one class of low-discrepancy quasi-Monte Carlo sequences for global optimization. Weiner measure is assumed as the probability prior on all optimized functions. We show how to construct van der Corput sequences and we prove their consistency. Numerical experimentation shows that the van der Corput sequence in base 2 has a better average performance. CiteSeerX 2009-04-24 2007-11-19 1998 text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1521 http //www.informs-cs.org/wsc98papers/083.PDF en 10.1.1.22.679 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1522:The Virtual Ms Lyceum A Consortium For Modeling And Simulation Technology:D. J. Medeiros E. F. Watson J. S. Carson M. S. Manivannan Steven D. Farr Alex F. Sisti:2009-04-19 This paper addresses the opportunity to put into place a virtual consortium for modeling and simulation. While periodic conferences such as the Winter Simulation Conference are tremendously vital to the continued growth of modeling and simulation research, they do not offer the day-to-day technical exchange that can now be made possible with matured collaborative technologies. CiteSeerX 2009-04-19 2007-11-19 1998 text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1522 http //www.informs-cs.org/wsc98papers/228.PDF en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1523:Classification And Regression Trees, Cart - A User Manual For Identifying Indicators Of Vulnerability To Famine And Chronic Food Insecurity:Auser Manualfor Yisehac Yohannes Patrick Webb:2009-04-19 FAMINE AND CHRONIC FOOD INSECURITY YISE HAC YO HAN NES ue, the Netherlands, Norway, the Philippines, the Rockefeller Foundation, the Rural Industries Research and Development Corporation (Australia), South Africa, the Southern African Development Bank, Spain, Sweden, Switzerland, the United Kingdom, the United Nations Children's Fund, the United States, and the World Bank. CLASSIFIC ATION AND REGRESSION TREES, CART^TM A USER MANUAL FOR IDENTIFYING INDIC A TORS OF VULNERABILITY TO FAMINE AND CHRONIC FOOD INSECURITY YISEHAC YOHANNES PATRICK WEBB MICROCOMPUTERS IN POLICY RESEARCH INTERNATIONAL FOOD POLICY RESEARCH INSTITUTE CART is a registered trademark of California Statistical Software, Inc. Copyright 1999 by the International Food Policy Research Institute 2033 K Street, N.W. Washington, D.C. 20006-1002 U.S.A. Library of Congress Cataloging-in-Publication Data available Yohannes, Yisehac Classification and Regression Trees, Cart^TM A User Manual for Identifying Indicators of Vulnerability to Famine and Chronic Food Insecurity / Yise CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1523 http //www.ifpri.org/pubs/microcom/micro3.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1524:An Approach for Locating Segmentation Points of Handwritten Digit Strings:Using Neural Network:2009-04-19 An approach for segmentation of handwritten touching numeral strings is presented in this paper. A neural network has been designed to deal with various types of touching observed frequently in numeral strings. A numeral string image is split into a number of line segments while stroke extraction is being performed and the segments are represented with straight lines. Four types of primitive are defined based on the lines and used for representing the numeral string in more abstractive way and extracting clues on touching information from the string. Potential segmentation points are located using the neural network by active interpretation of the features collected from the primitives. Also, the run-length coding scheme is employed for efficient representation and manipulation of images. On a test set collected from real mail pieces, the segmentation accuracy of 89.1% was achieved, in image level, in a preliminary experiment. 1. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1524 http //www.csc.liv.ac.uk/%7Eprima/ICDAR2003/Papers/0025_697_kim_g.pdf en 10.1.1.35.2218 10.1.1.44.7527 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1525:An Overview of JML Tools and Applications:Lilian Burdy Yoonsik Cheon David Cok Michael D. Ernst Joe Kiniry Gary T. Leavens K. Rustan M. Leino Erik Poll:2009-04-19 formal specification Java runtime assertion checking static checking The Java Modeling Language (JML) can be used to specify the detailed design of Java classes and interfaces by adding annotations to Java source files. The aim of JML is to provide a specification language that is easy to use for Java programmers and that is supported by a wide range of tools for specification type-checking, runtime debugging, static analysis, and verification. CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1525 http //www.gemplus.com/smart/r_d/publications/pdf/BCC_03jm.pdf en 10.1.1.137.4260 10.1.1.39.1223 10.1.1.36.9943 10.1.1.29.6183 10.1.1.70.1745 10.1.1.11.2133 10.1.1.17.3839 10.1.1.24.2555 10.1.1.34.8403 10.1.1.52.3873 10.1.1.10.4654 10.1.1.1.6063 10.1.1.16.1895 10.1.1.117.5270 10.1.1.132.7016 10.1.1.1.6522 10.1.1.2.5030 10.1.1.16.800 10.1.1.10.547 10.1.1.13.5473 10.1.1.15.9976 10.1.1.120.795 10.1.1.26.1982 10.1.1.17.1067 10.1.1.2.1207 10.1.1.25.9636 10.1.1.5.8315 10.1.1.1.9075 10.1.1.39.2890 10.1.1.128.9986 10.1.1.1.3304 10.1.1.10.8374 10.1.1.12.442 10.1.1.57.6725 10.1.1.29.9417 10.1.1.5.9229 10.1.1.26.3231 10.1.1.20.6902 10.1.1.17.9620 10.1.1.72.3429 10.1.1.11.8032 10.1.1.11.1854 10.1.1.19.7736 10.1.1.59.4118 10.1.1.11.2494 10.1.1.13.4051 10.1.1.16.1105 10.1.1.19.2169 10.1.1.91.8343 10.1.1.85.6366 10.1.1.103.1977 10.1.1.19.6416 10.1.1.81.714 10.1.1.4.6241 10.1.1.11.2133 10.1.1.10.4654 10.1.1.115.5693 10.1.1.7.4458 10.1.1.5.8315 10.1.1.61.5186 10.1.1.73.5717 10.1.1.57.6725 10.1.1.11.1838 10.1.1.142.2782 10.1.1.137.316 10.1.1.129.1678 10.1.1.126.8052 10.1.1.131.2147 10.1.1.94.1164 10.1.1.83.3189 10.1.1.11.2494 10.1.1.1.6054 10.1.1.142.6301 10.1.1.86.6061 10.1.1.94.7598 10.1.1.122.2974 10.1.1.10.187 10.1.1.126.4427 
10.1.1.128.5240 10.1.1.67.8455 10.1.1.131.6019 10.1.1.10.3303 10.1.1.102.4611 10.1.1.59.4566 10.1.1.94.5189 10.1.1.7.2188 10.1.1.143.5200 10.1.1.100.3930 10.1.1.111.4391 10.1.1.68.2636 10.1.1.79.7758 10.1.1.98.2224 10.1.1.101.9229 10.1.1.59.4403 10.1.1.66.8607 10.1.1.71.6156 10.1.1.71.8962 10.1.1.84.1342 10.1.1.89.8541 10.1.1.118.8269 10.1.1.107.6026 10.1.1.4.6869 10.1.1.63.4449 10.1.1.64.1790 10.1.1.80.5390 10.1.1.90.757 10.1.1.98.8827 10.1.1.127.2428 10.1.1.127.6051 10.1.1.138.5310 10.1.1.2.5369 10.1.1.4.3348 10.1.1.61.7073 10.1.1.74.7926 10.1.1.76.6474 10.1.1.91.9916 10.1.1.97.8034 10.1.1.98.2120 10.1.1.110.687 10.1.1.124.6567 10.1.1.142.3205 10.1.1.100.4344 10.1.1.100.8097 10.1.1.100.9852 10.1.1.101.6814 10.1.1.102.5622 10.1.1.104.861 10.1.1.105.5824 10.1.1.107.5138 10.1.1.59.6327 10.1.1.63.562 10.1.1.63.5758 10.1.1.63.7483 10.1.1.64.6885 10.1.1.64.9362 10.1.1.62.3908 10.1.1.119.5350 10.1.1.67.1499 10.1.1.67.5887 10.1.1.67.8572 10.1.1.69.2171 10.1.1.70.1165 10.1.1.70.6538 10.1.1.71.1298 10.1.1.71.698 10.1.1.71.769 10.1.1.71.962 10.1.1.73.1567 10.1.1.74.4934 10.1.1.74.7928 10.1.1.122.2332 10.1.1.76.3519 10.1.1.77.1867 10.1.1.77.2580 10.1.1.77.4182 10.1.1.125.1768 10.1.1.78.7630 10.1.1.62.2614 10.1.1.81.8303 10.1.1.84.3469 10.1.1.84.3675 10.1.1.84.6502 10.1.1.85.2476 10.1.1.85.4887 10.1.1.87.5805 10.1.1.87.9527 10.1.1.89.2433 10.1.1.89.3328 10.1.1.90.1517 10.1.1.90.2534 10.1.1.91.1298 10.1.1.92.1775 10.1.1.93.3743 10.1.1.94.2013 10.1.1.94.7198 10.1.1.95.1658 10.1.1.95.2688 10.1.1.95.3548 10.1.1.97.5430 10.1.1.98.6399 10.1.1.99.8561 10.1.1.111.4564 10.1.1.112.7809 10.1.1.113.6155 10.1.1.113.7814 10.1.1.115.3770 10.1.1.116.5172 10.1.1.117.7484 10.1.1.118.3171 10.1.1.118.3882 10.1.1.124.2718 10.1.1.124.8466 10.1.1.124.8516 10.1.1.126.2574 10.1.1.126.3474 10.1.1.128.5756 10.1.1.130.5902 10.1.1.130.7155 10.1.1.132.319 10.1.1.133.4597 10.1.1.135.7996 10.1.1.138.529 10.1.1.139.275 10.1.1.139.4030 10.1.1.5.4720 10.1.1.58.8470 10.1.1.59.3381 10.1.1.61.4532 10.1.1.140.1484 
10.1.1.141.3512 10.1.1.142.4289 10.1.1.142.6329 10.1.1.108.5722 10.1.1.144.1222 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1526:Triage Performance Isolation and Differentiation for Storage Systems:Magnus Karlsson Christos Karamanolis Xiaoyun Zhu:2009-04-19 Ensuring performance isolation and differentiation among workloads that share a storage infrastructure is a basic requirement in consolidated data centers. Existing management tools rely on resource provisioning to meet performance goals they require detailed knowledge of the system characteristics and the workloads. Provisioning is inherently slow to react to system and workload dynamics, and in the general case, it is impossible to provision for the worst case. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1526 http //www.hpl.hp.com/research/ssp/papers/2004-06-iwqos-triage.pdf en 10.1.1.16.8551 10.1.1.114.7135 10.1.1.141.1 10.1.1.17.502 10.1.1.28.2128 10.1.1.26.662 10.1.1.13.8238 10.1.1.23.3111 10.1.1.16.7583 10.1.1.17.2232 10.1.1.64.986 10.1.1.65.8781 10.1.1.4.6982 10.1.1.19.1470 10.1.1.1.1904 10.1.1.14.8619 10.1.1.4.4818 10.1.1.75.84 10.1.1.65.3456 10.1.1.129.3204 10.1.1.109.630 10.1.1.113.223 10.1.1.72.2528 10.1.1.59.317 10.1.1.121.3572 10.1.1.119.1641 10.1.1.72.3158 10.1.1.74.8799 10.1.1.79.9021 10.1.1.85.8116 10.1.1.135.7692 10.1.1.104.267 10.1.1.107.2911 10.1.1.62.6629 10.1.1.64.5770 10.1.1.64.9860 10.1.1.65.1125 10.1.1.67.1517 10.1.1.67.2395 10.1.1.72.374 10.1.1.79.5247 10.1.1.79.748 10.1.1.81.5717 10.1.1.83.4762 10.1.1.84.3590 10.1.1.85.6390 10.1.1.89.1736 10.1.1.89.2790 10.1.1.93.7577 10.1.1.94.3072 10.1.1.94.5062 10.1.1.111.7201 10.1.1.113.4918 10.1.1.118.881 10.1.1.123.8174 10.1.1.133.38 10.1.1.134.9068 10.1.1.136.8533 10.1.1.130.7318 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1527:On-line Handwritten Japanese Text Recognition free from Constrains on Line:Direction And Character Masaki Nakagawa Motoki Onuma:2009-04-19 This paper describes an on-line handwritten Japanese text recognition method that is liberated from constraints on writing direction (line direction) and character orientation. This method estimates the line direction and character orientation using the time sequence information of pen-tip coordinates and employs writingbox -free recognition with context processing combined. The method can cope with a mixture of vertical, horizontal and skewed lines with arbitrary character orientations. It is expected useful for tablet PC's, interactive electronic whiteboards and so on. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1527 http //www.csc.liv.ac.uk/%7Eprima/ICDAR2003/Papers/0095_627_masaki_n.pdf en 10.1.1.103.5812 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1528:Data Transformation for Warehousing Web Data:Yan Zhu Christof Yan Zhu Christof Bornhövd Alejandro P. Buchmann:2009-04-19 In order to analyze market trends and make reasonable business plans, a company's local data is not sufficient. Decision making must also be based on information from suppliers, partners and competitors. This external data can be obtained from the Web in many cases, but must be integrated with the company's own data, for example, in a data warehouse. To this end, Web data has to be mapped to the star schema of the warehouse. In this paper we propose a semi-automatic approach to support this transformation process. Our approach is based on the use a rooted labeled tree representation of Web data and the existing warehouse schema. Based on this common view we can compare source and target schemata to identify correspondences. We show how the correspondences guide the transformation to be accomplished automatically. We also explain the meaning of recursion and restructuring in mapping rules, which are the core of the transformation algorithm. CiteSeerX 2009-04-19 2007-11-19 2001 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1528 http //www.cs.kun.nl/is/Library/./Data/2001/Zhu/Data/2001-Zhu-Data.pdf en 10.1.1.122.4181 10.1.1.33.3465 10.1.1.25.1724 10.1.1.24.9229 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1529:Discriminant Projections Embedding for Nearest Neighbor Classification.:Petia Radeva And Petia Radeva Jordi Vitrià:2009-04-19 In this paper we introduce a new embedding technique to linearly project labeled data samples into a new space where the performance of a Nearest Neighbor classifier is improved. The approach is based on considering a large set of simple discriminant projections and finding the subset with higher classification performance. In order to implement the feature selection process we propose the use of the adaboost algorithm. The performance of this technique is tested in a multiclass classification problem related to the production of cork stoppers for wine bottles. CiteSeerX Springer Verlag 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1529 http //www.cvc.uab.es/~jordi/ciarp2004.pdf en 10.1.1.99.3419 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1530:Vowel - Zero Alternations in Czech Prefixes:Tobias Scheer Tobias Scheer Clite E -e:2009-04-19 e inchoative, "up" p#ed 16 48 "before, in front of" roz 80 295 inch., "disperse/ break into pieces" nad 5 33 "over" pod 26 74 "under" od 41 253 distantiational movement sum 195 762 TOTAL 957 (6) the secret must be found in the different status of stem-initial CC-clusters. (7) stem-initial CCs observed with a. prefixal-V only +e b. prefixal - only -e c. both mix +e only 17 CCs -e only 38 CCs ct, dn, d#, jm, lstn, mk, pn, ps, rv, #v, sch, sr, v, tn, v#, z#, #r bl, b#, cl, cv, #l, f#, fr, hl, hm, hv, chl, chrchl, km, kr, k#, kv, m#, mr, pl, pt, sh, sv, k, n, p, r, tl, tr, tv, vd, vr, zbr, zp, zt, #h, #m, ##, #v mix 35 CCs br, #t, dm, dr, dv, hn, hr, h#, chv, jd, kd, kl, ml, mn, pj, pr, p#, sk, sl, sm, sn, sp, st, l, t, t#, v#, vl, v#, v, vz, zd, zl, zn, zv TOTAL nb CC 90 (8) A given root belongs to one and only one of these three groups. (9) CC mix represented by how many it CiteSeerX 2009-04-19 2007-11-19 1998 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1530 http //www.unice.fr/dsl/tobweb/papers/ScheerHdtSzeged98.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1531:Automatic Construction of Navigable Concept Networks Characterizing Text Databases:Claudio Carpineto Giovanni Romano Fondazione Ugo Bordoni:2009-04-19 In this paper we present a comprehensive approach to conceptual structuring and intelligent navigation of text databases. Given any collection of texts, we first automatically extract a set of index terms describing each text. Next, we use a particular lattice conceptual clustering method to build a network of clustered texts whose nodes are described using the index terms. We argue that the resulting network supports an hybrid navigational approach to text retrieval - implemented into an actual user interface - that combines browsing potentials with good retrieval performance. We present the results of an experiment on subject searching where this approach outperformed a conventional Boolean retrieval system. CiteSeerX Springer-Verlag 2009-04-19 2007-11-19 1995 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1531 http //search.fub.it/claudio/pdf/AIIA1995.pdf en 10.1.1.21.1806 10.1.1.64.7424 10.1.1.14.7549 10.1.1.26.1391 10.1.1.122.5391 10.1.1.1.4542 10.1.1.50.9283 10.1.1.140.4388 10.1.1.70.980 10.1.1.72.9991 10.1.1.60.2145 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1532:Lawrence S. Brakmo, Deborah A. Wallach, Marc A. Viredaz:Mobile And Media Lawrence S. Brakmo Lawrence S. Brakmo Deborah A. Wallach Deborah A. Wallach Marc A. Viredaz Marc A. Viredaz:2009-04-19 Energy management has become one of the great challenges in portable computing. This is the result of the increasing energy requirements of modern portable devices without a corresponding increase in battery technology. Sleep is a new energy reduction technique for handheld devices that is most effective when the handheld's processor is lightly loaded, such as when the user is reading a document or looking at a web page. When possible, rather than using the processor's idle mode, Sleep tries to put the processor in sleep mode for short periods (less than one second) without affecting the user's experience. To enhance the perception that the system is on, an image is maintained on the display and activity is resumed as a result of external events such as touch-screen and button activity. We have implemented Sleep on a prototype pocket computer, where it has reduced energy consumption by up to 60%. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1532 http //www.hpl.hp.com/techreports/2004/HPL-2004-11.pdf en 10.1.1.143.7417 10.1.1.39.3266 10.1.1.121.5295 10.1.1.29.6746 10.1.1.31.4277 10.1.1.4.1582 10.1.1.108.8205 10.1.1.36.2109 10.1.1.111.4810 10.1.1.134.8329 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1533:Answers to the Top Ten Input Modeling Questions:Bahar Biller Barry L. Nelson:2009-08-25 In this tutorial we provide answers to the top ten inputmodeling questions that new simulation users ask, point out common mistakes that occur and give relevant references. We assume that commercial input-modeling software will be used when possible, and only suggest non-commercial options when there is little else available. Detailed examples will be provided in the tutorial presentation. CiteSeerX 2009-08-25 2007-11-19 2002 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1533 http //www.informs-cs.org/wsc02papers/005.pdf en 10.1.1.58.5325 10.1.1.105.5693 10.1.1.107.780 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.100.8780:Jaguar Java in Next-Generation Database Systems:Johannes Gehrke Www Page:2008-07-01 • Title Jaguar Java in Next-Generation Database Systems Keywords Extensibility query optimization heterogeneous environments database compression. Project Summary This project explores fundamental systems issues in query processing performance. We investigate this problem from three different directions client-server processing, heterogeneous environments, and database compression. First, we devised new query processing strategies than push processing capabilities into the client, and we devised query execution plans that can span server and clients. This allows us to trade resource usage between client, server and the interconnection network. We then extended this work to parallel query processing in heterogeneous environments we are currently implementing a parallel dataflow engine that adapts naturally to resource imbalances at the hardware components. Last, we are investigating the use of compression in database systems. We devised a new framework for database compression and new query processing and query optimization strategies to integrate compression into a modern query processor. All our techniques have been implemented in the NSF-funded Cornell Predator object-relational database system. We extended the system with several ways to store compressed relations, and we implemented a fully compression-aware query optimizer. To best of our knowledge, our work is the first result on compression-aware query optimization. Publications and Products � Project homepage CiteSeerX 2008-07-01 2008-04-02 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.100.8780 http //itlab.uta.edu/idm01/FinalReports/reports/IDM01R048.pdf en 10.1.1.20.9548 10.1.1.26.9191 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.106.4689:Patterns for Next-Generation Database Systems IST-2001-33058 Recent Advances on Pattern Representation and Management:I. Ntoutsi (cti/piraeus A. Pikrakis G. Tsatsaronis (aueb E. Vrachnos Michalis Vazirgiannis Maria Halkidi Daniel A. Keim Irene Ntoutsi Aggelos Pikrakis Sergios Theodoridis Yannis Theodoridis George Tsatsaronis Euripides Vrachnos:2008-07-01 patterns data mining pattern modeling pattern-bases information retrieval Pattern Base Management Systems Research supported by the Commission of the European Communities under the Information CiteSeerX 2008-07-01 2008-04-03 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.106.4689 http //www.db-net.aueb.gr/gbt/publications/PANDA_TR-2003-04.pdf en 10.1.1.40.6757 10.1.1.108.8490 10.1.1.33.3138 10.1.1.144.4956 10.1.1.42.3240 10.1.1.56.8772 10.1.1.32.9565 10.1.1.50.5717 10.1.1.41.4883 10.1.1.105.8622 10.1.1.102.5562 10.1.1.16.976 10.1.1.34.2745 10.1.1.7.6588 10.1.1.44.8451 10.1.1.5.6904 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.122.192:Query by Templates Using the Shape of Information to Search Next-Generation Databases:Arijit Sengupta Andrew Dillon:2008-12-04 Abstract—We present a user-centered database query language called QBT (Query By Templates) for user communication with databases containing complex structured data, such as data stored in the Extensible Markup Language (XML). XML has revolutionized data storage as well as representation and transfer methods in today’s internet applications. The growing popularity of XML as a language for the representation of data has enabled its use for several applications involving storage, interchange, and retrieval of data. Several textual query languages have been proposed for XML retrieval, including the World Wide Web Consortium’s (W3C) recommendation of XQuery. Native XML database systems have been implemented, all of which provide methods for user communication with the database, although most communication methods use text-based query languages or form-based interfaces. QBT, the language presented here, is one of the first attempts toward a generalized alternative language that is based on human factors of familiarity. It is ideal for documents with a simple yet highly recognizable layout (e.g., poems, dictionaries, journals, etc.). We present the QBT language and report results from an initial usability test that shows promise for this type of an interface as a generalized user–database communication method. Index Terms—Complex structured data, Extensible Markup Language (XML), information shape, query evaluation, query languages, query processing, visual languages, XQuery. 
CiteSeerX 2008-12-04 2008-12-03 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.122.192 http //www.ischool.utexas.edu/~adillon/Journals/IEEEJit.pdf en 10.1.1.109.4049 10.1.1.11.6264 10.1.1.22.7172 10.1.1.33.1762 10.1.1.102.1564 10.1.1.35.4300 10.1.1.20.7529 10.1.1.17.933 10.1.1.57.2983 10.1.1.17.4528 10.1.1.92.5486 10.1.1.110.6779 10.1.1.28.2863 10.1.1.105.3351 10.1.1.2.8978 10.1.1.104.2288 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.52.456:Security in Next-Generation Databases:Chris Strahorn:2009-04-12 this paper, a summary of the various models presented for securing next-generation databases will be given. Additionally, an overview of the security features in commercial next-generation databases is also given in order to show the need for further work in this field. 1 Introduction CiteSeerX 2009-04-12 2007-11-22 1998 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.52.456 http //www.db.cs.ucdavis.edu/teaching/289F/papers/chris.ps en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.66.4692:Security in Next-Generation Databases:Chris Strahorn:2008-07-01 In the past several years, several new types of databases have moved out of the academic world and have been released as commercial products. These new types of databases are commonly referred to as next-generation databases and include object-oriented, object-relational, active, and deductive databases. Each of these types of database offer an extended set of features when compared to a traditional relational database. In turn, these new features require new methods in order to secure the data held within. In this paper, a summary of the various models presented for securing next-generation databases will be given. Additionally, an overview of the security features in commercial next-generation databases is also given in order to show the need for further work in this field. 1 CiteSeerX 2008-07-01 2008-02-06 1998 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.66.4692 http //www.db.cs.ucdavis.edu/teaching/289F/papers/chris.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.78.1427:Java in Next-Generation Database Systems::2008-07-01 applications, including database applications. CiteSeerX 2008-07-01 2008-02-07 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.78.1427 http //www.cs.wisc.edu/~cao/WISP98/final-versions/praveen.ps en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1534:Proceedings of the 2002 Winter Simulation Conference:Ycesan Chen Snowdon E. Yücesan C. -h. Chen J. L. Snowdon J. M. Charnes Sang D. Choi Anil R. Kumar:2009-04-19 This paper discusses the initial efforts to implement simulation modeling as a visual management and analysis tool at an automotive foundry plant manufacturing engine blocks. The foundry process was modeled using Pro Model to identify bottlenecks and evaluate machine performance, cycle times and production data (total parts, rejects, throughput, products/hr) essential for efficient production control. Results from the current system identified assembly machine work area as the bottleneck (although utilization was greater than 95% for two assembly machines) resulting in high work-in-process (WIP) inventory level, low resource and machine utilization. Based on these results, optimum numbers were identified through use of scenarios by varying the number of assembly machines and processing time of each machine. In addition to these scenarios, strategies for production control involving buffer sizes were also made. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1534 http //www.informs-cs.org/wsc02papers/138.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1535:A Sub-Quadratic Algorithm for Conjunctive and Disjunctive BESs:Jan Friso Groote Misa Keinänen:2009-04-19 We present an algorithm for conjunctive and disjunctive Boolean equation systems (BESs), which arise frequently in the verification and analysis of finite state concurrent systems. In contrast to the previously best known O(e ) time solutions, our algorithm computes the solution of such a fixpoint equation system with size e and alternation depth d in O(e log d) time. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1535 http //www.win.tue.nl/~jfg/articles/CSR-04-13.pdf en 10.1.1.58.4882 10.1.1.81.9591 10.1.1.108.4288 10.1.1.140.2376 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1536:Innovations of the NetSolve Grid Computing System:Dorian C. Arnold Henri Casanova Jack Dongarra:2009-04-19 KEY WORDS Grid computing distributed computing heterogeneous network computing client--server This article is meant to provide the reader with details regarding the present state of the project, describing the current architecture of the system, its latest innovations and other systems 10 that make use of the NetSolve infrastructure. Copyright # 2002 John Wiley & Sons, Ltd CiteSeerX 2009-04-19 2007-11-19 2002 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1536 http //icl.cs.utk.edu/news_pub/submissions/cpe678.pdf en 10.1.1.25.8254 10.1.1.49.8881 10.1.1.32.6963 10.1.1.46.3287 10.1.1.15.9060 10.1.1.43.1259 10.1.1.30.5246 10.1.1.27.3632 10.1.1.115.1390 10.1.1.107.4174 10.1.1.65.4741 10.1.1.3.4994 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1537:The InfoVis Toolkist:Jean-daniel Fekete Jean-daniel Fekete Projet In-situ:2009-04-19 This report presents the InfoVis Toolkit, designed to support the creation, extension and integration of advanced 2D Information Visualization components into interactive Java Swing applications. The InfoVis Toolkit provides specific data structures to achieve a fast action/feedback loop required by dynamic queries. It comes with a large set of components such as range sliders and tailored control panels required to control and configure the visualizations. These components are integrated into a coherent framework that simplifies the management of rich data structures and the design and extension of visualizations. Supported data structures currently include tables, trees and graphs. Supported visualizations include scatter plots, time series, Treemaps, node-link diagrams for trees and graphs and adjacency matrix for graphs. All visualizations can use fisheye lenses and dynamic labeling. The InfoVis Toolkit supports hardware acceleration when available through Agile2D, an implementation of the Java Graphics API based on OpenGL, achieving speedups of 10 to 60 times. CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1537 ftp //ftp.inria.fr/INRIA/publication/publi-pdf/RR/RR-4818.pdf en 10.1.1.123.805 10.1.1.43.631 10.1.1.41.1810 10.1.1.56.8772 10.1.1.13.6859 10.1.1.20.9570 10.1.1.58.2019 10.1.1.25.5975 10.1.1.111.2892 10.1.1.18.1023 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1538:Complex Systems Modeling:Christophe Lecerf Thi:2009-04-19 This paper addresses the simulation of the dynamics of complex systems by using hierarchical graph and multi-agent system. A complex system is composed of numerous interacting parts that can be described recursively. First we summarize the hierarchical aspect of the complex system. We then present a description of hierarchical graph as a data structure for structural modeling in parallel with dynamics simulation by agents. This method can be used by physiological modelers, ecological modelers, etc as well as in other domains that are considered as complex systems. An example issued from physiology will illustrate this approach. CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1538 http //e-ifi.org/rivf/2003/proceedings/p93-98.pdf en 10.1.1.28.9248 10.1.1.4.6467 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1539:Proceedings of the 2003 Winter Simulation Conference:Chick Snchez Ferrin S. Chick P. J. Sánchez D. Ferrin D. J. Morrice Gary Tan Na Zhao:2009-04-19 uses to deliver value to its customers. In today's competitive environment, the globalization of markets has rapidly substituted the traditional integrated business. The competitive success of an organization no longer depends only on its own efforts, but relies on the efficiency of the entire supply chain. Therefore, building an effective supply chain is fast becoming paramount in today's marketplace. Distributed Supply Chain (DSC) Simulation has been identified as one of the best means to test and analyze the performance of supply chains. The Generic Runtime Infrastructure for Distributed Simulation (GRIDS) is a middleware that supports the reuse and interoperation of DSC simulations. This paper reports the experience on employing the GRIDS to support the distributed collaboration of an automobile manufacture supply chain simulation. Several advantages of GRIDS are also discussed here which make it an ideal middleware for DSC simulations. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1539 http //www.informs-cs.org/wsc03papers/142.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1540:Multi-dimensional Visual Representations for Underwater Environmental Uncertainty:Greg S. Schmidt Sue-Ling Chen Greg S. Schmidt Sue-ling Chen Aaron N. Bryden Mark A. Livingston Bryan R. Osborn Lawrence J. Rosenblum:2009-04-19 this paper) and (2) develop a visual method for each characterization. The mariner community needs enhanced characterizations of environmental uncertainty now, but the accuracy of the characterizations is still not sufficient enough and therefore formal user evaluations cannot take place at this point in development. We received feedback on the applicability of our techniques from domain experts. We used this in conjunction with previous results to compile a set of development guidelines (some obvious, others not) CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1540 http //www.ait.nrl.navy.mil/vrlab/pages/../papers/j_IEEECGA04.pdf en 10.1.1.109.7470 10.1.1.60.7349 10.1.1.125.6248 10.1.1.144.7725 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1541:InstantGrid A Framework for On-Demand Grid:Point Construction Roy Roy S. C. Ho K. K. Yin David C. M. Lee Daniel H. F. Hung Cho-li Wang Francis C. M. Lau:2009-04-19 This paper proposes the InstantGrid framework for on-demand construction of grid points. In contrast to traditional approaches, InstantGrid is designed to substantially simplify software management in grid systems, and is able to instantly turn any computer into a grid-ready platform with the desired execution environment. Experimental results demonstrate that a 256-node grid point with commodity grid middleware can be constructed in five minutes from scratch. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1541 http //www.cs.hku.hk/~clwang/papers/InstantGrid-gcc2004-camera.pdf en 10.1.1.114.2815 10.1.1.118.9332 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1542:Prototyping Proof Carrying Code:Martin Wildmoser Tobias Nipkow Gerwin Klein Sebastian Nanz:2009-04-19 We introduce a generic framework for proof carrying code, developed and mechanically verified in Isabelle/HOL. The framework defines and proves sound a verification condition generator with minimal assumptions on the underlying programming language, safety policy, and safety logic. We demonstrate its usability for prototyping proof carrying code systems by instantiating it to a simple assembly language with procedures and a safety policy for arithmetic overflow. CiteSeerX Kluwer 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1542 http //www.doc.ic.ac.uk/~nanz/publications/./ppcc_tcs04.pdf en 10.1.1.40.2507 10.1.1.24.6526 10.1.1.29.2076 10.1.1.40.7179 10.1.1.42.4453 10.1.1.43.6143 10.1.1.103.6797 10.1.1.113.4649 10.1.1.11.9523 10.1.1.10.8649 10.1.1.84.1258 10.1.1.35.532 10.1.1.129.5517 10.1.1.86.3296 10.1.1.7.139 10.1.1.83.9822 10.1.1.103.4133 10.1.1.106.1397 10.1.1.65.5537 10.1.1.71.2795 10.1.1.123.3289 10.1.1.113.9340 10.1.1.132.1879 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1543:Proceedings of the Block Island Workshop on Cooperative Control,:Springer-Verlag Series Lecture Wei Ren Al W. Beard Timothy W. Mclain:2009-04-19 this paper. Ref [15] addresses the knowledge consensus problem when teams of agents only have local communication between nearest neighbors. Since the set of nearest neighbors is constantly changing, the overall system becomes a hybrid system. The paper shows that if the union over all bidirectional communication graphs is connected for finite periods of time, then consensus is achieved. While the results in this paper are not as strong, only unidirectional communication links are assumed CiteSeerX Springer-Verlag 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1543 http //www.et.byu.edu/~wr25/./papers/preprints/bookchapters/RenBeardMcLain03.pdf en 10.1.1.28.2247 10.1.1.10.4292 10.1.1.1.3664 10.1.1.72.7624 10.1.1.2.7148 10.1.1.32.8694 10.1.1.4.8605 10.1.1.15.1267 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1544:Hidden-Action in Multi-Hop Routing:Michal Feldman John Chuang:2009-04-19 In any multi-hop routing scheme, cooperation by the intermediate nodes are essential for the succesful delivery of traffic. However, the effort exerted by the intermediate nodes are often unobservable by the source and/or destination nodes. We show it is possible to overcome this problem of hidden action by designing contracts, in the form of payments, to induce cooperation from the intermediate nodes. Interestingly, the ability to monitor per-hop or per-path outcomes, even if costless to implement, may not improve the welfare of the participants or the performance of the network. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1544 http //www.eecs.harvard.edu/p2pecon/confman/papers/s3p1.pdf en 10.1.1.105.3673 10.1.1.19.8434 10.1.1.28.5987 10.1.1.21.4823 10.1.1.42.559 10.1.1.132.4609 10.1.1.10.8652 10.1.1.11.4819 10.1.1.11.9831 10.1.1.19.1750 10.1.1.34.2032 10.1.1.113.7948 10.1.1.11.8397 10.1.1.60.1810 10.1.1.4.1706 10.1.1.13.9006 10.1.1.119.7102 10.1.1.14.1149 10.1.1.14.6620 10.1.1.10.7859 10.1.1.107.3061 10.1.1.119.8132 10.1.1.106.6176 10.1.1.122.7182 10.1.1.123.5145 10.1.1.131.5662 10.1.1.135.2383 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1545:EPTD DISCUSSION PAPER NO. 83 HOW AGRICULTURAL RESEARCH AFFECTS URBAN POVERTY IN DEVELOPING COUNTRIES THE CASE OF CHINA:Shenggen Fan Cheng Fang Xiaobo Zhang:2009-04-19 developing countries China agricultural research urban poverty i ACKNOWLEDGMENTS This paper develops a framework to measure the impact of agricultural research on urban poverty. Increased investments in agricultural R&D can lower food prices by increasing food production, and lower food prices benefit the urban poor because they often spend more than 60% of their income on food. Application of the framework to China shows that these food price effects are large and that the benefits for the urban poor have been about as large as the benefits for the rural poor. KEYWORDS developing countries, China, agricultural research, urban, poverty ii ACKNOWLEDGMENTS The authors are grateful for helpful comments received from Peter Hazell, Robert Evanson and participants in a session at the American Agricultural Economics Association annual meeting in Chicago, August 5-8, 2001. iii TABLE OF CONTENTS 1. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1545 http //www.ifpri.org/divs/eptd/dp/papers/eptdp83.pdf en 10.1.1.144.9394 10.1.1.1.3288 10.1.1.58.6199 10.1.1.58.3593 10.1.1.31.1619 10.1.1.58.2714 10.1.1.58.2531 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1546:Ontology-Based Query Refinement:For Multimedia Meta Sonja Zillner Werner Winiwarter:2009-04-19 To enable e#cient access to multimedia content, the media data has to be augmented by semantic metadata and functionality. The semantic representation has to be integrated with domain ontologies to fully exploit domain-specific knowledge. This knowledge can be used for refining ambiguous user queries by closing the conceptual gap between the user and the information to be retrieved. In our previous research, we have introduced Enhanced Multimedia Meta Objects (EMMOs) as a new approach for semantic multimedia meta modeling, as well as the query algebra EMMA, which is adequate and complete with regard to the EMMO model. This paper focuses on the refinement of EMMA queries by incorporating ontological knowledge. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1546 http //www.ifs.univie.ac.at/~ww/iiwas04a.pdf en 10.1.1.44.6030 10.1.1.93.962 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1547:Computerising Natural History Card Archives:Downton Lucas And A. C. Downton S. M. Lucas G. Patoulas:2009-04-19 This paper summarises the achievements of a multidisciplinary Bioinformatics project which has the objective of providing a general mechanism for efficient computerisation of typewritten/hand-annotated archive card indexes, of the type found in most museums, archives and libraries. In addition to efficiently scanning, recognising and databasing the content of the cards, the original card images must be maintained as the ultimate source record, and a flexible database structure is required to allow taxonomists to reorganise and update the resulting online archive. Implementation mechanisms for each part of the overall system are described, and conversion performance for a demonstrator database of 27,578 Pyralid moth archive cards is reported. The system is currently being used to convert the full NHM archive of Lepidoptera totalling 290,886 cards. CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1547 http //www.csc.liv.ac.uk/%7Eprima/ICDAR2003/Papers/0064_562_downton_a.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1548:Genome-Wide Detection of Alternative Splicing in Expressed Sequences Using Partial Order Multiple Sequence Alignment Graphs:C. Grasso B. Modrek Y. Xing C. Lee:2009-04-19 this paper we present a detailed examination of the technical problems we have encountered in undertaking high-throughput analyses of alternative splicing over the last four years, and the specific solutions we have developed for these problems, in seeking to minimize both false positive and false negative errors CiteSeerX 2009-04-19 2007-11-19 2001 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1548 http //www.cs.mun.ca/~harold/BDG/grasso.pdf en 10.1.1.109.6075 10.1.1.115.1836 10.1.1.3.1101 10.1.1.104.1301 10.1.1.20.6089 10.1.1.108.6744 10.1.1.92.2847 10.1.1.102.5592 10.1.1.58.4273 10.1.1.83.8143 10.1.1.123.8933 10.1.1.100.2820 10.1.1.100.7691 10.1.1.101.5916 10.1.1.101.9727 10.1.1.102.7070 10.1.1.103.1344 10.1.1.104.9576 10.1.1.108.8802 10.1.1.112.9910 10.1.1.67.4233 10.1.1.74.9803 10.1.1.83.5745 10.1.1.83.7724 10.1.1.84.5073 10.1.1.84.6844 10.1.1.86.4181 10.1.1.87.3649 10.1.1.87.8082 10.1.1.90.2517 10.1.1.91.5647 10.1.1.92.8657 10.1.1.95.7769 10.1.1.95.7882 10.1.1.96.8415 10.1.1.99.6833 10.1.1.99.8018 10.1.1.100.4446 10.1.1.114.4132 10.1.1.115.3917 10.1.1.117.404 10.1.1.121.5264 10.1.1.122.809 10.1.1.125.5346 10.1.1.112.430 10.1.1.127.9023 10.1.1.128.4690 10.1.1.130.1155 10.1.1.130.5318 10.1.1.131.5021 10.1.1.137.3169 10.1.1.14.3143 10.1.1.1.5756 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1549:Adaptive Sampling for Environmental Robotics:Mohammad Rahimi Richard Pon William J. Kaiser Gaurav S. Sukhatme Deborah Estrin Mani Srivastava:2009-04-19 this paper we describe ######################################## a new distributed, robotic sensor methodology developed for applications including characterization of environmental structure and phenomena. NIMS exploits deployed infrastructure that provides the benefits of precise motion, aerial suspension, and low energy sustainable operations in complex environments. NIMS nodes may explore a three-dimensional environment and enable the deployment of sensor nodes at diverse locations and viewing perspectives. NIMS characterization of phenomena in a three dimensional space must now consider the selection of sensor sampling points in both time and space. Thus, we introduce a new approach of mobile node adaptive sampling with the objective of minimizing error between the actual and reconstructed spatiotemporal behavior of environmental variables while minimizing required motion. In this approach, the NIMS node first explores as an agent, gathering a statistical description of phenomena using a ##################################approach. By iteratively increasing sampling resolution, guided adaptively by the measurement results themselves, this NIMS sampling enables reconstruction of phenomena with a systematic method for balancing accuracy with sampling resource cost in time and motion. This adaptive sampling method is described analytically and also tested with simulated environmental data. Experimental evaluations of adaptive sampling algorithms have also been completed. Specifically, NIMS experimental systems have been developed for monitoring of spatiotemporal variation of atmospheric climate phenomena. A NIMS system has been deployed at a field biology station to map phenomena in a 50m width and 50m span transect in a forest environme... 
CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1549 http //cres.usc.edu/pubdb_html/files_upload/400.pdf en 10.1.1.131.2084 10.1.1.18.1128 10.1.1.20.7017 10.1.1.8.1672 10.1.1.92.7418 10.1.1.93.7998 10.1.1.71.4666 10.1.1.132.6943 10.1.1.100.8264 10.1.1.64.4012 10.1.1.79.7058 10.1.1.110.501 10.1.1.128.7519 10.1.1.106.8162 10.1.1.63.3553 10.1.1.66.1967 10.1.1.69.4362 10.1.1.72.4649 10.1.1.84.7790 10.1.1.91.7021 10.1.1.93.6045 10.1.1.97.2624 10.1.1.99.4191 10.1.1.111.6699 10.1.1.115.2562 10.1.1.129.5023 10.1.1.135.5385 10.1.1.138.7945 10.1.1.139.8283 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1550:Enhanced Expressiveness in Scripting Using AnimalScript V2:Guido Rößling Felix Gliesche Thomas Jajeh Thomas Widjaja:2009-04-19 this paper) is scripting. Here, the user provides a simple ASCII file containing commands that steer the visualization. Typically, the commands are held in plain English to make using the underlying scripting language easier. Typical examples for scripting-driven AV systems include JAWAA (Akingbade et al., 2003), JSamba (Stasko, 1998), JHAV E (Naps et al., 2000) and Animal (Roling and Freisleben, 2002) CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1550 http //nibbler.tk.informatik.tu-darmstadt.de/Publications/2004/pvw12.pdf en 10.1.1.19.5818 10.1.1.28.9812 10.1.1.103.9289 10.1.1.78.7343 10.1.1.138.1175 10.1.1.140.5036 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1551:Comparison of Clustering Algorithms in Speaker Identification:Tomi Kinnunen Teemu Kilpelinen Pasi FrÄnti:2009-04-19 Speech processing speaker identification vector In speaker identification, we match a given (unkown) speaker to the set of known speakers in a database. The database is constructed from the speech samples of each known speaker. Feature vectors are extracted from the samples by short-term spectral analysis, and processed further by vector quantization for locating the clusters in the feature space. We study the role of the vector quantization in the speaker identification system. We compare the performance of different clustering algorithms, and the influence of the codebook size. We want to find out, which method provides the best clustering result, and whether the difference in quality contribute to improvement in recognition accuracy of the system. CiteSeerX 2009-04-19 2007-11-19 0 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1551 http //cs.joensuu.fi/pages/tkinnu/research/pdf/ComparisonClusteringAlgsSpeakerRec.pdf en 10.1.1.58.5875 10.1.1.1.5615 10.1.1.58.2111 10.1.1.58.3968 10.1.1.125.5073 10.1.1.104.7507 10.1.1.78.6536 10.1.1.81.4597 10.1.1.89.5956 10.1.1.143.9804 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1552:Towards a Generic Talking Head:Brar Bailly Chabanas:2009-04-19 MPEG-4 Facial Animation Parameters and Facial Definition Parameters We present here a framework for developing a generic talking head capable of reproducing the anatomy and the facial deformations induced by speech movements with a set of a few parameters. We will show that the speaker-specific articulatory movements can be straightforward encoded into the normalized MPEG-4 Facial Animation Parameters and Facial Definition Parameters. CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1552 http //www.icp.inpg.fr/ICP/publis/synthese/_mb/clonegen_mb_ISSP03.pdf en 10.1.1.128.4967 10.1.1.30.2775 10.1.1.130.9240 10.1.1.15.8634 10.1.1.31.3794 10.1.1.142.982 10.1.1.32.7228 10.1.1.42.3468 10.1.1.52.5953 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1553:Language Policy and Localization in Pakistan Proposal for a Paradigmatic Shift:Tariq Rahman:2009-04-19 This paper examines the present language policy of Pakistan and its consequences for the indigenous languages of the country. It then relates this to efforts at localization--- creating computer software in the languages of the country---and argues that all such efforts have been power-oriented. This means that only those languages have been selected for localization which are used in the domains of power---government, bureaucracy, judiciary, military, commerce, media, education, research etc---thus further strengthening them vis a vis the marginalized languages of the people. It is therefore argued that the efforts at localization should be rights-based i.e. all language communities should be considered equal and their languages should be localized not because of their present use in the domains of power but because they too should be strengthened by being put to such use. CiteSeerX 2009-04-19 2007-11-19 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1553 http //www.elda.fr/en/proj/scalla/SCALLA2004/rahman.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1554:BioMed Central:Bmc Medical Informatics Biomed Central Cynthia S Gadd Open Access Kathleen Ann Mckibbon Kathleen Ann Mckibbon Cynthia S Gadd:2009-04-19 Background Quantitative studies are becoming more recognized as important to understanding health care with all of its richness and complexities. The purpose of this descriptive survey was to provide a quantitative evaluation of the qualitative studies published in 170 core clinical journals for 2000. CiteSeerX 2009-04-19 2007-11-19 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1554 http //www.biomedcentral.com/content/pdf/1472-6947-4-11.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1555:Object-Relational Management of Multiply Represented Geographic Entities:Anders Friis-Christensen National Anders Friis-christensen:2009-04-19 Multiple representation occurs when information about the same geographic entity is represented electronically more than once. This occurs frequently in practice, and it invariably results in the occurrence of inconsistencies among the different representations. We propose to resolve this situation by introducing a multiple representation management system (MRMS), the schema of which includes rules that specify how to identify representations of the same entity, rules that specify consistency requirements, and rules used to restore consistency when necessary. In this paper, we demonstrate by means of a prototype and a realworld case study that it is possible to implement a multiple representation schema language on top of an objectrelational database management system. Specifically, it is demonstrated how it is possible to map the constructs of the language used for specifying the multiple representation schema to functionality available in Oracle. Though some limitations exist, Oracle has proven to be a suitable platform for implementing an MRMS. CiteSeerX IEEE Computer Society 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1555 http //www.cs.auc.dk/~csj/Papers/Files/2003_friis-christensenSSDM.pdf en 10.1.1.118.6768 10.1.1.101.5051 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.1.1556:Netview Application Software for Constructing and Visually Exploring Phylogenetic Networks:Kirill Kryukov Naruya Saitou:2009-04-19 phylogenetic network neighbor-joining method Introduction Reconstructing evolutionary history of a group of species is a major task in biological study. Many methods exist for reconstructing such history, or phylogeny, but most of them are based on an assumption that evolution of given gene family can be represented as a tree. However some families of genes may have alternative historical structure that cannot be represented as a tree. Such history can result from events such as recombination, gene conversion and horizontal gene transfer, and require not tree but network for accurate representation. Parallel substitutions are also sources of non-tree networks. For the purpose of understanding such history we developed a program Netview, which enables constructing phylogenetic network based on the sequence data. 2 Netview Netview uses following approach to construct a network First, it constructs a NJ-tree [1] for provided set of sequences, using nucleotide di#erence distance matrix (Fig. 1). Then every nucleotide site positio CiteSeerX 2009-04-19 2007-11-19 2003 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.1556 http //hc.ims.u-tokyo.ac.jp/JSBi/journal/GIW03/GIW03SS06.pdf en 10.1.1.85.2481 10.1.1.134.5815 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.113.6268:Multiple Dynamic View Support for Cooperative Work:Yusuke Yokota Tatsuya Nakamura Hiroyuki Tarumi Yahiko Kambayashi:2008-08-14 VIEW Media is a cooperative hypermedia system, which supports cooperative work utilizing hypermedia documents. It provides fundamental functions for cooperative work support system and enables developers to construct various groupware such as distributed presentation systems, education systems, conference systems, and so forth. This system provides a powerful and flexible mechanism of customization. The mechanism can alter presentation, structure, behavior and authority of hypermedia documents and workspace, which supports dynamic change of roles of users. This paper describes the purpose of VIEW Media, its basic model and implementation, and a user interface which supports conferences among users who have different access rights to shared documents. 1. CiteSeerX 2008-08-14 2008-08-14 1999 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.113.6268 http //ieeexplore.ieee.org/iel5/6197/16540/00765767.pdf en 10.1.1.139.1894 10.1.1.21.4748 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.21.7175:Adding View Support to ODMG-93:M. Dobrovnik J. Eder:2009-04-15 A concept to introduce external models in object oriented databases is presented, such that application programs do no longer interface directly the whole conceptual schema, but work against external schemas specifically designed for the applications requirements. There are virtually no restrictions for such applications, since the interaction with the database takes place via updateable views. The data model is a somewhat simplified form of ODMG-93 [4], where we incorporated the additional constructs we need for the external schema definition. The approach makes a clear distinction between types and classes, and also separates the type and class hierarchies of the conceptual schema from the external type and class hierarchies. With type derivation, we provide a powerful type restructuring mechanism, which allows to define an external type which is based on a conceptual type. In the derivation process, one can omit conceptual components and methods or redefine their types. Additional methods can be defined for external types as well. 1 To appear in "Advances in Databases and Information Systems ADBIS '94" , Proc. of the Intl. Workshop of the Moscow ACM SIGMOD Chapter By defining well formed external schemas via constraints and schema invariants, we are able to guarantee unambiguous method resolution, steadiness of method resolution and compliance with the covariant subtyping principle. The full semantics of the conceptual schema are preserved. The designer of the external schema can make use of all information contained in the conceptual schema, in particular conceptual methods can be called from externally defined ones. In this paper, we concentrate on the area of type derivation and method resolution. 
1 CiteSeerX 2009-04-15 2007-11-21 1994 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.7175 http //www.ifi.uni-klu.ac.at/Publications/pubfiles/psfiles/1994-0001-DoEd.ps en 10.1.1.49.5388 10.1.1.45.855 10.1.1.45.224 10.1.1.50.3649 10.1.1.92.5802 10.1.1.25.3017 10.1.1.96.1587 10.1.1.42.7156 10.1.1.17.6038 10.1.1.105.8423 10.1.1.85.8868 10.1.1.86.6171 10.1.1.89.4262 10.1.1.39.5730 10.1.1.33.2495 10.1.1.28.4618 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.31.2167:Study of the Charge Spectra Generated By Photomultipliers.:Cavasinni Cologna B. Di Girolamo G. Renzoni:2009-04-12 INTRODUCTION In this note we discuss the measurements of light yield from scintillator and wavelenght-shifting fibres (WLS) performed through the analysis of the charge distribution provided by a photomultiplier. This procedure was used in ref [1]. The analysis procedures were verified by a simple montecarlo program which generates the photomultiplier spectra. 2 THE EXPERIMENTAL APPARATUS These measurements have been carried out in the optoelectronic laboratory in Pisa with two different setups. GPIB-CAMAC interface PM 1 Fic system OS/9 Computer -fic monitor 12345... -off-line analisys Fibre Fibre Scintillator Support Dark Room Up View Support Source Digital Multimeter Figure 1 Experimental apparatus used for the measurements of charge spectra In the first case, as shown in the figure 1, the fibre is placed on proper supports at about 10 cm from the surface of an optical bench. The fibre, coupled to a small scintillat CiteSeerX 2009-04-12 2007-11-22 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.2167 http //atlasinfo.cern.ch/Atlas/SUB_DETECTORS/TILE/tileref/note117/fnote.ps.Z en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.49.2435:TIGUKAT An Object Model for Query and View Support in Object Database Systems:Randal J. Peters M. Tamer Özsu Duane Szafron:2009-04-12 Object-oriented computing is influencing many areas of computer science including software engineering, user interfaces, operating systems, programming languages and database systems. The appeal of object-orientation is attributed to its higher levels of abstraction for modeling real world concepts, its support for incremental development and its potential for interoperability. Despite many advances, object-oriented computing is still in its infancy and a universally acceptable definition of an object-oriented data model is virtually nonexistent, although some standardization efforts are underway. This report presents the TIGUKAT 1 object model definition that is the result of an investigation of object-oriented modeling features which are common among earlier proposals, along with some distinctive qualities that extend the power and expressibility of this model beyond others. The literature recognizes two perspectives of an object model the structural view and the behavioral view. ... CiteSeerX 2009-04-12 2007-11-22 1992 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.49.2435 ftp //menaik.cs.ualberta.ca/pub/TechReports/1992/TR92-14/TR92-14.ps.Z en 10.1.1.123.1085 10.1.1.116.1298 10.1.1.48.1422 10.1.1.27.6071 10.1.1.84.9469 10.1.1.97.9167 10.1.1.11.8792 10.1.1.48.8578 10.1.1.31.7627 10.1.1.42.7725 10.1.1.48.2199 10.1.1.45.224 10.1.1.44.4971 10.1.1.34.624 10.1.1.86.4262 10.1.1.31.7627 10.1.1.32.4444 10.1.1.51.5051 10.1.1.32.4382 10.1.1.51.9134 10.1.1.52.9346 10.1.1.56.204 10.1.1.21.1999 10.1.1.45.4879 10.1.1.105.8443 10.1.1.53.2111 10.1.1.53.4468 10.1.1.29.8669 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.37.8818:Overview of Multidatabase Transaction Management:Yuri Breitbart Hector Garcia-Molina Avi Silberschatz:2009-06-22 A multidatabase system (MDBS) is a facility that allows users access to data located in multiple autonomous database management systems (DBMSs). In such a system, global transactions are executed under the control of the MDBS. Independently, local transactions are executed under the control of the local DBMSs. Each local DBMS integrated by the MDBS may employ a different transaction management scheme. In addition, each local DBMS has complete control over all transactions (global and local) executing at its site, including the ability to abort at any point any of the transactions executing at its site. Typically, no design or internal DBMS structure changes are allowed in order to accommodate the MDBS. Furthermore, the local DBMSs may not be aware of each other, and, as a consequence, cannot coordinate their actions. Thus, traditional techniques for ensuring transaction atomicity and consistency in homogeneous distributed database systems may not be appropriate for an MDBS environment.... 
CiteSeerX 2009-06-22 2007-11-22 1992 text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.8818 ftp //ftp.cs.utexas.edu/pub/avi/UT-CS-TR-92-21.PS.Z en 10.1.1.101.8988 10.1.1.130.1772 10.1.1.38.6210 10.1.1.34.3768 10.1.1.36.1275 10.1.1.104.3430 10.1.1.112.244 10.1.1.94.9106 10.1.1.41.4043 10.1.1.49.5143 10.1.1.59.2034 10.1.1.53.875 10.1.1.137.5642 10.1.1.41.8832 10.1.1.21.1100 10.1.1.105.3626 10.1.1.44.773 10.1.1.21.2576 10.1.1.40.6484 10.1.1.144.2713 10.1.1.48.6718 10.1.1.16.6166 10.1.1.40.832 10.1.1.36.2660 10.1.1.30.3087 10.1.1.47.322 10.1.1.17.6532 10.1.1.33.2301 10.1.1.20.4306 10.1.1.47.6258 10.1.1.39.9212 10.1.1.46.4334 10.1.1.71.485 10.1.1.43.1405 10.1.1.49.1308 10.1.1.35.6530 10.1.1.42.5177 10.1.1.54.4068 10.1.1.133.3692 10.1.1.40.4220 10.1.1.48.7743 10.1.1.26.575 10.1.1.107.596 10.1.1.116.3495 10.1.1.33.2074 10.1.1.38.7229 10.1.1.59.4464 10.1.1.103.9562 10.1.1.36.5887 10.1.1.40.9658 10.1.1.53.6783 10.1.1.29.5010 10.1.1.107.876 10.1.1.46.2273 10.1.1.46.3657 10.1.1.49.5281 10.1.1.50.4114 10.1.1.63.3234 10.1.1.79.9607 10.1.1.83.4819 10.1.1.83.4980 10.1.1.84.8136 10.1.1.90.953 10.1.1.90.9785 10.1.1.92.2397 10.1.1.93.8911 10.1.1.94.3702 10.1.1.97.672 10.1.1.98.4604 10.1.1.117.6190 10.1.1.118.4814 10.1.1.130.880 10.1.1.137.1167 10.1.1.51.5111 10.1.1.45.2774 10.1.1.45.9165 10.1.1.40.4684 10.1.1.35.5866 10.1.1.38.3606 10.1.1.29.9166 10.1.1.31.3667 10.1.1.21.7181 10.1.1.33.2343 10.1.1.23.3117 10.1.1.24.7879 10.1.1.18.8936 10.1.1.19.3770 10.1.1.19.5246 10.1.1.12.3293 10.1.1.2.2325 10.1.1.60.116 10.1.1.140.5244 10.1.1.143.3448 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.54.6302:Overview of Multidatabase Transaction Management:Yuri Breitbart Hector Garcia-molina Avi Silberschatz:2009-04-12 A multidatabase system (MDBS) is a facility that allows users access to data located in multiple autonomous database management systems (DBMSs). In such a system, global transactions are executed under the control of the MDBS. Independently, local transactions are executed under the control of the local DBMSs. Each local DBMS integrated by the MDBS may employ a different transaction management scheme. In addition, each local DBMS has complete control over all transactions (global and local) executing at its site, including the ability to abort at any point any of the transactions executing at its site. Typically, no design or internal DBMS structure changes are allowed in order to accommodate the MDBS. Furthermore, the local DBMSs may not be aware of each other, and, as a consequence, cannot coordinate their actions. Thus, traditional techniques for ensuring transaction atomicity and consistency in homogeneous distributed database systems may not be appropriate for an MDBS environment.... 
CiteSeerX 2009-04-12 2007-11-22 1992 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.6302 http //www-db.stanford.edu/pub/papers/multidatabase.ps en 10.1.1.101.8988 10.1.1.130.1772 10.1.1.38.6210 10.1.1.34.3768 10.1.1.36.1275 10.1.1.104.3430 10.1.1.112.244 10.1.1.94.9106 10.1.1.41.4043 10.1.1.49.5143 10.1.1.59.2034 10.1.1.53.875 10.1.1.137.5642 10.1.1.41.8832 10.1.1.21.1100 10.1.1.105.3626 10.1.1.44.773 10.1.1.21.2576 10.1.1.40.6484 10.1.1.144.2713 10.1.1.48.6718 10.1.1.16.6166 10.1.1.40.832 10.1.1.36.2660 10.1.1.30.3087 10.1.1.47.322 10.1.1.17.6532 10.1.1.33.2301 10.1.1.20.4306 10.1.1.47.6258 10.1.1.39.9212 10.1.1.46.4334 10.1.1.71.485 10.1.1.43.1405 10.1.1.49.1308 10.1.1.35.6530 10.1.1.42.5177 10.1.1.54.4068 10.1.1.133.3692 10.1.1.40.4220 10.1.1.48.7743 10.1.1.26.575 10.1.1.107.596 10.1.1.116.3495 10.1.1.33.2074 10.1.1.38.7229 10.1.1.59.4464 10.1.1.103.9562 10.1.1.36.5887 10.1.1.40.9658 10.1.1.53.6783 10.1.1.29.5010 10.1.1.107.876 10.1.1.46.2273 10.1.1.46.3657 10.1.1.49.5281 10.1.1.50.4114 10.1.1.63.3234 10.1.1.79.9607 10.1.1.83.4819 10.1.1.83.4980 10.1.1.84.8136 10.1.1.90.953 10.1.1.90.9785 10.1.1.92.2397 10.1.1.93.8911 10.1.1.94.3702 10.1.1.97.672 10.1.1.98.4604 10.1.1.117.6190 10.1.1.118.4814 10.1.1.130.880 10.1.1.137.1167 10.1.1.51.5111 10.1.1.45.2774 10.1.1.45.9165 10.1.1.40.4684 10.1.1.35.5866 10.1.1.38.3606 10.1.1.29.9166 10.1.1.31.3667 10.1.1.21.7181 10.1.1.33.2343 10.1.1.23.3117 10.1.1.24.7879 10.1.1.18.8936 10.1.1.19.3770 10.1.1.19.5246 10.1.1.12.3293 10.1.1.2.2325 10.1.1.60.116 10.1.1.140.5244 10.1.1.143.3448 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.59.5918:Coupling OWL with MPEG-7 and TV-Anytime for Domain-specific Multimedia Information Integration and Retrieval:Chrisa Tsinaraki Panagiotis Polydoros Nektarios Moumoutzis Stavros Christodoulakis:2009-04-19 The success of the Web is due to a large extent in the development of standards that allow interoperability in open environments. Future work in the field will have greater impact if it is based and built on existing standards. Well-accepted international standards for multimedia content descriptions are MPEG-7 and TV-Anytime. However, these standards do not propose a concrete methodology and language for the integration of domainspecific knowledge for the multimedia content. Moreover, domain-specific knowledge for a specific domain related to the content of a video may be described in a well-accepted ontology description language such as OWL, which is independent of MPEG-7 and TV-Anytime. CiteSeerX 2009-04-19 2008-02-05 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.59.5918 http //www.riao.org/Proceedings-2004/papers/1330.pdf en 10.1.1.103.1819 10.1.1.104.7893 10.1.1.106.3954 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.43.3839:Specification and Execution of Transactional Workflows:Marek Rusinkiewicz Amit Sheth:2009-04-13 The basic transaction model has evolved over time to incorporate more complex transaction structures and to selectively modify the atomicity and isolation properties. In this chapter we discuss the application of transaction concepts to activities that involve coordinated execution of multiple tasks (possibly of different types) over different processing entities. Such applications are referred to as transactional workflows. In this chapter we discuss the specification of such workflows and the issues involved in their execution. 1 What is a Workflow? Workflows are activities involving the coordinated execution of multiple tasks performed by different processing entities. A task defines some work to be done and can be specified in a number of ways, including a textual description in a file or an email, a form, a message, or a computer program. A processing entity that performs the tasks may be a person or a software system (e.g., a mailer, an application program, a database mana... 
CiteSeerX ACM Press 2009-04-13 2007-11-22 1995 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.43.3839 http //lsdis.cs.uga.edu/lib/././download/RS93.ps en 10.1.1.17.1323 10.1.1.59.5051 10.1.1.38.6210 10.1.1.68.7445 10.1.1.109.5175 10.1.1.17.7962 10.1.1.44.7778 10.1.1.112.244 10.1.1.13.7602 10.1.1.102.7874 10.1.1.41.4043 10.1.1.49.5143 10.1.1.41.7252 10.1.1.17.3225 10.1.1.54.7761 10.1.1.55.5255 10.1.1.108.958 10.1.1.35.7733 10.1.1.52.3682 10.1.1.36.1618 10.1.1.45.6317 10.1.1.43.3180 10.1.1.35.8718 10.1.1.44.6365 10.1.1.51.2883 10.1.1.50.9206 10.1.1.6.9085 10.1.1.30.1707 10.1.1.80.6634 10.1.1.49.355 10.1.1.127.3550 10.1.1.35.3562 10.1.1.137.8832 10.1.1.49.4085 10.1.1.41.5506 10.1.1.40.4657 10.1.1.43.2369 10.1.1.40.832 10.1.1.74.5411 10.1.1.90.4428 10.1.1.110.6967 10.1.1.27.2122 10.1.1.15.5605 10.1.1.54.727 10.1.1.49.7512 10.1.1.45.8796 10.1.1.50.5984 10.1.1.53.137 10.1.1.30.3262 10.1.1.28.1680 10.1.1.21.7110 10.1.1.29.3148 10.1.1.57.687 10.1.1.59.5924 10.1.1.46.2812 10.1.1.51.5552 10.1.1.17.7375 10.1.1.40.1598 10.1.1.52.9787 10.1.1.1.3496 10.1.1.50.6791 10.1.1.55.3358 10.1.1.137.7582 10.1.1.118.4127 10.1.1.49.3580 10.1.1.35.5825 10.1.1.46.9382 10.1.1.31.7411 10.1.1.48.5504 10.1.1.55.5163 10.1.1.18.1603 10.1.1.52.8129 10.1.1.1.9723 10.1.1.21.9113 10.1.1.49.7644 10.1.1.52.6646 10.1.1.75.3106 10.1.1.80.2072 10.1.1.55.8770 10.1.1.54.8188 10.1.1.101.7919 10.1.1.104.8176 10.1.1.24.5741 10.1.1.29.4667 10.1.1.4.1055 10.1.1.48.9175 10.1.1.56.792 10.1.1.65.3172 10.1.1.66.5947 10.1.1.73.8532 10.1.1.83.8299 10.1.1.86.8521 10.1.1.87.2402 10.1.1.87.4648 10.1.1.90.5638 10.1.1.91.1709 10.1.1.94.4248 10.1.1.114.511 10.1.1.119.5037 10.1.1.124.7957 10.1.1.49.215 10.1.1.53.7777 10.1.1.53.9711 10.1.1.45.9409 10.1.1.40.8789 10.1.1.43.4845 10.1.1.34.8273 10.1.1.35.4783 10.1.1.28.3176 10.1.1.16.8151 10.1.1.8.9117 10.1.1.58.3449 10.1.1.142.7041 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.33.8596:Dynamic Query Optimization and Query Processing in Multidatabase Systems 1.:Henryk Josinski:2009-04-15 Introduction The multidatabase system (MDBS) approach, as a solution for integrated access to information distributed among diverse data sources, has gained a lot of attention in recent years. The multidatabase system is a database system which integrates pre--existing databases allowing the users to access simultaneously database systems (DBMSs) formulating a global query based on a global schema. The component DBMSs are assumed to be heterogeneous and autonomous. Heterogeneity refers to different user interfaces, data models, query languages, and query optimization strategies [5]. Local autonomy means that each DBMS retains complete control over local data and processing. As result of this, its cost model may not be available to the global query optimizer. When a global query is submitted, it is decomposed into two types of queries [1] -- subqueries, operating on sharable data items from local databases, -- assembling queries, consisting of, CiteSeerX 2009-04-15 2007-11-22 2000 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.33.8596 http //www.edbt2000.uni-konstanz.de/phd-workshop/papers/Josinski.pdf en 10.1.1.27.4704 10.1.1.51.8352 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.55.2467:Using Reasoning of Description Logics for Query Processing in Multidatabase Systems:Alfredo Goñi Jesús Bermúdez José M. Blanco Arantza Illarramendi:2009-04-12 . Nowadays, the interest to work simultaneously with data stored in several databases is growing. Multidatabase Systems (MDBS) have been proposed as a solution to work with different pre-existing autonomous databases. Federated Database Systems (FDBS) are a special type of MDBS where an integrated schema is provided. This integrated schema is the result of an integration process among the schemata of the pre-existing autonomous databases. In our case we have built a FDBS that integrates several heterogeneous relational databases by using a particular type of Knowledge Representation system based on Description Logics (DL system) . The integrated schema is represented as a terminology formed by a set of classes and attributes. Although there has been a lot of research about the problems of translation and integration of schemata to obtain integrated ones, the problem of query processing against these integrated schemata has not been treated so much. In this paper we present an overview ... CiteSeerX 2009-04-12 2007-11-22 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.55.2467 http //sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-4/goni.ps en 10.1.1.32.6380 10.1.1.51.9343 10.1.1.47.9649 10.1.1.126.7225 10.1.1.7.9769 10.1.1.65.4869 10.1.1.32.7239 10.1.1.16.6464 10.1.1.50.3099 10.1.1.1.7655 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.55.482:A Shared View of Sharing The Treaty of Orlando:Lynn Andrea Stein Henry Lieberman David Ungar:2009-04-12 Introduction For the past few years, researchers have been debating the relative merits of object-oriented languages with classes and inheritance as opposed to those with prototypes and delegation. It has become clear that the object-oriented programming language design space is not a dichotomy. Instead, we have identified two fundamental mechanisms---templates and empathy---and several different independent degrees of freedom for each. Templates create new objects in their own image, providing guarantees about the similarity of group members. Empathy allows an object to act as if it were some other object, thus providing sharing of state and behavior. The Smalltalk-80 TM language, 1 Actors, Lieberman's Delegation system, Self, and Hybrid each take differing stands on the forms of templates 1 Smalltalk-80 TM is a trademark of Par CiteSeerX ACM Press 2009-04-12 2007-11-22 1989 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.55.482 http //lcs.www.media.mit.edu/people/lieber/Lieberary/OOP/Treaty/Treaty.ps en 10.1.1.26.9545 10.1.1.118.6579 10.1.1.48.69 10.1.1.57.5195 10.1.1.9.570 10.1.1.47.511 10.1.1.127.5320 10.1.1.100.4334 10.1.1.5.3348 10.1.1.39.3374 10.1.1.56.4713 10.1.1.61.2065 10.1.1.27.3015 10.1.1.1.5960 10.1.1.67.5433 10.1.1.31.8109 10.1.1.68.4062 10.1.1.49.3986 10.1.1.122.9331 10.1.1.46.8283 10.1.1.54.5230 10.1.1.16.2055 10.1.1.137.5180 10.1.1.43.5722 10.1.1.68.2105 10.1.1.35.1247 10.1.1.30.1415 10.1.1.7.5014 10.1.1.102.3946 10.1.1.105.6469 10.1.1.26.223 10.1.1.26.8645 10.1.1.35.4104 10.1.1.39.6986 10.1.1.41.7822 10.1.1.42.9056 10.1.1.53.9325 10.1.1.71.1802 10.1.1.76.6993 10.1.1.89.9613 10.1.1.121.5599 10.1.1.122.3737 10.1.1.127.1894 10.1.1.55.5674 10.1.1.37.8260 10.1.1.2.2077 10.1.1.24.5782 10.1.1.19.780 10.1.1.2.4148 10.1.1.2.4173 10.1.1.131.902 10.1.1.30.2927 Metadata may be used without 
restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.13.2374:Integrated Office Systems:O. M. Nierstrasz D. C. Tsichritzis:2009-04-17 Introduction New techniques are sorely needed to aid in the development and maintenance of large application systems. The problem with traditional approaches to software engineering is well in evidence in the field of o#ce information systems it is costly and di#cult to extend existing applications, and to get unrelated applications to "talk" to each other. The objectoriented approach is already being tentatively applied in the modeling of "o#ce objects" and in the presentation of these entities to users as such in "desktop" interfaces to o#ce software. In order to fully exploit the approach to achieve integrated o#ce systems, we need to use object-oriented programming languages, object-oriented run-time support, and object-oriented software engineering environments. We can view the fundamental idea behind the object-oriented approach as that of encapsulation object-oriented languages and systems exploit encapsulation in various ways in an attempt to enhance productivity through, f CiteSeerX 2009-04-17 2007-11-21 1988 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.13.2374 http //www.iam.unibe.ch/~scg/Archive/OSG/Nier89bIntegOfficeSystems.pdf en 10.1.1.26.9545 10.1.1.65.5865 10.1.1.34.624 10.1.1.12.8544 10.1.1.144.6983 10.1.1.26.6746 10.1.1.49.3064 10.1.1.30.4607 10.1.1.38.4894 10.1.1.20.8197 10.1.1.26.4381 10.1.1.29.1890 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.42.9253:Integrated Office Systems:O. M. Nierstrasz D. C. Tsichritzis:2009-04-11 Introduction New techniques are sorely needed to aid in the development and maintenance of large application systems. The problem with traditional approaches to software engineering is well in evidence in the field of office information systems it is costly and difficult to extend existing applications, and to get unrelated applications to "talk" to each other. The objectoriented approach is already being tentatively applied in the modeling of "office objects" and in the presentation of these entities to users as such in "desktop" interfaces to office software. In order to fully exploit the approach to achieve integrated office systems, we need to use object-oriented programming languages, object-oriented run-time support, and object-oriented software engineering environments. We can view the fundamental idea behind the object-oriented approach as that of encapsulation object-oriented languages and systems exploit encapsulation in various ways in an attempt t CiteSeerX ACM Press and Addison-Wesley 2009-04-11 2007-11-22 1988 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.42.9253 ftp //ftp.iam.unibe.ch/pub/scg/Papers/integratedOfficeSystems.ps.gz en 10.1.1.26.9545 10.1.1.65.5865 10.1.1.34.624 10.1.1.12.8544 10.1.1.144.6983 10.1.1.26.6746 10.1.1.49.3064 10.1.1.30.4607 10.1.1.38.4894 10.1.1.20.8197 10.1.1.26.4381 10.1.1.29.1890 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.86.3568:Topics Selected Reference References The following books are useful references.:Alfred V. Aho John E. Hopcroft:2008-07-01 but it lacks topics in network flows and linear programming, as well as more recent algorithms. It is amazing that after more than twenty years it remains an extremely valuable book. 2. Alfred V. Aho, John E. Hopcroft, and Jeffrey D. Ullman. Data Structures and Algorithms. Addison-Wesley, 1983. Revised and more elementary version of the first six chapters of The Design and Analysis of Computer Algorithms. CiteSeerX 2008-07-01 2008-04-01 2000 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.86.3568 http //graphics.stanford.edu/courses/cs161-00-winter/handouts/references.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.100.9631:[3] S. Abiteboul, R. Hull, and V. Vianu. Foundations of Databases. Addison-:S. Acharya P. B. Gibbons V. Poosala S. Ramaswamy The Aqua:2008-07-01 [4] S. Abiteboul and P. Kanellakis. Object identity as a query language primitive. CiteSeerX 2008-07-01 2008-04-02 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.100.9631 http //www.ateneonline.it/rama/6114-6_biblio.pdf en 10.1.1.17.2504 10.1.1.29.4781 10.1.1.96.1350 10.1.1.19.1226 10.1.1.120.576 10.1.1.28.4924 10.1.1.43.2125 10.1.1.40.8642 10.1.1.28.7845 10.1.1.91.7258 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.25.5524:Games and total Datalog ... Queries:Jörg Flum Max Kubierschky Bertram Ludäscher:2009-04-16 We show that the expressive power of Datalog @ programs under the well-founded semantics does not decrease when restricted to total programs thereby affirmatively answering an open question posed by Abiteboul et al. (Foundations of Databases, Addison-Wesley, Reading, MA, 1995). In particular, we show that for every such program there exists an equivalent total program whose only recursive rule is of the form win( # X ) # move( # X # Y ) @win( # Y ) where move is definable by a quantifier-free first-order formula. Also, for the non-inflationary semantics we derive a new normal form whose only recursive rule simulates a version of the game of life. CiteSeerX 2009-04-16 2007-11-21 2000 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.25.5524 http //www.sdsc.edu/~ludaesch/Paper/tcs2000.pdf en 10.1.1.53.8258 10.1.1.32.9446 10.1.1.144.1424 10.1.1.46.9441 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.62.2095:<author><last-name>Abiteboul</last-name></author> <author><last-name>Hull</last-name></author> <author><last-name>Vianu</last-name></author> <title>Foundations of Databases</title>:Peter Wood Overview Of Xml:2008-07-01 magazine → title volume issue date • event-condition-action (ECA) rules for XML • analysing ECA rules • containment and equivalence of XPath queries • conclusions and future work CiteSeerX 2008-07-01 2008-02-06 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.62.2095 http //www.dcs.bbk.ac.uk/~ptw/analysis.pdf en Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.49.2910:Active Database Systems:Umeshwar Dayal Eric N. Hanson Jennifer Widom:2009-04-12 In Won Kim editor Modern Database Systems The Object Model Integrating a production rules facility into a database system provides a uniform mechanism for a number of advanced database features including integrity constraint enforcement, derived data maintenance, triggers, alerters, protection, version control, and others. In addition, a database system with rule processing capabilities provides a useful platform for large and efficient knowledge-base and expert systems. Database systems with production rules are referred to as active database systems, and the field of active database systems has indeed been active. This chapter summarizes current work in active database systems topics covered include active database rule models and languages, rule execution semantics, and implementation issues. 1 Introduction Conventional database systems are passive they only execute queries or transactions explicitly submitted by a user or an application program. For many applications, however, it is important to monitor situations of interest, and to ... 
CiteSeerX ACM Press 2009-04-12 2007-11-22 1994 application/postscript text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.49.2910 http //www-db.stanford.edu/pub/papers/book-chapter.ps en 10.1.1.17.1323 10.1.1.143.7196 10.1.1.50.3821 10.1.1.51.9946 10.1.1.41.2030 10.1.1.46.2504 10.1.1.52.4421 10.1.1.38.2083 10.1.1.34.661 10.1.1.103.7630 10.1.1.100.9015 10.1.1.97.1699 10.1.1.107.4220 10.1.1.47.9217 10.1.1.133.7157 10.1.1.101.5051 10.1.1.30.9989 10.1.1.53.6941 10.1.1.50.8529 10.1.1.133.4287 10.1.1.50.7278 10.1.1.10.1688 10.1.1.19.8669 10.1.1.44.7600 10.1.1.144.376 10.1.1.44.1348 10.1.1.47.9998 10.1.1.90.4428 10.1.1.108.344 10.1.1.48.9470 10.1.1.53.5472 10.1.1.52.4872 10.1.1.144.4965 10.1.1.31.7578 10.1.1.32.6426 10.1.1.58.6335 10.1.1.85.8052 10.1.1.93.1931 10.1.1.55.4610 10.1.1.21.3821 10.1.1.26.9208 10.1.1.31.4869 10.1.1.48.1833 10.1.1.83.8628 10.1.1.87.9318 10.1.1.90.2195 10.1.1.36.5184 10.1.1.21.1704 10.1.1.53.1733 10.1.1.90.3181 10.1.1.53.6783 10.1.1.52.6151 10.1.1.104.6911 10.1.1.105.1691 10.1.1.21.1984 10.1.1.23.2775 10.1.1.62.5556 10.1.1.68.9063 10.1.1.74.4746 10.1.1.78.5097 10.1.1.84.743 10.1.1.84.904 10.1.1.87.6019 10.1.1.88.3907 10.1.1.89.9631 10.1.1.90.4147 10.1.1.92.365 10.1.1.100.2747 10.1.1.98.5083 10.1.1.98.6663 10.1.1.99.1894 10.1.1.99.8174 10.1.1.133.8073 10.1.1.52.7823 10.1.1.39.5341 10.1.1.35.3458 10.1.1.26.4620 10.1.1.18.8936 10.1.1.19.3694 10.1.1.12.631 10.1.1.48.6394 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.99.9998:intelligent” tools for workflow process redesign A research agenda:Mariska Netjes Irene V Hajo A. Reijers:2009-01-07 Abstract. Although much attention is being paid to business processes during the past decades, the design of business processes and particularly workflow processes is still more art than science. In this workshop paper, we present our view on modeling methods for workflow processes and introduce our research aiming for the development of an “intelligent” software tool for workflow process redesign. This tool uses two approaches to redesign workflows an evolutionary approach, focussing on local updates to a given process, and a revolutionary approach, starting with a clean-sheet of paper. 1 CiteSeerX Springer-Verlag 2009-01-07 2008-04-02 2006 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.99.9998 http //is.tm.tue.nl/staff/mnetjes/BPD05 NetjesVanderfeestenReijers - final2.pdf en 10.1.1.34.732 10.1.1.111.4822 10.1.1.55.5274 10.1.1.57.9662 10.1.1.39.6008 10.1.1.107.2228 10.1.1.110.5479 10.1.1.117.7409 10.1.1.118.9596 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
+oai CiteSeerXPSU 10.1.1.99.9999:Policy gradient method for team Markov games:Ville Könönen:2008-07-17 Abstract. The main aim of this paper is to extend the single-agent policy gradient method for multiagent domains where all agents share the same utility function. We formulate these team problems as Markov games endowed with the asymmetric equilibrium concept and based on this formulation, we provide a direct policy gradient learning method. In addition, we test the proposed method with a small example problem. 1 CiteSeerX 2008-07-17 2008-04-02 2004 application/pdf text http //citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.99.9999 http //lib.tkk.fi/Diss/2004/isbn9512273594/article4.pdf en 10.1.1.138.2589 10.1.1.17.5347 10.1.1.10.8073 10.1.1.98.4302 Metadata may be used without restrictions as long as the oai identifier remains attached to it.
diff --git a/asterix-fuzzyjoin/data/pub-small/raw.dblp-000/part-00000 b/asterix-fuzzyjoin/data/pub-small/raw.dblp-000/part-00000
new file mode 100644
index 0000000..be9c045
--- /dev/null
+++ b/asterix-fuzzyjoin/data/pub-small/raw.dblp-000/part-00000
@@ -0,0 +1,100 @@
+books/acm/kim95/AnnevelinkACFHK95:Object SQL - A Language for the Design and Implementation of Object Databases.:Jurgen Annevelink Rafiul Ahad Amelia Carlson Daniel H. Fishman Michael L. Heytens William Kent:2002-01-03 42-68 1995 Modern Database Systems db/books/collections/kim95.html#AnnevelinkACFHK95
+books/acm/kim95/Blakeley95:OQL[C++] Extending C++ with an Object Query Capability.:José A. Blakeley:2002-01-03 69-88 Modern Database Systems db/books/collections/kim95.html#Blakeley95 1995
+books/acm/kim95/BreitbartGS95:Transaction Management in Multidatabase Systems.:Yuri Breitbart Hector Garcia-Molina Abraham Silberschatz:2004-03-08 573-591 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#BreitbartGS95 1995
+books/acm/kim95/ChristodoulakisK95:Multimedia Information Systems Issues and Approaches.:Stavros Christodoulakis Leonidas Koveos:2002-01-03 318-337 1995 Modern Database Systems db/books/collections/kim95.html#ChristodoulakisK95
+books/acm/kim95/DayalHW95:Active Database Systems.:Umeshwar Dayal Eric N. Hanson Jennifer Widom:2002-01-03 434-456 1995 Modern Database Systems db/books/collections/kim95.html#DayalHW95
+books/acm/kim95/DittrichD95:Where Object-Oriented DBMSs Should Do Better A Critique Based on Early Experiences.:Angelika Kotz Dittrich Klaus R. Dittrich:2002-01-03 238-254 1995 Modern Database Systems db/books/collections/kim95.html#DittrichD95
+books/acm/kim95/Garcia-MolinaH95:Distributed Databases.:Hector Garcia-Molina Meichun Hsu:2002-01-03 477-493 1995 Modern Database Systems db/books/collections/kim95.html#Garcia-MolinaH95
+books/acm/kim95/Goodman95:An Object-Oriented DBMS War Story Developing a Genome Mapping Database in C++.:Nathan Goodman:2002-01-03 216-237 1995 Modern Database Systems db/books/collections/kim95.html#Goodman95
+books/acm/kim95/Kaiser95:Cooperative Transactions for Multiuser Environments.:Gail E. Kaiser:2002-01-03 409-433 1995 Modern Database Systems db/books/collections/kim95.html#Kaiser95
+books/acm/kim95/KelleyGKRG95:Schema Architecture of the UniSQL/M Multidatabase System:William Kelley Sunit K. Gala Won Kim Tom C. Reyes Bruce Graham:2004-03-08 Modern Database Systems books/acm/Kim95 621-648 1995 db/books/collections/kim95.html#KelleyGKRG95
+books/acm/kim95/KemperM95:Physical Object Management.:Alfons Kemper Guido Moerkotte:2002-01-03 175-202 1995 Modern Database Systems db/books/collections/kim95.html#KemperM95
+books/acm/kim95/Kim95:Introduction to Part 1 Next-Generation Database Technology.:Won Kim:2002-01-03 5-17 1995 Modern Database Systems db/books/collections/kim95.html#Kim95
+books/acm/kim95/Kim95a:Object-Oriented Database Systems Promises, Reality, and Future.:Won Kim:2002-01-03 255-280 1995 Modern Database Systems db/books/collections/kim95.html#Kim95a
+books/acm/kim95/Kim95b:Introduction to Part 2 Technology for Interoperating Legacy Databases.:Won Kim:2002-01-03 515-520 1995 Modern Database Systems db/books/collections/kim95.html#Kim95b
+books/acm/kim95/KimCGS95:On Resolving Schematic Heterogeneity in Multidatabase Systems.:Won Kim Injun Choi Sunit K. Gala Mark Scheevel:2002-01-03 521-550 1995 Modern Database Systems db/books/collections/kim95.html#KimCGS95
+books/acm/kim95/KimG95:Requirements for a Performance Benchmark for Object-Oriented Database Systems.:Won Kim Jorge F. Garza:2002-01-03 203-215 1995 Modern Database Systems db/books/collections/kim95.html#KimG95
+books/acm/kim95/KimK95:On View Support in Object-Oriented Databases Systems.:Won Kim William Kelley:2002-01-03 108-129 1995 Modern Database Systems db/books/collections/kim95.html#KimK95
+books/acm/kim95/Kowalski95:The POSC Solution to Managing E&P Data.:Vincent J. Kowalski:2002-01-03 281-301 1995 Modern Database Systems db/books/collections/kim95.html#Kowalski95
+books/acm/kim95/KriegerA95:C++ Bindings to an Object Database.:David Krieger Tim Andrews:2002-01-03 89-107 1995 Modern Database Systems db/books/collections/kim95.html#KriegerA95
+books/acm/kim95/Lunt95:Authorization in Object-Oriented Databases.:Teresa F. Lunt:2002-01-03 130-145 1995 Modern Database Systems db/books/collections/kim95.html#Lunt95
+books/acm/kim95/MengY95:Query Processing in Multidatabase Systems.:Weiyi Meng Clement T. Yu:2002-01-03 551-572 1995 Modern Database Systems db/books/collections/kim95.html#MengY95
+books/acm/kim95/Motro95:Management of Uncerainty in database Systems.:Amihai Motro:2002-01-03 457-476 1995 Modern Database Systems db/books/collections/kim95.html#Motro95
+books/acm/kim95/Omiecinski95:Parallel Relational Database Systems.:Edward Omiecinski:2002-01-03 494-512 1995 Modern Database Systems db/books/collections/kim95.html#Omiecinski95
+books/acm/kim95/OzsuB95:Query Processing in Object-Oriented Database Systems.:M. Tamer Özsu José A. Blakeley:2002-01-03 146-174 1995 Modern Database Systems db/books/collections/kim95.html#OzsuB95
+books/acm/kim95/RusinkiewiczS95:Specification and Execution of Transactional Workflows.:Marek Rusinkiewicz Amit P. Sheth:2004-03-08 592-620 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#RusinkiewiczS95 1995
+books/acm/kim95/Samet95:Spatial Data Structures.:Hanan Samet:2004-03-08 361-385 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#Samet95 1995
+books/acm/kim95/SametA95:Spatial Data Models and Query Processing.:Hanan Samet Walid G. Aref:2002-01-03 338-360 1995 Modern Database Systems db/books/collections/kim95.html#SametA95
+books/acm/kim95/ShanADDK95:Pegasus A Heterogeneous Information Management System.:Ming-Chien Shan Rafi Ahmed Jim Davis Weimin Du William Kent:2004-03-08 664-682 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#ShanADDK95 1995
+books/acm/kim95/Snodgrass95:Temporal Object-Oriented Databases A Critical Comparison.:Richard T. Snodgrass:2002-01-03 386-408 1995 Modern Database Systems db/books/collections/kim95.html#Snodgrass95
+books/acm/kim95/SoleyK95:The OMG Object Model.:Richard Mark Soley William Kent:2002-01-03 18-41 1995 Modern Database Systems db/books/collections/kim95.html#SoleyK95
+books/acm/kim95/Stout95:EDA/SQL.:Ralph L. Stout:2004-03-08 649-663 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#Stout95 1995
+books/acm/kim95/Thompson95:The Changing Database Standards Landscape.:Craig W. Thompson:2002-01-03 302-317 1995 Modern Database Systems db/books/collections/kim95.html#Thompson95
+books/acm/kim95/BreitbartR95:Overview of the ADDS System.:Yuri Breitbart Tom C. Reyes:2009-06-12 683-701 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#BreitbartR95 1995
+books/acm/Kim95:Modern Database Systems The Object Model, Interoperability, and Beyond.::2004-03-08 Won Kim Modern Database Systems ACM Press and Addison-Wesley 1995 0-201-59098-0 db/books/collections/kim95.html
+books/ap/MarshallO79:Inequalities Theory of Majorization and Its Application.:Albert W. Marshall Ingram Olkin:2002-01-03 Academic Press 1979 0-12-473750-1
+books/aw/kimL89/BjornerstedtH89:Version Control in an Object-Oriented Architecture.:Anders Björnerstedt Christer Hulten:2006-02-24 451-485 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#BjornerstedtH89
+books/aw/kimL89/BretlMOPSSWW89:The GemStone Data Management System.:Robert Bretl David Maier Allen Otis D. Jason Penney Bruce Schuchardt Jacob Stein E. Harold Williams Monty Williams:2002-01-03 283-308 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#BretlMOPSSWW89
+books/aw/kimL89/CareyDRS89:Storage Management in EXODUS.:Michael J. Carey David J. DeWitt Joel E. Richardson Eugene J. Shekita:2002-01-03 341-369 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#CareyDRS89
+books/aw/kimL89/Decouchant89:A Distributed Object Manager for the Smalltalk-80 System.:Dominique Decouchant:2002-01-03 487-520 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Decouchant89
+books/aw/kimL89/DiederichM89:Objects, Messages, and Rules in Database Design.:Jim Diederich Jack Milton:2002-01-03 177-197 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#DiederichM89
+books/aw/kimL89/EllisG89:Active Objects Ealities and Possibilities.:Clarence A. Ellis Simon J. Gibbs:2002-01-03 561-572 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#EllisG89
+books/aw/kimL89/FishmanABCCDHHKLLMNRSW89:Overview of the Iris DBMS.:Daniel H. Fishman Jurgen Annevelink David Beech E. C. Chow Tim Connors J. W. Davis Waqar Hasan C. G. Hoch William Kent S. Leichner Peter Lyngbæk Brom Mahbod Marie-Anne Neimat Tore Risch Ming-Chien Shan W. Kevin Wilkinson:2002-01-03 219-250 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#FishmanABCCDHHKLLMNRSW89
+books/aw/kimL89/KimBCGW89:Features of the ORION Object-Oriented Database System.:Won Kim Nat Ballou Hong-Tai Chou Jorge F. Garza Darrell Woelk:2002-01-03 251-282 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#KimBCGW89
+books/aw/kimL89/KimKD89:Indexing Techniques for Object-Oriented Databases.:Won Kim Kyung-Chang Kim Alfred G. Dale:2002-01-03 371-394 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#KimKD89
+books/aw/kimL89/King89:My Cat Is Object-Oriented.:Roger King:2002-01-03 23-30 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#King89
+books/aw/kimL89/Maier89:Making Database Systems Fast Enough for CAD Applications.:David Maier:2002-01-03 573-582 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Maier89
+books/aw/kimL89/MellenderRS89:Optimizing Smalltalk Message Performance.:Fred Mellender Steve Riegel Andrew Straw:2002-01-03 423-450 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#MellenderRS89
+books/aw/kimL89/Moon89:The Common List Object-Oriented Programming Language Standard.:David A. Moon:2002-01-03 49-78 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Moon89
+books/aw/kimL89/Moss89:Object Orientation as Catalyst for Language-Database Inegration.:J. Eliot B. Moss:2002-01-03 583-592 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Moss89
+books/aw/kimL89/Nierstrasz89:A Survey of Object-Oriented Concepts.:Oscar Nierstrasz:2002-01-03 3-21 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Nierstrasz89
+books/aw/kimL89/NierstraszT89:Integrated Office Systems.:Oscar Nierstrasz Dennis Tsichritzis:2002-01-03 199-215 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#NierstraszT89
+books/aw/kimL89/Russinoff89:Proteus A Frame-Based Nonmonotonic Inference System.:David M. Russinoff:2002-01-03 127-150 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#Russinoff89
+books/aw/kimL89/SkarraZ89:Concurrency Control and Object-Oriented Databases.:Andrea H. Skarra Stanley B. Zdonik:2002-01-03 395-421 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#SkarraZ89
+books/aw/kimL89/SteinLU89:A Shared View of Sharing The Treaty of Orlando.:Lynn Andrea Stein Henry Lieberman David Ungar:2002-01-03 31-48 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#SteinLU89
+books/aw/kimL89/TarltonT89:Pogo A Declarative Representation System for Graphics.:Mark A. Tarlton P. Nong Tarlton:2002-01-03 151-176 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#TarltonT89
+books/aw/kimL89/TomlinsonS89:Concurrent Object-Oriented Programming Languages.:Chris Tomlinson Mark Scheevel:2002-01-03 79-124 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#TomlinsonS89
+books/aw/kimL89/TsichritzisN89:Directions in Object-Oriented Research.:Dennis Tsichritzis Oscar Nierstrasz:2002-01-03 523-536 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#TsichritzisN89
+books/aw/kimL89/Wand89:A Proposal for a Formal Model of Objects.:Yair Wand:2002-01-03 537-559 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Wand89
+books/aw/kimL89/WeiserL89:OZ+ An Object-Oriented Database System.:Stephen P. Weiser Frederick H. Lochovsky:2002-01-03 309-337 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#WeiserL89
+books/aw/stonebraker86/RoweS86:The Commercial INGRES Epilogue.:Lawrence A. Rowe Michael Stonebraker:2002-01-03 63-82 1986 The INGRES Papers db/books/collections/Stonebraker86.html#RoweS86 db/books/collections/Stonebraker86/RoweS86.html ingres/P063.pdf
+books/aw/stonebraker86/Stonebraker86:Design of Relational Systems (Introduction to Section 1).:Michael Stonebraker:2002-01-03 1-3 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86 db/books/collections/Stonebraker86/Stonebraker86.html ingres/P001.pdf
+books/aw/stonebraker86/Stonebraker86a:Supporting Studies on Relational Systems (Introduction to Section 2).:Michael Stonebraker:2002-01-03 83-85 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86a db/books/collections/Stonebraker86/Stonebraker86a.html ingres/P083.pdf
+books/aw/stonebraker86/Stonebraker86b:Distributed Database Systems (Introduction to Section 3).:Michael Stonebraker:2002-01-03 183-186 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86b db/books/collections/Stonebraker86/Stonebraker86b.html ingres/P183.pdf
+books/aw/stonebraker86/Stonebraker86c:The Design and Implementation of Distributed INGRES.:Michael Stonebraker:2002-01-03 187-196 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86c db/books/collections/Stonebraker86/Stonebraker86c.html ingres/P187.pdf
+books/aw/stonebraker86/Stonebraker86d:User Interfaces for Database Systems (Introduction to Section 4).:Michael Stonebraker:2002-01-03 243-245 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86d db/books/collections/Stonebraker86/Stonebraker86d.html ingres/P243.pdf
+books/aw/stonebraker86/Stonebraker86e:Extended Semantics for the Relational Model (Introduction to Section 5).:Michael Stonebraker:2002-01-03 313-316 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86e db/books/collections/Stonebraker86/Stonebraker86e.html ingres/P313.pdf
+books/aw/stonebraker86/Stonebraker86f:Database Design (Introduction to Section 6).:Michael Stonebraker:2002-01-03 393-394 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86f db/books/collections/Stonebraker86/Stonebraker86f.html ingres/P393.pdf
+books/aw/stonebraker86/X86:Title, Preface, Contents.::2002-01-03 1986 The INGRES Papers db/books/collections/Stonebraker86.html#X86 db/books/collections/Stonebraker86/X86.html ingres/frontmatter.pdf
+books/aw/stonebraker86/X86a:References.::2002-01-03 429-444 1986 The INGRES Papers db/books/collections/Stonebraker86.html#X86a db/books/collections/Stonebraker86/X86a.html ingres/P429.pdf
+books/aw/Knuth86a:TeX The Program:Donald E. Knuth:2002-01-03 Addison-Wesley 1986 0-201-13437-3
+books/aw/AbiteboulHV95:Foundations of Databases.:Serge Abiteboul Richard Hull Victor Vianu:2002-01-03 Addison-Wesley 1995 0-201-53771-0 AHV/Toc.pdf ... ... journals/tods/AstrahanBCEGGKLMMPTWW76 books/bc/AtzeniA93 journals/tcs/AtzeniABM82 journals/jcss/AbiteboulB86 journals/csur/AtkinsonB87 conf/pods/AtzeniB87 journals/vldb/AbiteboulB95 conf/sigmod/AbiteboulB91 conf/dood/AtkinsonBDDMZ89 conf/vldb/AlbanoBGO93 ... conf/icdt/Abiteboul88 journals/ipl/Abiteboul89 conf/ds/Abrial74 journals/tods/AhoBU79 books/mk/minker88/AptBW88 conf/vldb/AroraC78 conf/stoc/AfratiC89 journals/tods/AlbanoCO85 conf/pods/AfratiCY91 conf/pods/AusielloDM85 conf/vldb/AbiteboulG85 journals/jacm/AjtaiG87 conf/focs/AjtaiG89 journals/tods/AbiteboulG91 ... ... journals/tods/AbiteboulH87 conf/sigmod/AbiteboulH88 ... conf/sigmod/AbiteboulK89 journals/tcs/AbiteboulKG91 journals/jcss/AbiteboulKRW95 conf/sigmod/AbiteboulLUW93 conf/pods/AtzeniP82 conf/pods/AfratiP87 conf/pods/AptP87 conf/wg/AndriesP91 conf/pods/AfratiPPRSU86 books/el/leeuwen90/Apt90 conf/ifip/Armstrong74 journals/siamcomp/AhoSSU81 journals/tods/AhoSU79 journals/siamcomp/AhoSU79 conf/pods/AbiteboulSV90 journals/is/AtzeniT93 conf/popl/AhoU79 conf/pods/AbiteboulV87 conf/jcdkb/AbiteboulV88 journals/jacm/AbiteboulV88 conf/pods/AbiteboulV88 journals/jacm/AbiteboulV89 journals/jcss/AbiteboulV90 journals/jcss/AbiteboulV91 conf/stoc/AbiteboulV91 journals/amai/AbiteboulV91 journals/jcss/AbiteboulV95 journals/jacm/AptE82 conf/coco/AbiteboulVV92 conf/iclp/AptB88 conf/oopsla/BobrowKKMSZ86 journals/tse/BatoryBGSTTW88 conf/mfcs/Bancilhon78 ... conf/db-workshops/Bancilhon85 books/el/leeuwen90/Barendregt90 ... 
journals/tods/BeeriB79 books/el/leeuwen90/BerstelB90 conf/icdt/BeneventanoB92 conf/vldb/BernsteinBC80 conf/vldb/BeeriBG78 conf/sigmod/BorgidaBMR89 journals/tods/BunemanC79 journals/jacm/BernsteinC81 conf/dbpl/BancilhonCD89 books/bc/tanselCGSS93/BaudinetCW93 conf/sigmod/BiskupDB79 journals/jacm/BeeriDFS84 books/mk/BancilhonDK92 conf/edbt/BryDM88 conf/pods/BunemanDW88 journals/jcss/BunemanDW91 journals/tods/Beeri80 journals/dke/Beeri90 ... journals/tods/Bernstein76 conf/lics/BidoitF87 journals/iandc/BidoitF91 conf/sigmod/BeeriFH77 conf/stoc/BeeriFMMUY81 journals/jacm/BeeriFMY83 journals/tods/BunemanFN82 journals/siamcomp/BernsteinG81 journals/iandc/BlassGK85 conf/ijcai/BrachmanGL85 journals/tods/BernsteinGWRR81 books/aw/BernsteinHG87 ... journals/tcs/Bidoit91 journals/tcs/Biskup80 conf/adbt/Biskup79 journals/tods/Biskup83 journals/tcs/BunemanJO91 journals/tods/BeeriK86 conf/pods/BeeriKBR87 conf/icdt/BidoitL90 journals/csur/BatiniL86 conf/sigmod/BlakeleyLT86 conf/vldb/BeeriM91 conf/sigmod/BlakeleyMG93 journals/siamcomp/BeeriMSU81 conf/pods/BancilhonMSU86 conf/pods/BeeriNRST87 journals/software/Borgida85 conf/icalp/BraP83 conf/fgcs/BalbinMR88 ... conf/pods/BeeriR87 journals/jlp/BalbinR87 conf/sigmod/BancilhonR86 books/mk/minker88/BancilhonR88 journals/jlp/BeeriR91 conf/vldb/BancilhonRS82 conf/pods/BeeriRSS92 conf/dood/Bry89 journals/tods/BancilhonS81 journals/cogsci/BrachmanS85 journals/tods/BergamaschiS92 conf/sigmod/BernsteinST75 conf/dbpl/TannenBN91 conf/icdt/TannenBW92 ... journals/jacm/BeeriV84 conf/icalp/BeeriV81 conf/adbt/BeeriV79 journals/siamcomp/BeeriV84 journals/iandc/BeeriV84 journals/jacm/BeeriV84 journals/tcs/BeeriV85 journals/ibmrd/ChamberlinAEGLMRW76 ... 
journals/iandc/Cardelli88 books/mk/Cattell94 conf/sigmod/CacaceCCTZ90 conf/vldb/CastilhoCF82 conf/adbt/CasanovaF82 conf/focs/CaiFI89 journals/jcss/CasanovaFP84 conf/stoc/CosmadakisGKV88 conf/dood/CorciuloGP93 books/sp/CeriGT90 conf/focs/ChandraH80 journals/jcss/ChandraH80 journals/jcss/ChandraH82 journals/jlp/ChandraH85 conf/popl/Chandra81 conf/adbt/Chang79 conf/pods/Chandra88 ... journals/tods/Chen76 conf/ride/ChenHM94 conf/icde/Chomicki92 conf/pods/Chomicki92 ... ... ... conf/stoc/CosmadakisK85 journals/acr/CosmadakisK86 ... journals/jcss/CosmadakisKS86 journals/jacm/CosmadakisKV90 ... conf/pods/CalvaneseL94 conf/adbt/Clark77 conf/stoc/ChandraLM81 conf/stoc/ChandraM77 conf/pods/ConsensM90 conf/sigmod/ConsensM93 conf/icdt/ConsensM90 journals/cacm/Codd70 conf/sigmod/Codd71a persons/Codd71a persons/Codd72 conf/ifip/Codd74 ... conf/sigmod/Codd79 journals/cacm/Codd82 ... conf/sigmod/Cohen89 journals/cacm/Cohen90 ... journals/jcss/Cook74 conf/pods/Cosmadakis83 conf/focs/Cosmadakis87 books/el/leeuwen90/Courcelle90a journals/jacm/CosmadakisP84 conf/edbt/CeriCGLLTZ88 ... conf/vldb/CeriT87 conf/vldb/CasanovaTF88 ... conf/pods/CasanovaV83 journals/siamcomp/ChandraV85 conf/pods/ChaudhuriV92 conf/pods/ChaudhuriV93 conf/pods/ChaudhuriV94 journals/csur/CardelliW85 conf/pods/ChenW89 conf/pods/CohenW89 conf/vldb/CeriW90 conf/vldb/CeriW91 conf/iclp/ChenW92 conf/vldb/CeriW93 ... conf/birthday/Dahlhaus87 conf/vldb/Date81 books/aw/Date86 ... conf/dbpl/Dayal89 journals/tods/DayalB82 journals/ibmrd/DelobelC73 conf/icde/DelcambreD89 ... journals/tods/Delobel78 journals/jacm/Demolombe92 journals/tods/DateF92 ... conf/vldb/DayalHL91 journals/jacm/Paola69a conf/caap/DahlhausM86 journals/acr/DAtriM86 journals/iandc/DahlhausM92 conf/sigmod/DerrMP93 conf/vldb/MaindrevilleS88 conf/pods/Dong92 conf/adbt/BraP82 ... conf/dbpl/DongS91 journals/iandc/DongS95 conf/dbpl/DongS93 conf/dbpl/DongS93 conf/icdt/DongT92 conf/vldb/DenninghoffV91 conf/pods/DenninghoffV93 ... ... books/acm/kim95/DayalHW95 ... 
conf/pods/EiterGM94 conf/pods/Escobar-MolanoHJ93 ... books/el/leeuwen90/Emerson90 books/bc/ElmasriN89 ... conf/icse/Eswaran76 conf/sigmod/EpsteinSW78 ... ... conf/vldb/Fagin77 journals/tods/Fagin77 conf/sigmod/Fagin79 journals/tods/Fagin81 journals/ipl/FaginV83 journals/jacm/Fagin82 journals/jacm/Fagin83 journals/tcs/Fagin93 books/sp/kimrb85/FurtadoC85 ... journals/jlp/Fitting85a journals/tcs/FischerJT83 journals/acr/FaginKUV86 conf/icdt/FernandezM92 journals/tods/FaginMU82 conf/vldb/FaloutsosNS91 ... journals/ai/Forgy82 ... conf/sigmod/Freytag87 ... journals/siamcomp/FischerT83 journals/siamcomp/FaginMUY83 conf/pods/FaginUV83 conf/icalp/FaginV84 ... ... ... ... conf/sigmod/GraefeD87 conf/ride/GatziuD94 conf/sigmod/GardarinM86 conf/sigmod/GyssensG88 journals/tcs/GinsburgH83a journals/jacm/GinsburgH86 ... books/bc/tanselCGSS93/Ginsburg93 books/fm/GareyJ79 journals/jacm/GrantJ82 conf/vldb/GehaniJ91 conf/vldb/GhandeharizadehHJCELLTZ93 journals/tods/GhandeharizadehHJ96 conf/vldb/GehaniJS92 ... conf/sigmod/GehaniJS92 ... conf/deductive/GuptaKM92 conf/pods/GurevichL82 conf/iclp/GelfondL88 conf/adbt/77 journals/csur/GallaireMN84 conf/pods/GrahneMR92 conf/sigmod/GuptaMS93 conf/lics/GaifmanMSV87 journals/jacm/GaifmanMSV93 journals/jacm/GrahamMV86 conf/csl/GradelO92 ... conf/pods/Gottlob87 conf/pods/GyssensPG90 conf/dood/GiannottiPSZ91 books/aw/GoldbergR83 journals/acr/GrahneR86 journals/ipl/Grant77 ... journals/iandc/Grandjean83 conf/vldb/Grahne84 ... journals/csur/Graefe93 books/sp/Greibach75 journals/tods/GoodmanS82 journals/jcss/GoodmanS84 conf/focs/GurevichS85 ... conf/pods/GrumbachS94 conf/sigmod/GangulyST90 ... journals/tcs/Gunter92 ... ... ... ... conf/pods/GrahamV84 conf/pods/GrumbachV91 conf/icde/GardarinV92 conf/sigmod/GraefeW89 ... journals/jacm/GinsburgZ82 conf/vldb/GottlobZ88 ... ... journals/sigmod/Hanson89 ... 
journals/cacm/Harel80 journals/tkde/HaasCLMWLLPCS90 conf/lics/Hella92 journals/iandc/Herrmann95 conf/pods/HirstH93 conf/vldb/HullJ91 conf/ewdw/HullJ90 journals/csur/HullK87 journals/tods/HudsonK89 conf/lics/HillebrandKM93 conf/nato/HillebrandKR93 conf/jcdkb/HsuLM88 journals/ipl/HoneymanLY80 journals/tods/HammerM81 conf/adbt/HenschenMN82 ... journals/jacm/HenschenN84 journals/jacm/Honeyman82 conf/sigmod/HullS89 conf/pods/HullS89 journals/acta/HullS94 journals/jcss/HullS93 conf/fodo/HullTY89 journals/jcss/Hull83 journals/jacm/Hull84 journals/tcs/Hull85 journals/siamcomp/Hull86 ... conf/vldb/Hulin89 ... journals/jacm/HullY84 conf/vldb/HullY90 conf/pods/HullY91 conf/sigmod/IoannidisK90 journals/jcss/ImielinskiL84 conf/adbt/Imielinski82 journals/jcss/Immerman82 journals/iandc/Immerman86 ... journals/siamcomp/Immerman87 conf/pods/ImielinskiN88 conf/vldb/IoannidisNSS92 conf/sigmod/ImielinskiNV91 conf/dood/ImielinskiNV91 conf/vldb/Ioannidis85 journals/jacm/Jacobs82 conf/dbpl/JacobsH91 journals/csur/JarkeK84 journals/jcss/JohnsonK84 conf/popl/JaffarL87 books/el/leeuwen90/Johnson90 journals/jacm/Joyner76 conf/pods/JaeschkeS82 ... books/mk/minker88/Kanellakis88 books/el/leeuwen90/Kanellakis90 conf/oopsla/KhoshafianC86 conf/edbt/KotzDM88 conf/jcdkb/Keller82 conf/pods/Keller85 journals/computer/Keller86 ... journals/tods/Kent79 ... journals/ngc/RohmerLK86 conf/tacs/KanellakisG94 conf/jcdkb/Kifer88 conf/pods/KanellakisKR90 conf/sigmod/KiferKS92 ... conf/icdt/KiferL86 books/aw/KimL89 ... journals/tods/Klug80 journals/jacm/Klug82 journals/jacm/Klug88 journals/jacm/KiferLW95 conf/kr/KatsunoM91 journals/ai/KatsunoM92 conf/jcdkb/KrishnamurthyN88 journals/csur/Knight89 ... journals/iandc/Kolaitis91 journals/ai/Konolige88 conf/ifip/Kowalski74 journals/jacm/Kowalski75 conf/bncod/Kowalski84 conf/vldb/KoenigP81 journals/tods/KlugP82 ... 
conf/pods/KolaitisP88 conf/pods/KiferRS88 conf/sigmod/KrishnamurthyRS88 books/mg/SilberschatzK91 conf/iclp/KempT88 conf/sigmod/KellerU84 conf/dood/Kuchenhoff91 ... journals/jlp/Kunen87 conf/iclp/Kunen88 conf/pods/Kuper87 conf/pods/Kuper88 conf/ppcp/Kuper93 conf/pods/KuperV84 conf/stoc/KolaitisV87 journals/tcs/KarabegV90 journals/iandc/KolaitisV90 conf/pods/KolaitisV90 journals/tods/KarabegV91 journals/iandc/KolaitisV92 journals/tcs/KuperV93 journals/tods/KuperV93 journals/tse/KellerW85 conf/pods/KiferW89 conf/jcdkb/Lang88 books/el/Leeuwen90 ... journals/jcss/Leivant89 ... journals/iandc/Leivant90 ... conf/db-workshops/Levesque82 journals/ai/Levesque84 conf/mfdbs/Libkin91 conf/er/Lien79 journals/jacm/Lien82 books/mk/minker88/Lifschitz88 ... journals/tcs/Lindell91 journals/tods/Lipski79 journals/jacm/Lipski81 journals/tcs/LeratL86 journals/cj/LeveneL90 books/sp/Lloyd87 conf/pods/LakshmananM89 conf/tlca/LeivantM93 conf/sigmod/LaverMG83 conf/pods/LiptonN90 journals/jcss/LucchesiO78 conf/sigmod/Lohman88 ... conf/ijcai/Lozinskii85 books/ph/LewisP81 ... conf/sigmod/LecluseRV88 journals/is/LipeckS87 journals/jlp/LloydST87 journals/tods/LingTK81 conf/sigmod/LyngbaekV87 conf/dood/LefebvreV89 conf/pods/LibkinW93 conf/dbpl/LibkinW93 journals/jacm/Maier80 books/cs/Maier83 ... conf/vldb/Makinouchi77 conf/icalp/Makowsky81 ... conf/icdt/Malvestuto86 conf/aaai/MacGregorB92 journals/tods/MylopoulosBW80 conf/sigmod/McCarthyD89 journals/csur/MishraE92 conf/sigmod/MumickFPR90 books/mk/Minker88 journals/jlp/Minker88 conf/vldb/MillerIR93 journals/is/MillerIR94 journals/iandc/Mitchell83 conf/pods/Mitchell83 conf/vldb/MendelzonM79 journals/tods/MaierMS79 journals/jcss/MaierMSU80 conf/pods/MendelzonMW94 journals/debu/MorrisNSUG87 journals/ai/Moore85 conf/vldb/Morgenstern83 conf/pods/Morris88 ... conf/pods/MannilaR85 ... journals/jlp/MinkerR90 books/aw/MannilaR92 journals/acr/MaierRW86 ... 
journals/tods/MarkowitzS92 conf/pods/Marchetti-SpaccamelaPS87 journals/jacm/MaierSY81 conf/iclp/MorrisUG86 journals/tods/MaierUV84 conf/iclp/MorrisUG86 journals/acta/MakowskyV86 books/bc/MaierW88 books/mk/minker88/ManchandraW88 conf/pods/Naughton86 conf/sigmod/NgFS91 ... conf/vldb/Nejdl87 conf/adbt/NicolasM77 conf/sigmod/Nicolas78 journals/acta/Nicolas82 conf/ds/76 conf/pods/NaqviK88 journals/tods/NegriPS91 conf/vldb/NaughtonRSU89 conf/pods/NaughtonS87 ... ... conf/vldb/Osborn79 ... journals/tods/OzsoyogluY87 conf/adbt/Paige82 ... books/cs/Papadimitriou86 ... journals/ipl/Paredaens78 ... books/sp/ParedaensBGG89 journals/ai/Andersen91 books/el/leeuwen90/Perrin90 journals/ins/Petrov89 conf/pods/ParedaensG88 conf/pods/PatnaikI94 conf/adbt/ParedaensJ79 journals/csur/PeckhamM88 ... ... conf/sigmod/ParkerP80 ... conf/iclp/Przymusinski88 conf/pods/Przymusinski89 ... conf/vldb/ParkerSV92 conf/aaai/PearlV87 journals/ai/PereiraW80a conf/pods/PapadimitriouY92 journals/tkde/QianW91 ... journals/jlp/Ramakrishnan91 conf/pods/RamakrishnanBS87 ... conf/adbt/Reiter77 journals/ai/Reiter80 conf/db-workshops/Reiter82 journals/jacm/Reiter86 journals/tods/Rissanen77 conf/mfcs/Rissanen78 conf/pods/Rissanen82 ... journals/ngc/RohmerLK86 journals/jacm/Robinson65 ... conf/pods/Ross89 ... ... conf/sigmod/RoweS79 conf/sigmod/RichardsonS91 journals/debu/RamamohanaraoSBPNTZD87 conf/vldb/RamakrishnanSS92 conf/sigmod/RamakrishnanSSS93 conf/pods/RamakrishnanSUV89 journals/jcss/RamakrishnanSUV93 journals/jlp/RamakrishnanU95 conf/sigmod/SelingerACLP79 conf/sigmod/Sagiv81 journals/tods/Sagiv83 books/mk/minker88/Sagiv88 conf/slp/Sagiv90 conf/sigmod/Sciore81 journals/jacm/Sciore82 conf/pods/Sciore83 journals/acr/Sciore86 journals/jacm/SagivDPF81 conf/pods/X89 ... 
journals/ai/SmithG85 books/mk/minker88/Shepherdson88 journals/tods/Shipman81 conf/pods/Shmueli87 conf/iclp/SekiI88 conf/sigmod/ShmueliI84 journals/tc/Sickel76 journals/jsc/Siekmann89 conf/sigmod/StonebrakerJGP90 conf/vldb/SimonKM92 journals/csur/ShethL90 conf/pods/SeibL91 conf/sigmod/SuLRD93 conf/adbt/SilvaM79 journals/sigmod/Snodgrass90 journals/sigmod/Soo91 conf/pods/SuciuP94 conf/sigmod/StonebrakerR86 conf/slp/SudarshanR93 conf/pods/SagivS86 journals/cacm/Stonebraker81 books/mk/Stonebraker88 journals/tkde/Stonebraker92 books/aw/Stroustrup91 journals/jacm/SadriU82 conf/vldb/Su91 conf/pods/SagivV89 journals/jacm/SagivW82 journals/tods/StonebrakerWKH76 journals/jacm/SagivY80 conf/pods/SaccaZ86 journals/tcs/SaccaZ88 ... conf/pods/SaccaZ90 ... ... books/bc/TanselCGJSS93 ... journals/acr/ThomasF86 ... ... ... ... journals/tcs/Topor87 ... books/mk/minker88/ToporS88 ... journals/siamcomp/TarjanY84 journals/csur/TeoreyYF86 journals/algorithmica/UllmanG88 conf/pods/Ullman82 books/cs/Ullman82 journals/tods/Ullman85 books/cs/Ullman88 conf/pods/Ullman89 books/cs/Ullman89 conf/sigmod/Gelder86 ... conf/pods/BusscheG92 conf/focs/BusscheGAG92 conf/pods/BusscheP91 conf/slp/Gelder86 conf/pods/Gelder89 conf/pods/GelderRS88 journals/jacm/GelderRS91 journals/tods/GelderT91 journals/ipl/Vardi81 conf/stoc/Vardi82 conf/focs/Vardi82 journals/acta/Vardi83 journals/jcss/Vardi84 conf/pods/Vardi85 conf/pods/Vardi86 journals/jcss/Vardi86 ... conf/pods/Vardi88 conf/sigmod/Vassiliou79 ... ... journals/jacm/EmdenK76 conf/nf2/SchollABBGPRV87 journals/jacm/Vianu87 journals/acta/Vianu87 conf/eds/Vieille86 conf/iclp/Vieille87 ... conf/eds/Vieille88 journals/tcs/Vieille89 ... journals/tcs/VianuV92 conf/sigmod/WidomF90 conf/icde/WangH92 conf/pos/WidjojoHW90 journals/computer/Wiederhold92 conf/pods/Wilkins86 conf/pods/Winslett88 conf/sigmod/WolfsonO90 conf/pods/Wong93 conf/sigmod/WolfsonS88 journals/ibmrd/WangW75 journals/tods/WongY76 conf/vldb/Yannakakis81 journals/csur/YuC84 ... 
journals/jcss/YannakakisP82 ... journals/tods/Zaniolo82 journals/jcss/Zaniolo84 ... conf/edbt/ZhouH90 journals/ibmsj/Zloof77 books/mk/ZdonikM90 db/books/dbtext/abiteboul95.html
+books/aw/Lamport86:LaTeX User's Guide & Reference Manual:Leslie Lamport:2002-01-03 Addison-Wesley 1986 0-201-15790-X
+books/aw/AhoHU74:The Design and Analysis of Computer Algorithms.:Alfred V. Aho John E. Hopcroft Jeffrey D. Ullman:2002-01-03 Addison-Wesley 1974 0-201-00029-6
+books/aw/Lamport2002:Specifying Systems, The TLA+ Language and Tools for Hardware and Software Engineers:Leslie Lamport:2005-07-28 Addison-Wesley 2002 0-3211-4306-X http //research.microsoft.com/users/lamport/tla/book.html
+books/aw/AhoHU83:Data Structures and Algorithms.:Alfred V. Aho John E. Hopcroft Jeffrey D. Ullman:2002-01-03 Addison-Wesley 1983 0-201-00023-7
+books/aw/LewisBK01:Databases and Transaction Processing An Application-Oriented Approach:Philip M. Lewis Arthur J. Bernstein Michael Kifer:2002-01-03 Addison-Wesley 2001 0-201-70872-8
+books/aw/AhoKW88:The AWK Programming Language:Alfred V. Aho Brian W. Kernighan Peter J. Weinberger:2002-01-03 Addison-Wesley 1988
+books/aw/LindholmY97:The Java Virtual Machine Specification:Tim Lindholm Frank Yellin:2002-01-28 Addison-Wesley 1997 0-201-63452-X
+books/aw/AhoSU86:Compilers Princiles, Techniques, and Tools.:Alfred V. Aho Ravi Sethi Jeffrey D. Ullman:2002-01-03 Addison-Wesley 1986 0-201-10088-6
+books/aw/Sedgewick83:Algorithms:Robert Sedgewick:2002-01-03 Addison-Wesley 1983 0-201-06672-6
+journals/siamcomp/AspnesW96:Randomized Consensus in Expected O(n log² n) Operations Per Processor.:James Aspnes Orli Waarts:2002-01-03 1024-1044 1996 25 SIAM J. Comput. 5 db/journals/siamcomp/siamcomp25.html#AspnesW96
+conf/focs/AspnesW92:Randomized Consensus in Expected O(n log ^2 n) Operations Per Processor:James Aspnes Orli Waarts:2006-04-25 137-146 conf/focs/FOCS33 1992 FOCS db/conf/focs/focs92.html#AspnesW92
+journals/siamcomp/Bloniarz83:A Shortest-Path Algorithm with Expected Time O(n² log n log* n).:Peter A. Bloniarz:2002-01-03 588-600 1983 12 SIAM J. Comput. 3 db/journals/siamcomp/siamcomp12.html#Bloniarz83
+conf/stoc/Bloniarz80:A Shortest-Path Algorithm with Expected Time O(n^2 log n log ^* n):Peter A. Bloniarz:2006-04-25 378-384 conf/stoc/STOC12 1980 STOC db/conf/stoc/stoc80.html#Bloniarz80
+journals/siamcomp/Megiddo83a:Linear-Time Algorithms for Linear Programming in R³ and Related Problems.:Nimrod Megiddo:2002-01-03 759-776 1983 12 SIAM J. Comput. 4 db/journals/siamcomp/siamcomp12.html#Megiddo83a
+conf/focs/Megiddo82:Linear-Time Algorithms for Linear Programming in R^3 and Related Problems:Nimrod Megiddo:2006-04-25 329-338 conf/focs/FOCS23 1982 FOCS db/conf/focs/focs82.html#Megiddo82
+journals/siamcomp/MoffatT87:An All Pairs Shortest Path Algorithm with Expected Time O(n² log n).:Alistair Moffat Tadao Takaoka:2002-01-03 1023-1031 1987 16 SIAM J. Comput. 6 db/journals/siamcomp/siamcomp16.html#MoffatT87
+conf/focs/MoffatT85:An All Pairs Shortest Path Algorithm with Expected Running Time O(n^2 log n):Alistair Moffat Tadao Takaoka:2006-04-25 101-105 conf/focs/FOCS26 1985 FOCS db/conf/focs/focs85.html#MoffatT85
+conf/icip/SchonfeldL98:VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases.:Dan Schonfeld Dan Lelescu:2002-11-05 123-127 1998 ICIP (3) db/conf/icip/icip1998-3.html#SchonfeldL98
+conf/hicss/SchonfeldL99:VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases ¾ Visual Search Engine.:Dan Schonfeld Dan Lelescu:2002-01-03 1999 HICSS http //computer.org/proceedings/hicss/0001/00013/00013006abs.htm db/conf/hicss/hicss1999-3.html#SchonfeldL99
+journals/corr/abs-0802-2861:Geometric Set Cover and Hitting Sets for Polytopes in $R^3$:Sören Laue:2008-03-03 http //arxiv.org/abs/0802.2861 2008 CoRR abs/0802.2861 db/journals/corr/corr0802.html#abs-0802-2861 informal publication
+conf/stacs/Laue08:Geometric Set Cover and Hitting Sets for Polytopes in R³.:Sören Laue:2008-03-04 2008 STACS 479-490 http //drops.dagstuhl.de/opus/volltexte/2008/1367 conf/stacs/2008 db/conf/stacs/stacs2008.html#Laue08
+journals/iandc/IbarraJCR91:Some Classes of Languages in NC¹:Oscar H. Ibarra Tao Jiang Jik H. Chang Bala Ravikumar:2006-04-25 86-106 Inf. Comput. January 1991 90 1 db/journals/iandc/iandc90.html#IbarraJCR91
+conf/awoc/IbarraJRC88:On Some Languages in NC.:Oscar H. Ibarra Tao Jiang Bala Ravikumar Jik H. Chang:2002-08-06 64-73 1988 conf/awoc/1988 AWOC db/conf/awoc/awoc88.html#IbarraJRC88
+journals/jacm/GalilHLSW87:An O(n³log n) deterministic and an O(n³) Las Vegs isomorphism test for trivalent graphs.:Zvi Galil Christoph M. Hoffmann Eugene M. Luks Claus-Peter Schnorr Andreas Weber:2003-11-20 513-531 1987 34 J. ACM 3 http //doi.acm.org/10.1145/28869.28870 db/journals/jacm/jacm34.html#GalilHLSW87
+conf/focs/GalilHLSW82:An O(n^3 log n) Deterministic and an O(n^3) Probabilistic Isomorphism Test for Trivalent Graphs:Zvi Galil Christoph M. Hoffmann Eugene M. Luks Claus-Peter Schnorr Andreas Weber:2006-04-25 118-125 conf/focs/FOCS23 1982 FOCS db/conf/focs/focs82.html#GalilHLSW82
+journals/jacm/GalilT88:An O(n²(m + n log n)log n) min-cost flow algorithm.:Zvi Galil Éva Tardos:2003-11-20 374-386 1988 35 J. ACM 2 http //doi.acm.org/10.1145/42282.214090 db/journals/jacm/jacm35.html#GalilT88
+conf/focs/GalilT86:An O(n^2 (m + n log n) log n) Min-Cost Flow Algorithm:Zvi Galil Éva Tardos:2006-04-25 1-9 conf/focs/FOCS27 1986 FOCS db/conf/focs/focs86.html#GalilT86
+series/synthesis/2009Weintraub:Jordan Canonical Form Theory and Practice:Steven H. Weintraub:2009-09-06 Jordan Canonical Form Theory and Practice http //dx.doi.org/10.2200/S00218ED1V01Y200908MAS006 http //dx.doi.org/10.2200/S00218ED1V01Y200908MAS006 2009 Synthesis Lectures on Mathematics & Statistics Morgan & Claypool Publishers
+series/synthesis/2009Brozos:The Geometry of Walker Manifolds:Miguel Brozos-Vázquez Eduardo García-Río Peter Gilkey Stana Nikcevic Rámon Vázquez-Lorenzo:2009-09-06 The Geometry of Walker Manifolds http //dx.doi.org/10.2200/S00197ED1V01Y200906MAS005 http //dx.doi.org/10.2200/S00197ED1V01Y200906MAS005 2009 Synthesis Lectures on Mathematics & Statistics Morgan & Claypool Publishers
diff --git a/asterix-fuzzyjoin/data/users-visitors-small.expected/recordpairs-000/expected.txt b/asterix-fuzzyjoin/data/users-visitors-small.expected/recordpairs-000/expected.txt
new file mode 100644
index 0000000..2d4cbb7
--- /dev/null
+++ b/asterix-fuzzyjoin/data/users-visitors-small.expected/recordpairs-000/expected.txt
@@ -0,0 +1,4 @@
+{ "uid":10: "Jodi Rotruck", "lottery_numbers":[10, 15, 20]:{ "vid":100: "Alex Ascher", "lottery_numbers":[10, 15, 30]:0.5
+{ "uid":20: "Clint Coil", "lottery_numbers":[20, 25, 30]:{ "vid":200: "Hank Friley", "lottery_numbers":[20, 25]:0.6666667
+{ "uid":30: "Marvella Loud", "lottery_numbers":[40, 41, 42]:{ "vid":300: "Luella Schweinert", "lottery_numbers":[41, 42, 43]:0.5
+{ "uid":40: "Tamie Pollara", "lottery_numbers":[45, 46, 47]:{ "vid":400: "Shanna Cuaresma", "lottery_numbers":[45, 46, 47]:1.0
diff --git a/asterix-fuzzyjoin/data/users-visitors-small/records.users-000/part-00000 b/asterix-fuzzyjoin/data/users-visitors-small/records.users-000/part-00000
new file mode 100644
index 0000000..fce60e9
--- /dev/null
+++ b/asterix-fuzzyjoin/data/users-visitors-small/records.users-000/part-00000
@@ -0,0 +1,4 @@
+{ "uid": 10, "name": "Jodi Rotruck", "lottery_numbers": [10, 15, 20] }
+{ "uid": 20, "name": "Clint Coil", "lottery_numbers": [20, 25, 30] }
+{ "uid": 30, "name": "Marvella Loud", "lottery_numbers": [40, 41, 42] }
+{ "uid": 40, "name": "Tamie Pollara", "lottery_numbers": [45, 46, 47] }
diff --git a/asterix-fuzzyjoin/data/users-visitors-small/records.users-000/part-00001 b/asterix-fuzzyjoin/data/users-visitors-small/records.users-000/part-00001
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/asterix-fuzzyjoin/data/users-visitors-small/records.users-000/part-00001
diff --git a/asterix-fuzzyjoin/data/users-visitors-small/records.visitors-000/part-00000 b/asterix-fuzzyjoin/data/users-visitors-small/records.visitors-000/part-00000
new file mode 100644
index 0000000..0fb745a
--- /dev/null
+++ b/asterix-fuzzyjoin/data/users-visitors-small/records.visitors-000/part-00000
@@ -0,0 +1,4 @@
+{ "vid": 100, "name": "Alex Ascher", "lottery_numbers": [10, 15, 30] }
+{ "vid": 200, "name": "Hank Friley", "lottery_numbers": [20, 25] }
+{ "vid": 300, "name": "Luella Schweinert", "lottery_numbers": [41, 42, 43] }
+{ "vid": 400, "name": "Shanna Cuaresma", "lottery_numbers": [45, 46, 47] }
diff --git a/asterix-fuzzyjoin/data/users-visitors-small/records.visitors-000/part-00001 b/asterix-fuzzyjoin/data/users-visitors-small/records.visitors-000/part-00001
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/asterix-fuzzyjoin/data/users-visitors-small/records.visitors-000/part-00001
diff --git a/asterix-fuzzyjoin/data/verify.sh b/asterix-fuzzyjoin/data/verify.sh
new file mode 100755
index 0000000..19ad36e
--- /dev/null
+++ b/asterix-fuzzyjoin/data/verify.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+TMP=/tmp/verify_
+
+if [ $# -lt "2" ]
+then
+ echo "Usage: `basename $0` result.txt result.T.txt [-u]"
+ exit $E_BADARGS
+fi
+
+args=( $@ )
+for i in 0 1
+do
+ f=${args[$i]}
+ ### Assume second argument is alredy sorted and with no duplicates
+ if [ "$i" -eq "0" ]
+ then
+ sort $3 $f > $TMP$i
+ else
+ rm $TMP$i 2> /dev/null
+ ln -s $PWD/$f $TMP$i
+ fi
+ if [ "$?" -ne "0" ]
+ then
+ echo Fail -- preprocessing
+ exit 1
+ fi
+
+ l[$i]=`wc --lines $TMP$i | cut --delimiter=" " --fields=1`
+ if [ "$?" -ne "0" ]
+ then
+ echo Fail -- preprocessing
+ exit 1
+ fi
+done
+
+
+### Test 1
+if [ "${l[0]}" -ne "${l[1]}" ]
+then
+ echo $1 ${l[0]}
+ echo $2 ${l[1]}
+ echo Fail -- different number of tokens
+ exit 1
+fi
+
+### Test 2
+diff --brief ${TMP}0 ${TMP}1
+if [ "$?" -ne "0" ]
+then
+ echo Fail -- differnt tokens
+ exit 1
+fi
+rm ${TMP}0 ${TMP}1
+
+echo Pass
\ No newline at end of file
diff --git a/asterix-fuzzyjoin/data/verify.stage1.sh b/asterix-fuzzyjoin/data/verify.stage1.sh
new file mode 100755
index 0000000..e648fb5
--- /dev/null
+++ b/asterix-fuzzyjoin/data/verify.stage1.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Stage 1 check: runs verify.sh on <dataset>/tokens-000/part-00000 and its expected.txt.
+d=tokens-000
+f=part
+E_BADARGS=65   # non-zero exit status reported for a usage error
+if [ $# -ne "1" ]
+then
+    echo "Usage: $(basename "$0") dataset"
+    echo "Example: $(basename "$0") dblp-small"
+    exit $E_BADARGS
+fi
+
+"$(dirname "$0")"/verify.sh "$1/$d/$f-00000" "$1.expected/$d/expected.txt"
+exit $?
diff --git a/asterix-fuzzyjoin/data/verify.stage2.sh b/asterix-fuzzyjoin/data/verify.stage2.sh
new file mode 100755
index 0000000..be98a4c
--- /dev/null
+++ b/asterix-fuzzyjoin/data/verify.stage2.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Stage 2 check: concatenates <dataset>/ridpairs-000/part-????? and runs verify.sh -u against expected.txt.
+TMP=/tmp/verify_stage2_
+d=ridpairs-000
+f=part
+E_BADARGS=65   # non-zero exit status reported for a usage error
+if [ $# -ne "1" ]
+then
+    echo "Usage: $(basename "$0") dataset"
+    echo "Example: $(basename "$0") dblp-small"
+    exit $E_BADARGS
+fi
+
+cat "$1/$d/$f"-????? > "$TMP"
+"$(dirname "$0")"/verify.sh "$TMP" "$1.expected/$d/expected.txt" -u
+if [ "$?" -ne "0" ]
+then
+    exit 1
+fi
+rm "$TMP"
diff --git a/asterix-fuzzyjoin/data/verify.stage3.sh b/asterix-fuzzyjoin/data/verify.stage3.sh
new file mode 100755
index 0000000..dac5a93
--- /dev/null
+++ b/asterix-fuzzyjoin/data/verify.stage3.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Stage 3 check: concatenates <dataset>/recordpairs-000/part-????? and runs verify.sh -u against expected.txt.
+TMP=/tmp/verify_stage3_
+d=recordpairs-000
+f=part
+E_BADARGS=65   # non-zero exit status reported for a usage error
+if [ $# -ne "1" ]
+then
+    echo "Usage: $(basename "$0") dataset"
+    echo "Example: $(basename "$0") dblp-small"
+    exit $E_BADARGS
+fi
+
+cat "$1/$d/$f"-????? > "$TMP"
+"$(dirname "$0")"/verify.sh "$TMP" "$1.expected/$d/expected.txt" -u
+if [ "$?" -ne "0" ]
+then
+    exit 1
+fi
+rm "$TMP"
diff --git a/asterix-fuzzyjoin/pom.xml b/asterix-fuzzyjoin/pom.xml
new file mode 100644
index 0000000..42dd773
--- /dev/null
+++ b/asterix-fuzzyjoin/pom.xml
@@ -0,0 +1,69 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <artifactId>asterix</artifactId>
+        <groupId>edu.uci.ics.asterix</groupId>
+        <version>0.8.1-SNAPSHOT</version>
+    </parent>
+    <groupId>edu.uci.ics.asterix</groupId>
+    <artifactId>asterix-fuzzyjoin</artifactId>
+    <version>0.8.1-SNAPSHOT</version>
+
+    <build>
+        <plugins>
+            <!--
+              maven-compiler-plugin is declared exactly once: a plugin
+              (groupId:artifactId) must be unique under build/plugins, so the
+              Java source/target level and the source-file encoding are
+              configured together in this single declaration.
+            -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>2.3.2</version>
+                <configuration>
+                    <source>1.6</source>
+                    <target>1.6</target>
+                    <compilerArguments>
+                        <encoding>utf8</encoding>
+                    </compilerArguments>
+                </configuration>
+            </plugin>
+            <!-- Runs the jar plugin's test-jar goal during test-compile; output goes to ${basedir}/target. -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>test-jar</goal>
+                        </goals>
+                        <phase>test-compile</phase>
+                    </execution>
+                </executions>
+                <configuration>
+                    <outputDirectory>${basedir}/target</outputDirectory>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <configuration>
+                    <forkMode>pertest</forkMode>
+                    <argLine>-enableassertions -Xmx1g</argLine>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+    <dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.8.1</version>
+            <type>jar</type>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+</project>
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyFiltersJaccard.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyFiltersJaccard.java
new file mode 100644
index 0000000..75029a8
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyFiltersJaccard.java
@@ -0,0 +1,104 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+public class FuzzyFiltersJaccard {
+
+ /**
+ * type is double because with float .8 / (1 + .8) * (8 + 10) = 8.0...01
+ */
+ protected final double simThr;
+ protected final double simThrExpr;
+
+ public FuzzyFiltersJaccard(double similarityThreshold) {
+ simThr = similarityThreshold;
+ simThrExpr = simThr / (1 + simThr);
+ }
+
+ public int getIndexPrefixLength(int length) {
+ return length - (int) Math.ceil(2 * simThrExpr * length) + 1;
+ }
+
+ public int getIntersectLowerBound(int lengthX, int lengthY) {
+ return (int) Math.ceil(simThrExpr * (lengthX + lengthY));
+ }
+
+ public long getIntersectLowerBound(long lengthX, long lengthY) {
+ return (long) Math.ceil(simThrExpr * (lengthX + lengthY));
+ }
+
+ public int getIntersectUpperBound(int noGramsCommon, int positionX, int positionY, int lengthX, int lengthY) {
+ return noGramsCommon + Math.min(lengthX - positionX - 1, lengthY - positionY - 1);
+ }
+
+ public long getIntersectUpperBound(int noGramsCommon, long positionX, long positionY, long lengthX, long lengthY) {
+ return noGramsCommon + Math.min(lengthX - positionX - 1, lengthY - positionY - 1);
+ }
+
+ public int getLengthLowerBound(int length) {
+ return (int) Math.ceil(simThr * length);
+ }
+
+ public long getLengthLowerBound(long length) {
+ return (long) Math.ceil(simThr * length);
+ }
+
+ public int getPrefixLength(int length) {
+ return length - (int) Math.ceil(simThr * length) + 1;
+ }
+
+ public long getPrefixLength(long length) {
+ return length - (long) Math.ceil(simThr * length) + 1;
+ }
+
+ public double getSimilarityThreshold() {
+ return simThr;
+ }
+
+ public boolean passLengthFilter(int lengthX, int lengthY) {
+ return getLengthLowerBound(lengthX) <= lengthY && lengthY <= 1 / simThr * lengthX;
+ }
+
+ public boolean passLengthFilter(long lengthX, long lengthY) {
+ return getLengthLowerBound(lengthX) <= lengthY && lengthY <= 1 / simThr * lengthX;
+ }
+
+ /**
+ * @param noGramsCommon
+ * number of grams in common
+ * @param positionX
+ * position of the last gram in common on X
+ * @param positionY
+ * position of the last gram in common on X
+ * @param lengthX
+ * @param lengthY
+ * @return
+ */
+ public boolean passPositionFilter(int noGramsCommon, int positionX, int positionY, int lengthX, int lengthY) {
+ return getIntersectUpperBound(noGramsCommon, positionX, positionY, lengthX, lengthY) >= getIntersectLowerBound(
+ lengthX, lengthY);
+ }
+
+ public boolean passPositionFilter(int noGramsCommon, long positionX, long positionY, long lengthX, long lengthY) {
+ return getIntersectUpperBound(noGramsCommon, positionX, positionY, lengthX, lengthY) >= getIntersectLowerBound(
+ lengthX, lengthY);
+ }
+
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinAppendLength.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinAppendLength.java
new file mode 100644
index 0000000..b08aa29
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinAppendLength.java
@@ -0,0 +1,60 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.Tokenizer;
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.TokenizerFactory;
+
+public class FuzzyJoinAppendLength {
+    /** Rewrites each record of file args[0] into file args[1] as its first four fields plus a token count. */
+    public static void main(String[] args) throws IOException {
+        final String inputFileName = args[0];
+        final String outputFileName = args[1];
+        Tokenizer tokenizer = TokenizerFactory.getTokenizer(FuzzyJoinConfig.TOKENIZER_VALUE,
+                FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
+        int[] dataColumns = FuzzyJoinUtil.getDataColumns("2,3");
+        BufferedReader input = new BufferedReader(new FileReader(inputFileName));
+        try { // the finally blocks close both streams even when reading, tokenizing or writing throws
+            BufferedWriter output = new BufferedWriter(new FileWriter(outputFileName));
+            try {
+                for (String line = input.readLine(); line != null; line = input.readLine()) {
+                    String[] splits = line.split(FuzzyJoinConfig.RECORD_SEPARATOR_REGEX);
+                    Collection<String> tokens = tokenizer.tokenize(FuzzyJoinUtil.getData(splits, dataColumns,
+                            FuzzyJoinConfig.TOKEN_SEPARATOR));
+                    output.write(splits[0] + FuzzyJoinConfig.RECORD_SEPARATOR + splits[1]
+                            + FuzzyJoinConfig.RECORD_SEPARATOR + splits[2] + FuzzyJoinConfig.RECORD_SEPARATOR
+                            + splits[3] + FuzzyJoinConfig.RECORD_SEPARATOR + tokens.size() + "\n");
+                }
+            } finally {
+                output.close();
+            }
+        } finally {
+            input.close();
+        }
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinConfig.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinConfig.java
new file mode 100644
index 0000000..03806a6
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinConfig.java
@@ -0,0 +1,66 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+public class FuzzyJoinConfig {
+ private static final String NAMESPACE = "fuzzyjoin";
+ // Constants only: property keys (each prefixed with NAMESPACE), paired *_VALUE constants, and separators.
+ // tokenizer: TOKENIZER_VALUE is the tokenizer name FuzzyJoinAppendLength passes to TokenizerFactory
+ //
+ public static final String TOKENIZER_PROPERTY = NAMESPACE + ".tokenizer";
+ public static final String TOKENIZER_VALUE = "Word";
+ //
+ // similarity: name and threshold (NOTE(review): the *_VALUE constants look like defaults -- confirm with callers)
+ //
+ public static final String SIMILARITY_NAME_PROPERTY = NAMESPACE + ".similarity.name";
+ public static final String SIMILARITY_NAME_VALUE = "Jaccard";
+ public static final String SIMILARITY_THRESHOLD_PROPERTY = NAMESPACE + ".similarity.threshold";
+ public static final float SIMILARITY_THRESHOLD_VALUE = .8f;
+ //
+ // record: presumably comma-separated data column indexes, cf. getDataColumns("2,3") -- TODO confirm
+ //
+ public static final String RECORD_DATA_PROPERTY = NAMESPACE + ".record.data";
+ public static final String RECORD_DATA_VALUE = "1";
+ public static final String RECORD_DATA1_PROPERTY = NAMESPACE + ".record.data1";
+ public static final String RECORD_DATA1_VALUE = "1";
+ //
+ // data: property key only; no paired *_VALUE constant is declared in this class
+ //
+ public static final String DATA_TOKENS_PROPERTY = NAMESPACE + ".data.tokens";
+ //
+ // const: word/token separators as a char plus a *_REGEX String holding the same single character
+ //
+ public static final String RECORD_DATA_VALUE_SEPARATOR_REGEX = ",";
+ public static final char WORD_SEPARATOR = '_';
+ public static final String WORD_SEPARATOR_REGEX = "_";
+ public static final char TOKEN_SEPARATOR = '_';
+ public static final String TOKEN_SEPARATOR_REGEX = "_";
+ public static final int RECORD_KEY = 0;
+ //
+ // separators: again char plus *_REGEX String pairs, except TOKEN_RANK_SEPARATOR which is declared as a char only
+ //
+ public static final char TOKEN_RANK_SEPARATOR = '_';
+ public static final char RECORD_SEPARATOR = ':';
+ public static final String RECORD_SEPARATOR_REGEX = ":";
+ public static final char RECORD_EXTRA_SEPARATOR = ';';
+ public static final String RECORD_EXTRA_SEPARATOR_REGEX = ";";
+ public static final char RIDPAIRS_SEPARATOR = ' ';
+ public static final String RIDPAIRS_SEPARATOR_REGEX = " ";
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinContext.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinContext.java
new file mode 100644
index 0000000..738bdc8
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinContext.java
@@ -0,0 +1,38 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+import java.util.ArrayList;
+
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFiltersJaccard;
+
+/**
+ * Holder for the state of one fuzzy-join computation: the similarity threshold, the Jaccard
+ * filters built from it, and the (initially empty) lists of records and results.
+ */
+public class FuzzyJoinContext {
+    /** Threshold passed to the constructor, stored unchanged. */
+    public final float similarityThreshold;
+    /** Jaccard filters constructed from {@link #similarityThreshold}. */
+    public final SimilarityFiltersJaccard similarityFilters;
+    /** Token arrays of the records; empty after construction. */
+    public final ArrayList<int[]> records;
+    /** Self-join results; empty after construction. */
+    public final ArrayList<ResultSelfJoin> results;
+
+    /**
+     * Creates a context with empty record and result lists.
+     *
+     * @param similarityThreshold
+     *            threshold stored in the context and handed to the Jaccard filters
+     */
+    public FuzzyJoinContext(float similarityThreshold) {
+        this.similarityThreshold = similarityThreshold;
+        this.similarityFilters = new SimilarityFiltersJaccard(similarityThreshold);
+        this.records = new ArrayList<int[]>();
+        this.results = new ArrayList<ResultSelfJoin>();
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinMemory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinMemory.java
new file mode 100644
index 0000000..ec2a411
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinMemory.java
@@ -0,0 +1,317 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+import java.io.BufferedInputStream;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import edu.uci.ics.asterix.fuzzyjoin.invertedlist.InvertedListLengthList;
+import edu.uci.ics.asterix.fuzzyjoin.invertedlist.InvertedListsLengthList;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFiltersJaccard;
+
+public class FuzzyJoinMemory {
+ /**
+  * Command-line driver: loads a tokenized binary file, runs the in-memory self join and
+  * prints one line per result pair ("ridX ridY similarity") on standard output. Progress
+  * and timing information goes to standard error.
+  *
+  * @param args
+  *            threshold, file name, optional number of runs (default 1) and optional
+  *            warm-up factor (default 1)
+  */
+ public static void main(String[] args) {
+     if (args.length < 2) {
+         System.err.println("Usage: <threshold> <file> [no runs, e.g., 1] [warm-up factor, e.g., 1]");
+         System.exit(2);
+     }
+
+     final float similarityThreshold = Float.parseFloat(args[0]);
+     final String fileName = args[1];
+     // Optional arguments fall back to a single, un-warmed run.
+     final int noRuns = args.length > 2 ? Integer.parseInt(args[2]) : 1;
+     final int warmUpFactor = args.length > 3 ? Integer.parseInt(args[3]) : 1;
+
+     System.err.println("Document: " + fileName);
+     System.err.println("... LOADING DATASET ...");
+
+     final ArrayList<int[]> records = new ArrayList<int[]>();
+     final ArrayList<Integer> rids = new ArrayList<Integer>();
+     final FuzzyJoinMemory fj = new FuzzyJoinMemory(similarityThreshold);
+     readRecords(fileName, records, rids);
+
+     System.err.println("Algorithm: ppjoin");
+     System.err.println("Threshold: Jaccard " + similarityThreshold);
+
+     // Results carry positions in the record list; translate them back to record ids.
+     for (ResultSelfJoin pair : fj.runs(records, noRuns, warmUpFactor)) {
+         System.out.format("%d %d %.3f", rids.get(pair.indexX), rids.get(pair.indexY), pair.similarity);
+         System.out.println();
+     }
+ }
+
+ /**
+  * Reads a tokenized binary file into memory.
+  *
+  * The file is a sequence of records, each stored as little-endian ints: the record id, the
+  * number of tokens, then that many tokens. For every record one entry is appended to
+  * {@code rids} and one to {@code records}, at the same position; an entry is only added
+  * once the whole record has been read, so the two lists never get out of step.
+  *
+  * @param fileName
+  *            path of the binary file to read
+  * @param records
+  *            receives the token array of each record
+  * @param rids
+  *            receives the id of each record
+  * @throws RuntimeException
+  *             wrapping the cause if the file cannot be opened, if an I/O error occurs, or
+  *             if the file ends in the middle of a record
+  */
+ public static void readRecords(String fileName, List<int[]> records, List<Integer> rids) {
+     LittleEndianIntInputStream in;
+     try {
+         in = new LittleEndianIntInputStream(new BufferedInputStream(new FileInputStream(fileName)));
+     } catch (FileNotFoundException e) {
+         throw new RuntimeException(e);
+     }
+
+     try {
+         while (true) {
+             final int rid;
+             try {
+                 rid = in.readInt();
+             } catch (java.io.EOFException e) {
+                 // Running out of data where a record would start is the normal end of the
+                 // file. Any other IOException is a real failure and is reported below.
+                 break;
+             }
+
+             final int size = in.readInt();
+             final int[] record = new int[size];
+             for (int j = 0; j < size; j++) {
+                 record[j] = in.readInt();
+             }
+
+             rids.add(rid);
+             records.add(record);
+         }
+     } catch (IOException e) {
+         throw new RuntimeException(e);
+     } finally {
+         try {
+             in.close();
+         } catch (IOException ignored) {
+             // The stream was only read from; a failure to close cannot lose data.
+         }
+     }
+ }
+
+ /** Inverted index over record prefixes; written by add() and selfJoinAndAddRecord(). */
+ private final InvertedListsLengthList invertedLists;
+ /** Jaccard filters built from the threshold given to the constructor. */
+ private final SimilarityFiltersJaccard similarityFilters;
+ /** Token arrays of all records added so far; a record's position is the index stored in the inverted lists. */
+ private final ArrayList<int[]> records;
+
+ /**
+  * Creates a join state with an empty index and no records.
+  *
+  * @param similarityThreshold
+  *            threshold handed to the Jaccard filters
+  */
+ public FuzzyJoinMemory(float similarityThreshold) {
+     this.invertedLists = new InvertedListsLengthList();
+     this.similarityFilters = new SimilarityFiltersJaccard(similarityThreshold);
+     this.records = new ArrayList<int[]>();
+ }
+
+ /**
+  * Indexes a record without probing: the first getPrefixLength(tokens.length) tokens are
+  * inserted into the inverted lists as {record index, token position, record length}, and
+  * the token array is appended to the record list.
+  *
+  * @param tokens
+  *            tokens of the record to index
+  */
+ public void add(final int[] tokens) {
+     final int recordIndex = records.size();
+     final int tokenCount = tokens.length;
+     final int prefixEnd = similarityFilters.getPrefixLength(tokenCount);
+
+     int position = 0;
+     while (position < prefixEnd) {
+         invertedLists.index(tokens[position], new int[] { recordIndex, position, tokenCount });
+         position++;
+     }
+     records.add(tokens);
+ }
+
+ /**
+  * Probes the index with a record that is NOT added to it and returns the indexed records
+  * that survive all filters.
+  *
+  * Candidate generation walks the probe prefix and, for every indexed entry sharing a
+  * token, keeps a per-candidate counter in {@code counts}; the value -1 marks a candidate
+  * that a filter has rejected, and such a candidate is never looked at again. Candidates
+  * with a positive counter are then verified with passSimilarityFilter.
+  *
+  * @param tokens
+  *            tokens of the probing record
+  * @param length
+  *            length used for the prefix length, the length lower bound and the position
+  *            filter; the prefix loop is additionally capped at tokens.length
+  * @return one ResultJoin (index of the matched record, similarity) per candidate for which
+  *         passSimilarityFilter returned a value greater than 0
+  */
+ public ArrayList<ResultJoin> join(final int[] tokens, final int length) {
+ final int prefixLength = similarityFilters.getPrefixLength(length);
+ final int lengthLowerBound = similarityFilters.getLengthLowerBound(length);
+ //
+ // candidate generation (probe only, nothing is added to the index)
+ //
+ // counts: index of candidate record -> number of matching tokens seen so far, or -1 once rejected
+ final HashMap<Integer, Integer> counts = new HashMap<Integer, Integer>();
+ for (int indexToken = 0; indexToken < Math.min(prefixLength, tokens.length); indexToken++) {
+ final int token = tokens[indexToken];
+ //
+ // probe index
+ //
+ InvertedListLengthList invertedList = invertedLists.get(token);
+ if (invertedList != null) {
+ // length filter
+ // NOTE(review): presumably makes the iteration below skip entries of records shorter
+ // than lengthLowerBound -- confirm in InvertedListLengthList.
+ invertedList.setMinLength(lengthLowerBound);
+ for (int[] element : invertedList) {
+ // element layout as written by add(): {record index, token position, record length}
+ final int indexProbe = element[0];
+ final int indexTokenProbe = element[1];
+ final int lengthProbe = element[2];
+ Integer count = counts.get(indexProbe);
+ if (count == null) {
+ count = 0;
+ }
+
+ if (count != -1) {
+ count++;
+ // position filter
+ if (!similarityFilters.passPositionFilter(count, indexToken, length, indexTokenProbe,
+ lengthProbe)) {
+ count = -1;
+ }
+ // suffix filter (only evaluated on the first match of a candidate, i.e. count == 1)
+ if (count == 1
+ && !similarityFilters.passSuffixFilter(tokens, indexToken, records.get(indexProbe),
+ indexTokenProbe)) {
+ count = -1;
+ }
+ counts.put(indexProbe, count);
+ }
+ }
+ }
+ }
+ //
+ // verify candidates
+ //
+ ArrayList<ResultJoin> results = new ArrayList<ResultJoin>();
+ for (Map.Entry<Integer, Integer> cand : counts.entrySet()) {
+ int count = cand.getValue();
+ int indexProbe = cand.getKey();
+ // count > 0 excludes the candidates marked -1 by a filter
+ if (count > 0) {
+ int tokensProbe[] = records.get(indexProbe);
+ float similarity = similarityFilters.passSimilarityFilter(tokens, prefixLength, tokensProbe,
+ similarityFilters.getPrefixLength(tokensProbe.length), count);
+ // a result is reported only for a strictly positive return value
+ if (similarity > 0) {
+ results.add(new ResultJoin(indexProbe, similarity));
+ }
+ }
+ }
+ return results;
+ }
+
+ /**
+  * Prunes the inverted lists with the length lower bound computed for {@code length + 1}.
+  *
+  * @param length
+  *            record length; the bound is derived from the next larger length
+  */
+ public void prune(int length) {
+     invertedLists.prune(similarityFilters.getLengthLowerBound(length + 1));
+ }
+
+ /**
+  * Runs the self join over {@code records} {@code noRuns * warmupFactor} times, timing
+  * each run, and returns the results of the last run. Progress and timing are printed on
+  * standard error; the reported average covers only the last {@code noRuns} runs, the
+  * earlier ones being treated as warm-up.
+  *
+  * NOTE(review): nothing here resets the inverted lists or the record list of this object
+  * between runs, so from the second run on selfJoinAndAddRecord also probes the records
+  * indexed by earlier runs and numbers new records after them. Confirm whether
+  * {@code noRuns * warmupFactor > 1} is expected to produce usable results.
+  *
+  * @param records
+  *            token arrays to join; fewer than two records yield an empty result
+  * @param noRuns
+  *            number of timed runs
+  * @param warmupFactor
+  *            multiplier giving the total number of runs
+  * @return the result pairs collected during the last run
+  */
+ public List<ResultSelfJoin> runs(Collection<int[]> records, int noRuns, int warmupFactor) {
+     if (records.size() < 2) {
+         return new ArrayList<ResultSelfJoin>();
+     }
+
+     final int noRunsTotal = noRuns * warmupFactor;
+     // Runs are numbered from 1, so the last noRuns of them start here. The previous test
+     // (i >= noRunsTotal - noRuns) summed one warm-up run too many while still dividing
+     // by noRuns.
+     final int firstTimedRun = noRunsTotal - noRuns + 1;
+     float runtime = 0, runtimeAverage = 0;
+     ArrayList<ResultSelfJoin> results = new ArrayList<ResultSelfJoin>();
+
+     System.err.println("# Records: " + records.size());
+     System.err.print("=== BEGIN JOIN (TIMER STARTED) === ");
+     for (int i = 1; i <= noRunsTotal; i++) {
+         System.err.print(".");
+         System.err.flush();
+
+         results.clear();
+         // Request a collection before timing to reduce GC noise in the measurement.
+         Runtime.getRuntime().gc();
+
+         final long startMillis = System.currentTimeMillis();
+         for (int[] record : records) {
+             results.addAll(selfJoinAndAddRecord(record));
+         }
+         runtime = (System.currentTimeMillis() - startMillis) / 1000.0f;
+
+         if (i >= firstTimedRun) {
+             runtimeAverage += runtime;
+         }
+     }
+     System.err.println();
+     System.err.println("# Results: " + results.size());
+     System.err.println("=== END JOIN (TIMER STOPPED) ===");
+     System.err.println("Total Running Time: " + runtimeAverage / noRuns + " (" + runtime + ")");
+     System.err.println();
+     return results;
+ }
+
+ /**
+  * Joins a record against all records added before it and then adds it to the index, in a
+  * single pass over its prefix.
+  *
+  * For each prefix token the inverted list is probed first and the record's own entry is
+  * inserted afterwards, and only while the position is below the index prefix length.
+  * {@code counts} maps a candidate record index to the number of matching tokens seen so
+  * far; -1 marks a candidate rejected by a filter, which is then ignored for the rest of
+  * the pass.
+  *
+  * @param tokens
+  *            tokens of the new record
+  * @return one ResultSelfJoin (index of the new record, index of the earlier record,
+  *         similarity) per candidate for which passSimilarityFilter returned a value
+  *         greater than 0
+  */
+ public ArrayList<ResultSelfJoin> selfJoinAndAddRecord(final int[] tokens) {
+ // index the new record will get once it is appended to records
+ final int index = records.size();
+ final int length = tokens.length;
+ final int prefixLength = similarityFilters.getPrefixLength(length);
+ final int indexPrefixLength = similarityFilters.getIndexPrefixLength(length);
+ final int lengthLowerBound = similarityFilters.getLengthLowerBound(length);
+ //
+ // self join
+ //
+ // counts: index of candidate record -> number of matching tokens seen so far, or -1 once rejected
+ final HashMap<Integer, Integer> counts = new HashMap<Integer, Integer>();
+ for (int indexToken = 0; indexToken < prefixLength; indexToken++) {
+ final int token = tokens[indexToken];
+ //
+ // probe index
+ //
+ InvertedListLengthList invertedList = invertedLists.get(token);
+ if (invertedList != null) {
+ // length filter
+ // NOTE(review): presumably makes the iteration below skip entries of records shorter
+ // than lengthLowerBound -- confirm in InvertedListLengthList.
+ invertedList.setMinLength(lengthLowerBound);
+ for (int[] element : invertedList) {
+ // element layout as written below: {record index, token position, record length}
+ final int indexProbe = element[0];
+ final int indexTokenProbe = element[1];
+ final int lengthProbe = element[2];
+ Integer count = counts.get(indexProbe);
+ if (count == null) {
+ count = 0;
+ }
+
+ if (count != -1) {
+ count++;
+ // position filter
+ if (!similarityFilters.passPositionFilter(count, indexToken, length, indexTokenProbe,
+ lengthProbe)) {
+ count = -1;
+ }
+ // suffix filter (only evaluated on the first match of a candidate, i.e. count == 1)
+ if (count == 1
+ && !similarityFilters.passSuffixFilter(tokens, indexToken, records.get(indexProbe),
+ indexTokenProbe)) {
+ count = -1;
+ }
+ counts.put(indexProbe, count);
+ }
+ }
+ }
+ //
+ // add to index
+ //
+ // done after probing this token's list, and only for positions below the index prefix length
+ if (indexToken < indexPrefixLength) {
+ invertedLists.index(token, new int[] { index, indexToken, length });
+ }
+ }
+ //
+ // add record
+ //
+ records.add(tokens);
+ //
+ // verify candidates
+ //
+ ArrayList<ResultSelfJoin> results = new ArrayList<ResultSelfJoin>();
+ for (Map.Entry<Integer, Integer> cand : counts.entrySet()) {
+ int count = cand.getValue();
+ int indexProbe = cand.getKey();
+ // count > 0 excludes the candidates marked -1 by a filter
+ if (count > 0) {
+ int tokensProbe[] = records.get(indexProbe);
+ float similarity = similarityFilters.passSimilarityFilter(tokens, prefixLength, tokensProbe,
+ similarityFilters.getIndexPrefixLength(tokensProbe.length), count);
+ // a result is reported only for a strictly positive return value
+ if (similarity > 0) {
+ results.add(new ResultSelfJoin(index, indexProbe, similarity));
+ }
+ }
+ }
+ return results;
+ }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinTokenize.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinTokenize.java
new file mode 100644
index 0000000..aa8927d
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinTokenize.java
@@ -0,0 +1,135 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.Tokenizer;
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.TokenizerFactory;
+import edu.uci.ics.asterix.fuzzyjoin.tokenorder.TokenLoad;
+import edu.uci.ics.asterix.fuzzyjoin.tokenorder.TokenRank;
+import edu.uci.ics.asterix.fuzzyjoin.tokenorder.TokenRankFrequency;
+
+public class FuzzyJoinTokenize {
+ /**
+  * A token together with its occurrence counter. Instances are ordered by their counter,
+  * using the ordering defined by MutableInteger.compareTo.
+  */
+ public static class TokenCount implements Comparable<TokenCount> {
+     public String token;
+     public MutableInteger count;
+
+     /**
+      * @param token
+      *            the token text
+      * @param count
+      *            the counter associated with the token; stored by reference
+      */
+     public TokenCount(String token, MutableInteger count) {
+         this.token = token;
+         this.count = count;
+     }
+
+     /**
+      * Compares by counter only; the token text is not considered.
+      *
+      * @return the result of comparing this counter with the other one
+      */
+     @Override
+     public int compareTo(TokenCount o) {
+         return count.compareTo(o.count);
+     }
+
+     public String getToken() {
+         return token;
+     }
+
+     /** @return the token and its counter separated by a single space */
+     @Override
+     public String toString() {
+         return token + " " + count;
+     }
+ }
+
+ /**
+  * Tokenizes a text file of records in two passes.
+  *
+  * Pass 1 counts how often each token occurs in data columns "2,3" of every line and
+  * writes the distinct tokens, sorted by count, one per line to the tokens file. Pass 2
+  * re-reads the input and writes, for every line, the record id, the number of ranked
+  * tokens and the token ranks as little-endian ints to the tokenized file.
+  *
+  * @param args
+  *            input file, tokens file (output), tokenized file (output)
+  * @throws IOException
+  *             if a file cannot be read or written
+  */
+ public static void main(String args[]) throws IOException {
+     if (args.length < 3) {
+         System.err.println("Usage: <input file> <tokens file> <tokenized file>");
+         System.exit(2);
+     }
+
+     final String inputFileName = args[0];
+     final String tokensFileName = args[1];
+     final String tokenizedFileName = args[2];
+
+     Tokenizer tokenizer = TokenizerFactory.getTokenizer(FuzzyJoinConfig.TOKENIZER_VALUE,
+             FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
+
+     int[] dataColumns = FuzzyJoinUtil.getDataColumns("2,3");
+
+     //
+     // pass 1: count token occurrences
+     //
+     HashMap<String, MutableInteger> tokenCount = new HashMap<String, MutableInteger>();
+     BufferedReader input = new BufferedReader(new FileReader(inputFileName));
+     try {
+         String line;
+         while ((line = input.readLine()) != null) {
+             Collection<String> tokens = tokenizer.tokenize(FuzzyJoinUtil.getData(
+                     line.split(FuzzyJoinConfig.RECORD_SEPARATOR_REGEX), dataColumns,
+                     FuzzyJoinConfig.TOKEN_SEPARATOR));
+
+             for (String token : tokens) {
+                 MutableInteger count = tokenCount.get(token);
+                 if (count == null) {
+                     tokenCount.put(token, new MutableInteger(1));
+                 } else {
+                     count.inc();
+                 }
+             }
+         }
+     } finally {
+         // Close even when reading or tokenizing fails.
+         input.close();
+     }
+
+     ArrayList<TokenCount> tokenCounts = new ArrayList<TokenCount>();
+     for (Map.Entry<String, MutableInteger> entry : tokenCount.entrySet()) {
+         tokenCounts.add(new TokenCount(entry.getKey(), entry.getValue()));
+     }
+     Collections.sort(tokenCounts);
+
+     BufferedWriter outputTokens = new BufferedWriter(new FileWriter(tokensFileName));
+     try {
+         for (TokenCount tc : tokenCounts) {
+             outputTokens.write(tc.getToken() + "\n");
+         }
+     } finally {
+         outputTokens.close();
+     }
+
+     // The ranks are loaded back from the tokens file that was just written.
+     TokenRank tokenRank = new TokenRankFrequency();
+     TokenLoad tokenLoad = new TokenLoad(tokensFileName, tokenRank);
+     tokenLoad.loadTokenRank();
+
+     //
+     // pass 2: write rid, number of ranked tokens and the ranks of every record
+     //
+     input = new BufferedReader(new FileReader(inputFileName));
+     try {
+         LittleEndianIntOutputStream outputTokenized = new LittleEndianIntOutputStream(new BufferedOutputStream(
+                 new FileOutputStream(tokenizedFileName)));
+         try {
+             String line;
+             while ((line = input.readLine()) != null) {
+                 String splits[] = line.split(FuzzyJoinConfig.RECORD_SEPARATOR_REGEX);
+                 int rid = Integer.parseInt(splits[FuzzyJoinConfig.RECORD_KEY]);
+                 outputTokenized.writeInt(rid);
+                 Collection<String> tokens = tokenizer.tokenize(FuzzyJoinUtil.getData(splits, dataColumns,
+                         FuzzyJoinConfig.TOKEN_SEPARATOR));
+                 Collection<Integer> tokensRanked = tokenRank.getTokenRanks(tokens);
+                 outputTokenized.writeInt(tokensRanked.size());
+                 for (Integer token : tokensRanked) {
+                     outputTokenized.writeInt(token);
+                 }
+             }
+         } finally {
+             outputTokenized.close();
+         }
+     } finally {
+         input.close();
+     }
+ }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinUtil.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinUtil.java
new file mode 100644
index 0000000..d947e24
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/FuzzyJoinUtil.java
@@ -0,0 +1,87 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+import java.util.regex.Pattern;
+
+/**
+ * Static helpers that normalize record text and select the data columns of a record.
+ */
+public class FuzzyJoinUtil {
+    private static final Pattern rePunctuation = Pattern.compile("[^\\p{L}\\p{N}]"); // L:Letter, N:Number
+    private static final Pattern reSpaceOrUnderscore = Pattern.compile("(_|\\s)+");
+
+    /**
+     * Normalizes a piece of text:
+     * <ul>
+     * <li>every character that is neither a letter nor a digit becomes a space</li>
+     * <li>runs of spaces/underscores are collapsed and leading/trailing ones removed</li>
+     * <li>the remaining single spaces are replaced with '_'</li>
+     * <li>the text is lower-cased</li>
+     * </ul>
+     *
+     * @param in
+     *            text to normalize
+     * @return the normalized text, e.g. {@code "Hello,  World!"} becomes {@code "hello_world"}
+     */
+    public static String clean(String in) {
+        in = rePunctuation.matcher(in).replaceAll(" ");
+        in = reSpaceOrUnderscore.matcher(in).replaceAll(" ");
+        in = in.trim();
+        in = in.replace(' ', '_');
+        // Locale.ROOT keeps the result independent of the JVM default locale; with a Turkish
+        // default, for instance, "I" would otherwise be lower-cased to a dotless i and the
+        // same data would produce different tokens on different machines.
+        in = in.toLowerCase(java.util.Locale.ROOT);
+        return in;
+    }
+
+    /**
+     * Concatenates the cleaned values of the requested columns.
+     *
+     * A separator is appended before every column that follows the first column found,
+     * even if that later column is missing from the record, so the result can end with a
+     * separator.
+     *
+     * @param splits
+     *            the fields of a split record
+     * @param dataColumns
+     *            indexes of the columns holding data
+     * @param tokenSeparator
+     *            character inserted between the values of consecutive data columns
+     * @return concatenation of the cleaned data column values, or {@code null} if the
+     *         record has none of the requested columns
+     */
+    public static String getData(Object[] splits, int[] dataColumns, char tokenSeparator) {
+        StringBuilder data = null;
+        for (int dataColumn : dataColumns) {
+            if (data != null) {
+                data.append(tokenSeparator);
+            }
+            if (splits.length > dataColumn) {
+                if (data == null) {
+                    data = new StringBuilder();
+                }
+                data.append(clean(splits[dataColumn].toString()));
+            }
+        }
+        return data == null ? null : data.toString();
+    }
+
+    /**
+     * Parses a list of column indexes.
+     *
+     * @param columnsString
+     *            column indexes separated by
+     *            {@link FuzzyJoinConfig#RECORD_DATA_VALUE_SEPARATOR_REGEX}; whitespace around
+     *            an index is ignored
+     * @return array of data column indexes, in the order given
+     * @throws NumberFormatException
+     *             if an entry is not an integer
+     */
+    public static int[] getDataColumns(String columnsString) {
+        String[] splits = columnsString.split(FuzzyJoinConfig.RECORD_DATA_VALUE_SEPARATOR_REGEX);
+        int[] columns = new int[splits.length];
+        for (int i = 0; i < splits.length; i++) {
+            columns[i] = Integer.parseInt(splits[i].trim());
+        }
+        return columns;
+    }
+
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/IntArray.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/IntArray.java
new file mode 100644
index 0000000..7170e87
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/IntArray.java
@@ -0,0 +1,80 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+import java.util.Arrays;
+
+/**
+ * Growable array of ints. The backing array starts with 128 slots and doubles whenever it
+ * is full; only the first {@link #length()} slots hold valid elements.
+ */
+public class IntArray {
+    private static final int SIZE = 128;
+
+    private int[] data;
+    private int length;
+
+    /** Creates an empty array with the initial capacity. */
+    public IntArray() {
+        this.length = 0;
+        this.data = new int[SIZE];
+    }
+
+    /** Appends {@code d}, doubling the backing array first if it is full. */
+    public void add(int d) {
+        final int capacity = data.length;
+        if (length == capacity) {
+            data = Arrays.copyOf(data, capacity << 1);
+        }
+        data[length] = d;
+        length++;
+    }
+
+    /**
+     * @return the backing array itself (not a copy); slots at or beyond {@link #length()}
+     *         hold stale or zero values
+     */
+    public int[] get() {
+        return data;
+    }
+
+    /** @return the value stored in slot {@code i} of the backing array */
+    public int get(int i) {
+        return data[i];
+    }
+
+    /** @return the number of elements added since creation or the last reset */
+    public int length() {
+        return length;
+    }
+
+    /** Empties the array; the backing array is kept. */
+    public void reset() {
+        length = 0;
+    }
+
+    /** Sorts all valid elements in ascending order. */
+    public void sort() {
+        sort(0, length);
+    }
+
+    /** Sorts the slots from {@code start} (inclusive) to {@code end} (exclusive). */
+    public void sort(int start, int end) {
+        Arrays.sort(data, start, end);
+    }
+
+    /** @return the valid elements formatted like {@code [1, 2, 3]} */
+    @Override
+    public String toString() {
+        final StringBuilder out = new StringBuilder();
+        out.append('[');
+        String separator = "";
+        for (int i = 0; i < length; i++) {
+            out.append(separator).append(data[i]);
+            separator = ", ";
+        }
+        return out.append(']').toString();
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/IntPair.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/IntPair.java
new file mode 100644
index 0000000..1d54e82
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/IntPair.java
@@ -0,0 +1,96 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+/**
+ * Mutable pair of ints with value-based equality and an ordering by first component, then
+ * by second component.
+ */
+public class IntPair {
+
+    // used for hashCode computations
+    public static int[] PRIME = new int[] { 17, 31 };
+
+    protected int first;
+
+    protected int second;
+
+    /** Creates the pair (0, 0). */
+    public IntPair() {
+    }
+
+    public IntPair(int first, int second) {
+        this.first = first;
+        this.second = second;
+    }
+
+    /**
+     * Orders pairs by {@code first}, breaking ties with {@code second}.
+     *
+     * @param o
+     *            the pair to compare with; must be an IntPair
+     * @return -1, 0 or 1
+     */
+    public int compareTo(Object o) {
+        if (this == o) {
+            return 0;
+        }
+        final IntPair other = (IntPair) o;
+        if (first < other.first) {
+            return -1;
+        }
+        if (first > other.first) {
+            return 1;
+        }
+        if (second < other.second) {
+            return -1;
+        }
+        if (second > other.second) {
+            return 1;
+        }
+        return 0;
+    }
+
+    /** @return true if {@code o} is an IntPair with the same two components */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        // instanceof is false for null, so no separate null test is needed
+        if (!(o instanceof IntPair)) {
+            return false;
+        }
+        final IntPair other = (IntPair) o;
+        return first == other.first && second == other.second;
+    }
+
+    public int getFirst() {
+        return first;
+    }
+
+    public int getSecond() {
+        return second;
+    }
+
+    /** @return {@code first * PRIME[0] + second} */
+    @Override
+    public int hashCode() {
+        return PRIME[0] * first + second;
+    }
+
+    public void set(int first, int second) {
+        this.first = first;
+        this.second = second;
+    }
+
+    public void setFirst(int first) {
+        this.first = first;
+    }
+
+    public void setSecond(int second) {
+        this.second = second;
+    }
+
+    /** @return the pair formatted like {@code (1,2)} */
+    @Override
+    public String toString() {
+        return "(" + first + "," + second + ")";
+    }
+
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/LittleEndianIntInputStream.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/LittleEndianIntInputStream.java
new file mode 100644
index 0000000..e17b694
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/LittleEndianIntInputStream.java
@@ -0,0 +1,52 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+import java.io.EOFException;
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Input stream filter that decodes 32-bit ints stored least-significant byte first.
+ */
+public class LittleEndianIntInputStream extends FilterInputStream {
+
+    public LittleEndianIntInputStream(InputStream in) {
+        super(in);
+    }
+
+    /**
+     * Reads four bytes and assembles them into an int, first byte lowest.
+     *
+     * @return the decoded value
+     * @throws EOFException
+     *             if the stream ends before four bytes were read
+     * @throws IOException
+     *             if the underlying read fails
+     */
+    public int readInt() throws IOException {
+        int value = 0;
+        // little endian: the n-th byte read supplies bits 8n .. 8n+7
+        for (int shift = 0; shift < 32; shift += 8) {
+            final int next = read();
+            if (next == -1) {
+                throw new EOFException();
+            }
+            value |= next << shift;
+        }
+        return value;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/LittleEndianIntOutputStream.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/LittleEndianIntOutputStream.java
new file mode 100644
index 0000000..8b45338
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/LittleEndianIntOutputStream.java
@@ -0,0 +1,37 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+import java.io.FilterOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * Output stream filter that encodes 32-bit ints least-significant byte first.
+ */
+public class LittleEndianIntOutputStream extends FilterOutputStream {
+    /**
+     * @param in
+     *            the stream the encoded bytes are written to
+     */
+    public LittleEndianIntOutputStream(OutputStream in) {
+        super(in);
+    }
+
+    /**
+     * Writes {@code v} as four bytes, lowest byte first.
+     *
+     * @throws IOException
+     *             if the underlying write fails
+     */
+    public void writeInt(int v) throws IOException {
+        for (int shift = 0; shift <= 24; shift += 8) {
+            write((byte) (0xff & (v >> shift)));
+        }
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/MutableInteger.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/MutableInteger.java
new file mode 100644
index 0000000..8dd8748
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/MutableInteger.java
@@ -0,0 +1,70 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+/**
+ * A mutable int holder with value-based equality and natural int ordering.
+ *
+ * Because equality and hash code follow the current value, an instance must not be
+ * modified while it is used as a key of a hash-based collection.
+ */
+public class MutableInteger implements Comparable<MutableInteger> {
+    private int v;
+
+    public MutableInteger(int v) {
+        this.v = v;
+    }
+
+    /**
+     * Compares the two values numerically.
+     *
+     * @return -1, 0 or 1 if this value is less than, equal to or greater than the other
+     */
+    @Override
+    public int compareTo(MutableInteger o) {
+        // Explicit comparison instead of "v - o.v": the difference overflows when the
+        // operands are far apart (e.g. Integer.MIN_VALUE and 1) and then has the wrong sign.
+        return v < o.v ? -1 : (v > o.v ? 1 : 0);
+    }
+
+    /** @return true if {@code o} is a MutableInteger holding the same value */
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        }
+        // instanceof is false for null, so no separate null test is needed
+        if (!(o instanceof MutableInteger)) {
+            return false;
+        }
+        return ((MutableInteger) o).v == v;
+    }
+
+    /** @return the current value, so that equal instances have equal hash codes */
+    @Override
+    public int hashCode() {
+        return v;
+    }
+
+    public int get() {
+        return v;
+    }
+
+    /** Adds one to the value. */
+    public void inc() {
+        v += 1;
+    }
+
+    public void set(int v) {
+        this.v = v;
+    }
+
+    /** @return the decimal representation of the value */
+    @Override
+    public String toString() {
+        return "" + v;
+    }
+
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/RIDPairSimilarity.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/RIDPairSimilarity.java
new file mode 100644
index 0000000..64a3ea1
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/RIDPairSimilarity.java
@@ -0,0 +1,57 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+/**
+ * A pair of record ids together with a similarity value. Equality and hash code consider
+ * the two ids only; the similarity is ignored.
+ */
+public class RIDPairSimilarity {
+    public int rid1, rid2;
+    public float similarity;
+
+    /** Creates an instance with both ids 0 and similarity 0. */
+    public RIDPairSimilarity() {
+    }
+
+    public RIDPairSimilarity(int rid1, int rid2, float similarity) {
+        set(rid1, rid2, similarity);
+    }
+
+    /**
+     * @return true if {@code o} is a RIDPairSimilarity with the same rid1 and the same rid2;
+     *         false for null and for objects of any other type
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        // Reject null and foreign types instead of failing with an exception, as the
+        // Object.equals contract requires.
+        if (!(o instanceof RIDPairSimilarity)) {
+            return false;
+        }
+        RIDPairSimilarity r = (RIDPairSimilarity) o;
+        return rid1 == r.rid1 && rid2 == r.rid2;
+    }
+
+    /**
+     * @return {@code rid1 * rid2 * (rid1 - rid2)}; consistent with equals, but 0 whenever
+     *         either id is 0 or both ids are equal
+     */
+    @Override
+    public int hashCode() {
+        return rid1 * rid2 * (rid1 - rid2);
+    }
+
+    public void set(int rid1, int rid2, float similarity) {
+        this.rid1 = rid1;
+        this.rid2 = rid2;
+        this.similarity = similarity;
+    }
+
+    /** @return the instance formatted like {@code {(1, 2), 0.5}} */
+    @Override
+    public String toString() {
+        return "{(" + rid1 + ", " + rid2 + "), " + similarity + "}";
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/ResultJoin.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/ResultJoin.java
new file mode 100644
index 0000000..f4b6c34
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/ResultJoin.java
@@ -0,0 +1,30 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+/**
+ * One match of a probe against the index: the index of the matched record and the
+ * similarity computed for it.
+ */
+public class ResultJoin {
+    /** Index of the matched record. */
+    public int index;
+    /** Similarity reported for the match. */
+    public float similarity;
+
+    public ResultJoin(int index, float similarity) {
+        this.similarity = similarity;
+        this.index = index;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/ResultSelfJoin.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/ResultSelfJoin.java
new file mode 100644
index 0000000..9e2658c
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/ResultSelfJoin.java
@@ -0,0 +1,31 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin;
+
+/** Plain holder for two indexes together with a similarity value; all fields are public and mutable. */
+public class ResultSelfJoin {
+    public float similarity;
+    public int indexX, indexY;
+    public ResultSelfJoin(int indexX, int indexY, float similarity) {
+        this.similarity = similarity;
+        this.indexY = indexY;
+        this.indexX = indexX;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedList.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedList.java
new file mode 100644
index 0000000..fe0553c
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedList.java
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.invertedlist;
+
+
+public interface InvertedList extends Iterable<int[]> {
+    void add(int[] element); // adds one int[] entry to the list
+    /** Sets a minimum length; the implementations in this package compare it against element[2] of stored entries. */
+    void setMinLength(int length);
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListLengthFixed.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListLengthFixed.java
new file mode 100644
index 0000000..ed8db19
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListLengthFixed.java
@@ -0,0 +1,78 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.invertedlist;
+
+import java.util.Iterator;
+
+public class InvertedListLengthFixed implements InvertedList {
+    /** Iterates over the filled part of the enclosing list, from the index given at construction up to sz. */
+    public class ListIterator implements Iterator<int[]> {
+        int ix;
+        public ListIterator(int ix) {
+            this.ix = ix;
+        }
+        public boolean hasNext() {
+            return ix < sz;
+        }
+        /** Returns the next entry; throws NoSuchElementException instead of handing out unfilled slots. */
+        public int[] next() {
+            if (ix >= sz) {
+                throw new java.util.NoSuchElementException();
+            }
+            return list[ix++];
+        }
+        public void remove() {
+            throw new UnsupportedOperationException();
+        }
+    }
+
+    /** Backing slots; only indexes below sz hold entries. */
+    private final int[][] list;
+    /** sz counts the entries added so far; ix is the first entry not skipped by setMinLength. */
+    private int sz, ix;
+    /** Reserves size * 3 slots; the rows themselves are supplied later through add. */
+    public InvertedListLengthFixed(int size) {
+        list = new int[size * 3][];
+        sz = 0;
+        ix = 0;
+    }
+    /** Stores the reference to element (no copy) in the next free slot; the capacity never grows. */
+    public void add(int[] element) {
+        list[sz++] = element;
+    }
+
+    public int getIndex() {
+        return ix;
+    }
+
+    public int getSize() {
+        return sz;
+    }
+    /** Returns an iterator that starts at the current start index. */
+    public Iterator<int[]> iterator() {
+        return new ListIterator(ix);
+    }
+    /** Moves the start index past leading entries whose element[2] is smaller than minLength. */
+    public void setMinLength(int minLength) {
+        while (ix < sz && list[ix][2] < minLength) {
+            ix++;
+        }
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListLengthList.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListLengthList.java
new file mode 100644
index 0000000..6322040
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListLengthList.java
@@ -0,0 +1,107 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.invertedlist;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.LinkedList;
+
+public class InvertedListLengthList implements InvertedList {
+    /**
+     * Walks the buckets front to back and, inside each bucket, its entries in insertion order.
+     * Assumes that the ListLength buckets held by the enclosing list are never empty.
+     * @author rares
+     */
+    private class ListIterator implements Iterator<int[]> {
+        private final Iterator<ListLength> buckets;
+        private Iterator<int[]> current;
+
+        public ListIterator() {
+            current = null;
+            buckets = list.iterator();
+        }
+
+        public boolean hasNext() {
+            if (current != null && current.hasNext()) {
+                return true;
+            }
+            return buckets.hasNext();
+        }
+
+        public int[] next() {
+            if (current == null || !current.hasNext()) {
+                current = buckets.next().list.iterator();
+            }
+            return current.next();
+        }
+
+        public void remove() {
+            throw new UnsupportedOperationException();
+        }
+    }
+
+    /** One bucket: the entries that were added consecutively with the same element[2] value. */
+    private class ListLength {
+        public int length;
+        public ArrayList<int[]> list = new ArrayList<int[]>();
+        @Override
+        public String toString() {
+            StringBuilder text = new StringBuilder("(length:").append(length).append(",(");
+            for (int[] entry : list) {
+                text.append(Arrays.toString(entry)).append(",");
+            }
+            return text.append("))").toString();
+        }
+    }
+
+    private LinkedList<ListLength> list;
+
+    public InvertedListLengthList() {
+        list = new LinkedList<ListLength>();
+    }
+
+    /** Appends to the last bucket when its length equals element[2]; otherwise opens a new bucket. */
+    public void add(int[] element) {
+        ListLength tail = list.isEmpty() ? null : list.getLast();
+        if (tail == null || tail.length != element[2]) {
+            tail = new ListLength();
+            tail.length = element[2];
+            list.add(tail);
+        }
+        tail.list.add(element);
+    }
+
+    public Iterator<int[]> iterator() {
+        return new ListIterator();
+    }
+
+    /** Drops leading buckets whose length is below minLength. */
+    public void setMinLength(int minLength) {
+        while (!(list.isEmpty() || list.getFirst().length >= minLength)) {
+            list.removeFirst();
+        }
+    }
+
+    @Override
+    public String toString() {
+        return list.toString();
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListPlain.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListPlain.java
new file mode 100644
index 0000000..b8c37c5
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListPlain.java
@@ -0,0 +1,49 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.invertedlist;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+public class InvertedListPlain {
+    private final List<int[]> list;
+
+    public InvertedListPlain() {
+        this.list = new ArrayList<int[]>();
+    }
+
+    /** Appends element to the backing list and always returns true. */
+    public boolean add(int[] element) {
+        return list.add(element);
+    }
+
+    public Iterator<int[]> iterator() {
+        return this.list.iterator();
+    }
+    /** No-op: this plain variant ignores the minimum length. */
+    public void setMinLength(int minLength) {
+    }
+
+    @Override
+    public String toString() {
+        return String.valueOf(list);
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedLists.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedLists.java
new file mode 100644
index 0000000..7696a38
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedLists.java
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.invertedlist;
+
+public interface InvertedLists {
+    InvertedList get(int token); // looks up the inverted list kept for token
+    /** Adds element to the inverted list kept for token. */
+    void index(int token, int[] element);
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListsLengthFixed.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListsLengthFixed.java
new file mode 100644
index 0000000..db21980
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListsLengthFixed.java
@@ -0,0 +1,44 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.invertedlist;
+
+import java.util.Map;
+
+public class InvertedListsLengthFixed implements InvertedLists {
+    public final InvertedListLengthFixed[] invertedLists;
+    public final Map<Integer, Integer> invertedListsSize;
+
+    public InvertedListsLengthFixed(int noTokens, Map<Integer, Integer> invertedListsSize) {
+        this.invertedListsSize = invertedListsSize;
+        this.invertedLists = new InvertedListLengthFixed[noTokens];
+    }
+
+    public InvertedList get(int token) {
+        return invertedLists[token];
+    }
+    /** Adds element to the token's list, creating it on first use with the size that invertedListsSize maps to token. */
+    public void index(int token, int[] element) {
+        InvertedListLengthFixed target = invertedLists[token];
+        if (target == null) {
+            target = invertedLists[token] = new InvertedListLengthFixed(invertedListsSize.get(token));
+        }
+        target.add(element);
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListsLengthList.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListsLengthList.java
new file mode 100644
index 0000000..21f24b7
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/invertedlist/InvertedListsLengthList.java
@@ -0,0 +1,49 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.invertedlist;
+
+import java.util.HashMap;
+
+public class InvertedListsLengthList implements InvertedLists {
+    public final HashMap<Integer, InvertedListLengthList> invertedLists;
+
+    public InvertedListsLengthList() {
+        this.invertedLists = new HashMap<Integer, InvertedListLengthList>();
+    }
+    /** Returns the list indexed under token, or null when nothing has been indexed for it. */
+    public InvertedListLengthList get(int token) {
+        return this.invertedLists.get(token);
+    }
+    /** Adds element to the token's list, creating and registering that list on first use. */
+    public void index(int token, int[] element) {
+        InvertedListLengthList postings = invertedLists.get(token);
+        if (postings == null) {
+            postings = new InvertedListLengthList();
+            invertedLists.put(token, postings);
+        }
+        postings.add(element);
+    }
+    /** Applies setMinLength(minLength) to every list held in the map. */
+    public void prune(int minLength) {
+        for (InvertedListLengthList postings : invertedLists.values()) {
+            postings.setMinLength(minLength);
+        }
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroup.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroup.java
new file mode 100644
index 0000000..9341a60
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroup.java
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.recordgroup;
+
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFilters;
+
+public abstract class RecordGroup {
+    protected final SimilarityFilters fuzzyFilters;
+    protected final int noGroups;
+
+    public RecordGroup(int noGroups, SimilarityFilters fuzzyFilters) {
+        this.fuzzyFilters = fuzzyFilters;
+        this.noGroups = noGroups;
+    }
+    /** Returns the group ids for the given token and length; subclasses decide which of the two they use. */
+    public abstract Iterable<Integer> getGroups(Integer token, Integer length);
+    /** Reports true in the Length* and Single subclasses of this package and false in the Token* ones. */
+    public abstract boolean isLengthOnly();
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupFactory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupFactory.java
new file mode 100644
index 0000000..9b60d4e
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupFactory.java
@@ -0,0 +1,44 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.recordgroup;
+
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFilters;
+
+public class RecordGroupFactory {
+    /** Maps a record-group name to a new instance; a null or unknown name raises IllegalArgumentException. */
+    public static RecordGroup getRecordGroup(String recordGroup, int noGroups, SimilarityFilters fuzzyFilters, String lengthstatsPath) {
+        if ("LengthCount".equals(recordGroup)) {
+            return new RecordGroupLengthCount(noGroups, fuzzyFilters, lengthstatsPath);
+        } else if ("LengthIdentity".equals(recordGroup)) {
+            return new RecordGroupLengthIdentity(noGroups, fuzzyFilters);
+        } else if ("LengthRange".equals(recordGroup)) {
+            return new RecordGroupLengthRange(noGroups, fuzzyFilters, lengthstatsPath);
+        } else if ("Single".equals(recordGroup)) {
+            return new RecordGroupSingle(noGroups, fuzzyFilters);
+        } else if ("TokenIdentity".equals(recordGroup)) {
+            return new RecordGroupTokenIdentity(noGroups, fuzzyFilters);
+        } else if ("TokenFrequency".equals(recordGroup)) {
+            return new RecordGroupTokenFrequency(noGroups, fuzzyFilters);
+        } else if ("TokenFrequencyMirror".equals(recordGroup)) {
+            return new RecordGroupTokenFrequencyMirror(noGroups, fuzzyFilters);
+        }
+        throw new IllegalArgumentException("Unknown record group \"" + recordGroup + "\".");
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupLengthCount.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupLengthCount.java
new file mode 100644
index 0000000..785346a
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupLengthCount.java
@@ -0,0 +1,111 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.recordgroup;
+
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFilters;
+
+public class RecordGroupLengthCount extends RecordGroup {
+    private final int min;
+    private final int max;
+    private final int[] lengthGroups;
+
+    /** Reads min, max and then (length, frequency) int pairs from lengthstatsPath and assigns a group to every length offset. */
+    public RecordGroupLengthCount(int noGroups, SimilarityFilters fuzzyFilters, String lengthstatsPath) {
+        super(noGroups, fuzzyFilters);
+        int sum = 0;
+        int range = 0;
+        DataInputStream in = null;
+        try {
+            in = new DataInputStream(new FileInputStream(lengthstatsPath));
+            min = in.readInt();
+            max = in.readInt();
+            range = max - min + 1;
+            lengthGroups = new int[range]; // stores frequencies initially
+            try {
+                while (true) {
+                    int length = in.readInt();
+                    int freq = in.readInt();
+                    int lowAbs = fuzzyFilters.getLengthLowerBound(length);
+                    int uppAbs = fuzzyFilters.getLengthUpperBound(length);
+                    int low = Math.max(lowAbs - min, 0);
+                    int upp = Math.min(uppAbs - min, max - min);
+                    for (int l = low; l <= upp; ++l) {
+                        lengthGroups[l] += freq;
+                        sum += freq;
+                    }
+                }
+            } catch (EOFException endOfStats) {
+                // expected: the pairs carry no terminator, so end of file is what ends the loop
+            }
+        } catch (IOException ioe) {
+            throw new RuntimeException(ioe);
+        } finally {
+            if (in != null) {
+                try {
+                    in.close(); // the file handle was previously never released
+                } catch (IOException ignored) {
+                    // all data is read, or a failure is already propagating
+                }
+            }
+        }
+        // replace frequencies by group ids: move to the next group once the running count reaches sum / noGroups
+        int countGroup = sum / noGroups;
+        int count = 0;
+        int group = 0;
+        for (int i = 0; i < range; ++i) {
+            count += lengthGroups[i];
+            lengthGroups[i] = group;
+            if (count >= countGroup && group < noGroups - 1) {
+                count = 0;
+                group++;
+            }
+        }
+    }
+
+    /** Returns the distinct consecutive group ids across the length window allowed for length (empty if the window is empty). */
+    @Override
+    public Iterable<Integer> getGroups(Integer token, Integer length) {
+        int lowAbs = fuzzyFilters.getLengthLowerBound(length);
+        int uppAbs = fuzzyFilters.getLengthUpperBound(length);
+        int low = Math.max(lowAbs - min, 0);
+        int upp = Math.min(uppAbs - min, max - min);
+        // low can exceed upp + 1 for lengths outside [min, max]; clamp so the capacity is never negative
+        ArrayList<Integer> groups = new ArrayList<Integer>(Math.max(upp - low + 1, 0));
+        int prevGroup = -1;
+        for (int l = low; l <= upp; ++l) {
+            int group = lengthGroups[l];
+            if (group != prevGroup) {
+                groups.add(group);
+                prevGroup = group;
+            }
+        }
+        return groups;
+    }
+    @Override
+    public boolean isLengthOnly() {
+        return true;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupLengthIdentity.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupLengthIdentity.java
new file mode 100644
index 0000000..558eada
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupLengthIdentity.java
@@ -0,0 +1,47 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.recordgroup;
+
+import java.util.Collection;
+import java.util.LinkedList;
+
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFilters;
+
+public class RecordGroupLengthIdentity extends RecordGroup {
+    /** Reusable result holder; every getGroups call overwrites its content. */
+    private final Collection<Integer> groups;
+
+    public RecordGroupLengthIdentity(int noGroups, SimilarityFilters fuzzyFilters) {
+        super(noGroups, fuzzyFilters);
+        this.groups = new LinkedList<Integer>();
+    }
+
+    @Override
+    public Iterable<Integer> getGroups(Integer token, Integer length) { // length itself is the only group id; token is unused
+        this.groups.clear();
+        this.groups.add(length);
+        return this.groups;
+    }
+
+    @Override
+    public boolean isLengthOnly() {
+        return true;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupLengthRange.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupLengthRange.java
new file mode 100644
index 0000000..2bdd02d
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupLengthRange.java
@@ -0,0 +1,75 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.recordgroup;
+
+import java.io.DataInputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFilters;
+
+public class RecordGroupLengthRange extends RecordGroup {
+    private final int min;
+    private final int max;
+    private final int groupSize;
+    /** Reads min and max (two ints) from lengthstatsPath and derives the number of length offsets per group. */
+    public RecordGroupLengthRange(int noGroups, SimilarityFilters fuzzyFilters, String lengthstatsPath) {
+        super(noGroups, fuzzyFilters);
+        DataInputStream in = null;
+        try {
+            in = new DataInputStream(new FileInputStream(lengthstatsPath));
+            min = in.readInt();
+            max = in.readInt();
+            groupSize = (int) Math.ceil((max - min + 1f) / noGroups);
+        } catch (IOException ioe) {
+            throw new RuntimeException(ioe);
+        } finally {
+            if (in != null) {
+                try {
+                    in.close(); // the file handle was previously never released
+                } catch (IOException ignored) { // both ints are read, or a failure is already propagating
+                }
+            }
+        }
+    }
+    /** Returns the distinct group ids (offset / groupSize) across the length window allowed for length (empty if the window is empty). */
+    @Override
+    public Iterable<Integer> getGroups(Integer token, Integer length) {
+        int lowAbs = fuzzyFilters.getLengthLowerBound(length);
+        int uppAbs = fuzzyFilters.getLengthUpperBound(length);
+        int low = Math.max(lowAbs - min, 0);
+        int upp = Math.min(uppAbs - min, max - min);
+        // low can exceed upp + 1 for lengths outside [min, max]; clamp so the capacity is never negative
+        ArrayList<Integer> groups = new ArrayList<Integer>(Math.max(upp - low + 1, 0));
+        int prevGroup = -1;
+        for (int l = low; l <= upp; ++l) {
+            int group = l / groupSize;
+            if (group != prevGroup) {
+                groups.add(group);
+                prevGroup = group;
+            }
+        }
+        return groups;
+    }
+    @Override
+    public boolean isLengthOnly() {
+        return true;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupSingle.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupSingle.java
new file mode 100644
index 0000000..0f9480f
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupSingle.java
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.recordgroup;
+
+import java.util.Collection;
+import java.util.LinkedList;
+
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFilters;
+
+public class RecordGroupSingle extends RecordGroup {
+    /** Constant answer: a collection that holds only group 0, filled once by the constructor. */
+    private final Collection<Integer> groups = new LinkedList<Integer>();
+
+    public RecordGroupSingle(int noGroups, SimilarityFilters fuzzyFilters) {
+        super(noGroups, fuzzyFilters);
+        this.groups.add(Integer.valueOf(0));
+    }
+
+    @Override
+    public Iterable<Integer> getGroups(Integer token, Integer length) { // token and length are both unused
+        return this.groups;
+    }
+
+    @Override
+    public boolean isLengthOnly() {
+        return true;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupTokenFrequency.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupTokenFrequency.java
new file mode 100644
index 0000000..f0cd032
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupTokenFrequency.java
@@ -0,0 +1,47 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.recordgroup;
+
+import java.util.Collection;
+import java.util.LinkedList;
+
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFilters;
+
+public class RecordGroupTokenFrequency extends RecordGroup {
+    /** Reusable result holder; every getGroups call overwrites its content. */
+    private final Collection<Integer> groups = new LinkedList<Integer>();
+
+    public RecordGroupTokenFrequency(int noGroups, SimilarityFilters fuzzyFilters) {
+        super(noGroups, fuzzyFilters);
+    }
+
+    @Override
+    public Iterable<Integer> getGroups(Integer token, Integer length) { // group id is token % noGroups; length is unused
+        groups.clear();
+        final int group = token % noGroups;
+        groups.add(group);
+        return groups;
+    }
+
+    @Override
+    public boolean isLengthOnly() {
+        return false;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupTokenFrequencyMirror.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupTokenFrequencyMirror.java
new file mode 100644
index 0000000..36085a2
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupTokenFrequencyMirror.java
@@ -0,0 +1,53 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.recordgroup;
+
+import java.util.Collection;
+import java.util.LinkedList;
+
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFilters;
+
+public class RecordGroupTokenFrequencyMirror extends RecordGroup {
+    /** Reusable result holder; every getGroups call overwrites its content. */
+    private final Collection<Integer> groups = new LinkedList<Integer>();
+
+    public RecordGroupTokenFrequencyMirror(int noGroups, SimilarityFilters fuzzyFilters) {
+        super(noGroups, fuzzyFilters);
+    }
+
+    /**
+     * Takes token modulo 2 * noGroups; remainders of noGroups or more are reflected back (noGroups maps to noGroups - 1, and so on down).
+     */
+    @Override
+    public Iterable<Integer> getGroups(Integer token, Integer length) {
+        final int period = noGroups * 2;
+        final int position = token % period;
+        // mirror the upper half of the period onto the lower half
+        final int group = position < noGroups ? position : period - 1 - position;
+        groups.clear();
+        groups.add(group);
+        return groups;
+    }
+
+    @Override
+    public boolean isLengthOnly() {
+        return false;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupTokenIdentity.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupTokenIdentity.java
new file mode 100644
index 0000000..691fb10
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/recordgroup/RecordGroupTokenIdentity.java
@@ -0,0 +1,47 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.recordgroup;
+
+import java.util.Collection;
+import java.util.LinkedList;
+
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFilters;
+
+public class RecordGroupTokenIdentity extends RecordGroup {
+ // Scratch collection returned by every getGroups call; cleared and refilled on each call.
+ private final Collection<Integer> groups = new LinkedList<Integer>();
+
+ public RecordGroupTokenIdentity(int noGroups, SimilarityFilters fuzzyFilters) {
+ super(noGroups, fuzzyFilters);
+ }
+
+ // The token itself is the only group reported; length is not consulted.
+ @Override
+ public Iterable<Integer> getGroups(Integer token, Integer length) {
+ groups.clear();
+ groups.add(token);
+ return groups;
+ }
+
+ @Override
+ public boolean isLengthOnly() {
+ return false;
+ }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java
new file mode 100644
index 0000000..85d1785
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java
@@ -0,0 +1,29 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.similarity;
+
+public interface IGenericSimilarityMetric {
+ // Returns the similarity of the two lists.
+ float getSimilarity(IListIterator firstList, IListIterator secondList);
+
+ // Returns -1 when the pair does not satisfy simThresh;
+ // otherwise returns the similarity.
+ float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh);
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IListIterator.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IListIterator.java
new file mode 100644
index 0000000..647c35f
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IListIterator.java
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.similarity;
+
+public interface IListIterator {
+ int compare(IListIterator cmpIter); // callers in this package branch on the sign: < 0, 0 or > 0
+
+ byte[] getData();
+
+ int getPos();
+
+ boolean hasNext();
+
+ void next();
+
+ void reset(); // NOTE(review): presumably rewinds to the first item - confirm with implementations
+
+ int size(); // callers in this package use this as the number of items in the list
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/PartialIntersect.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/PartialIntersect.java
new file mode 100644
index 0000000..15c339e
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/PartialIntersect.java
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.similarity;
+
+public class PartialIntersect { // result holder filled by SimilarityMetric.getPartialIntersectSize
+ public int intersectSize; // matches counted up to and including the stop-token match
+ public int posXStart; // position in X of the first match
+ public int posXStop; // position in X of the stop-token match
+ public int posYStart; // position in Y of the first match
+ public int posYStop; // position in Y of the stop-token match
+
+ private boolean startSet; // set once the start positions have been recorded
+
+ public void set() {
+ startSet = true;
+ }
+
+ public boolean isSet() {
+ return startSet;
+ }
+
+ public void reset() { // clears the flag only; the int fields keep their values
+ startSet = false;
+ }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityFilters.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityFilters.java
new file mode 100644
index 0000000..48b22ca
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityFilters.java
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.similarity;
+
+import java.io.Serializable;
+
+public interface SimilarityFilters extends Serializable {
+ int getLengthLowerBound(int length);
+
+ int getLengthUpperBound(int length);
+
+ int getPrefixLength(int length);
+
+ boolean passLengthFilter(int lengthX, int lengthY);
+
+ boolean passPositionFilter(int noGramsCommon, int positionX, int lengthX, int positionY, int lengthY);
+
+ float passSimilarityFilter(int[] tokensX, int startX, int lengthX, int prefixLengthX, int[] tokensY, int startY,
+ int lengthY, int prefixLengthY, int intersectionSizePrefix);
+
+ // Same check without explicit start/length arguments.
+ float passSimilarityFilter(int[] tokensX, int prefixLengthX, int[] tokensY, int prefixLengthY, int intersectionSizePrefix);
+
+ boolean passSuffixFilter(int[] tokensX, int startX, int lengthX, int positionX, int[] tokensY, int startY,
+ int lengthY, int positionY);
+
+ boolean passSuffixFilter(int[] tokensX, int positionX, int[] tokensY, int positionY);
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityFiltersFactory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityFiltersFactory.java
new file mode 100644
index 0000000..07c68a3
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityFiltersFactory.java
@@ -0,0 +1,29 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.similarity;
+
+public class SimilarityFiltersFactory {
+ public static SimilarityFilters getSimilarityFilters(String similarityName, float similarityThreshold) {
+ if (!"jaccard".equalsIgnoreCase(similarityName)) { // any other name, including null, is rejected
+ throw new RuntimeException("Unknown fuzzy filters \"" + similarityName + "\".");
+ }
+ return new SimilarityFiltersJaccard(similarityThreshold);
+ }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityFiltersJaccard.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityFiltersJaccard.java
new file mode 100644
index 0000000..db9fefa
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityFiltersJaccard.java
@@ -0,0 +1,297 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.similarity;
+
+import java.util.Arrays;
+
+public class SimilarityFiltersJaccard implements SimilarityFilters {
+ class Partition {
+ public int startL; // first index of the left part
+ public int lengthL; // number of tokens in the left part
+ public int startR; // first index of the right part
+ public int lengthR; // number of tokens in the right part
+ public int hamming; // getPartition stores 0 when the pivot token was found, 1 otherwise
+
+ public Partition() {
+ }
+
+ public Partition(int startL, int lengthL, int startR, int lengthR, int hamming) {
+ this.startL = startL;
+ this.lengthL = lengthL;
+ this.startR = startR;
+ this.lengthR = lengthR;
+ this.hamming = hamming;
+ }
+ }
+
+ /**
+ *
+ */
+ private static final long serialVersionUID = 1L;
+
+ private static final int MAX_DEPTH = 2;
+
+ public static int getLengthLowerBound(int length, float simThr) {
+ return (int) Math.ceil(simThr * length); // smallest partner length accepted by passLengthFilter
+ }
+
+ public static boolean passLengthFilter(int lengthX, int lengthY, float simThr) { // lengthY within both bounds?
+ return getLengthLowerBound(lengthX, simThr) <= lengthY && lengthY <= 1 / simThr * lengthX;
+ }
+
+ protected float simThr;
+
+ protected float simThr100;
+
+ public SimilarityFiltersJaccard(float similarityThreshold) {
+ reset(similarityThreshold); // stores the threshold and derives simThr100 from it
+ }
+
+ public int getIndexPrefixLength(int length) { // NOTE(review): yields 1 for length 0, unlike getPrefixLength
+ return length - (int) Math.ceil(2 * simThr100 / (100 + simThr100) * length) + 1;
+ }
+
+ public int getIntersectLowerBound(int lengthX, int lengthY) { // overlap needed for Jaccard >= simThr
+ return (int) Math.ceil(simThr100 * (lengthX + lengthY) / (100 + simThr100));
+ }
+
+ public int getIntersectUpperBound(int noGramsCommon, int positionX, int positionY, int lengthX, int lengthY) {
+ return noGramsCommon + Math.min(lengthX - positionX - 1, lengthY - positionY - 1); // plus the shorter remainder
+ }
+
+ public int getLengthLowerBound(int length) {
+ return getLengthLowerBound(length, simThr); // static helper applied with this filter's threshold
+ }
+
+ public int getLengthUpperBound(int length) {
+ return (int) Math.floor(1 / simThr * length); // floor(length / simThr)
+ }
+
+ private Partition getPartition(int[] tokens, int start, int length, int w, int posL, int posR) {
+ int p; // index at which the window [start, start + length) is split around w
+ if (tokens[posL] > w) {
+ p = posL;
+ } else if (tokens[posR] < w) {
+ p = posR;
+ } else {
+ p = Arrays.binarySearch(tokens, start, start + length, w);
+ }
+ // binarySearch reports a missing key as -(insertion point) - 1; turn that back into the insertion point.
+ if (p < 0) {
+ p = -p - 1;
+ }
+ // Pivot found: it is left out of both parts (hamming 0). Otherwise the window is split at p (hamming 1).
+ if (p >= start && p < start + length && tokens[p] == w) {
+ return new Partition(start, p - start, p + 1, start + length - p - 1, 0);
+ }
+ return new Partition(start, p - start, p, start + length - p, 1);
+ }
+
+ public int getPrefixLength(int length) {
+ // Empty records get an empty prefix; otherwise length - ceil(simThr * length) + 1 leading tokens.
+ final int lowerBound = (int) Math.ceil(simThr * length);
+ final int prefix = length - lowerBound + 1;
+ return length == 0 ? 0 : prefix;
+ }
+
+ public float getSimilarityThreshold() {
+ return simThr; // the value last passed to reset()
+ }
+
+ private int getSuffixFilter(int[] tokensX, int startX, int lengthX, int[] tokensY, int startY, int lengthY,
+ int hammingMax, int depth) {
+ final int lengthDiff = Math.abs(lengthX - lengthY);
+ // Recursion limit reached or one window empty: answer with the length difference alone.
+ if (depth > MAX_DEPTH || lengthX == 0 || lengthY == 0) {
+ return lengthDiff;
+ }
+ // Pivot index: the middle of the Y window (the lower middle when lengthY is even).
+ final int mid = startY + lengthY / 2 + lengthY % 2 - 1;
+ final int offset = (hammingMax - lengthDiff) / 2;
+ // offsetL / offsetR decide on which side the probe range is widened by the length difference.
+ int offsetL;
+ int offsetR;
+ if (lengthX < lengthY) {
+ offsetL = 1;
+ offsetR = 0;
+ } else {
+ offsetL = 0;
+ offsetR = 1;
+ }
+ Partition partitionY = new Partition(startY, mid - startY, mid + 1, startY + lengthY - mid - 1, 0);
+ // Earlier form of the partitionX call below, kept for reference:
+ // Partition partitionX = getPartition(tokensX, startX, lengthX,
+ // tokensY[mid], Math.max(Math.min(mid + startX - startY, startX
+ // + lengthX - 1)
+ // - offset - Math.abs(lengthX - lengthY) * offsetL,
+ // startX), Math.min(Math.max(mid + startX - startY,
+ // startX)
+ // + offset + Math.abs(lengthX - lengthY) * offsetR,
+ // startX + lengthX - 1));
+ // NOTE(review): presumably the PPJoin+ suffix-filter recursion; confirm against the paper before changing it.
+ Partition partitionX = getPartition(tokensX, startX, lengthX, tokensY[mid],
+ Math.max(mid + startX - startY - offset - lengthDiff * offsetL, startX),
+ Math.min(mid + startX - startY + offset + lengthDiff * offsetR, startX + lengthX - 1));
+
+ int hammingPart = partitionX.hamming;
+ // Bound for this level: size differences of the left and right parts plus the pivot's own mismatch.
+ int hamming = Math.abs(partitionX.lengthL - partitionY.lengthL)
+ + Math.abs(partitionX.lengthR - partitionY.lengthR) + hammingPart;
+ // Refine with the left parts, then the right parts, but only while the bound stays within hammingMax.
+ if (hamming <= hammingMax) {
+ int hammingL = getSuffixFilter(tokensX, partitionX.startL, partitionX.lengthL, tokensY, partitionY.startL,
+ partitionY.lengthL, hammingMax - Math.abs(partitionX.lengthR - partitionY.lengthR) - hammingPart,
+ depth + 1);
+ hamming = hammingL + Math.abs(partitionX.lengthR - partitionY.lengthR) + hammingPart;
+
+ if (hamming <= hammingMax) {
+ int hammingR = getSuffixFilter(tokensX, partitionX.startR, partitionX.lengthR, tokensY,
+ partitionY.startR, partitionY.lengthR, hammingMax - hammingL - hammingPart, depth + 1);
+ hamming = hammingL + hammingR + hammingPart;
+ }
+ }
+ return hamming;
+ }
+
+ public boolean passLengthFilter(int lengthX, int lengthY) {
+ return passLengthFilter(lengthX, lengthY, simThr); // static helper applied with this filter's threshold
+ }
+
+ /**
+ * Checks whether the largest overlap still reachable can meet the required overlap.
+ *
+ * @param noGramsCommon number of grams in common
+ * @param positionX position of the last gram in common on X
+ * @param lengthX number of grams in X
+ * @param positionY position of the last gram in common on Y
+ * @param lengthY number of grams in Y
+ * @return true if the value of getIntersectUpperBound for these arguments is at least
+ * getIntersectLowerBound(lengthX, lengthY)
+ */
+ public boolean passPositionFilter(int noGramsCommon, int positionX, int lengthX, int positionY, int lengthY) {
+ return getIntersectUpperBound(noGramsCommon, positionX, positionY, lengthX, lengthY) >= getIntersectLowerBound(
+ lengthX, lengthY);
+ }
+
+ public float passSimilarityFilter(final int[] tokensX, int startX, int lengthX, final int prefixLengthX,
+ final int[] tokensY, int startY, int lengthY, final int prefixLengthY, final int intersectionSizePrefix) {
+ final int length = lengthX;
+ final int token = tokensX[startX + Math.min(prefixLengthX, lengthX) - 1]; // last token of X's prefix
+ final int lengthProbe = lengthY;
+ final int tokenProbe = tokensY[startY + prefixLengthY - 1]; // last token of Y's prefix
+ // NOTE(review): only X's prefix length is clamped to its window; prefixLengthY is used as given - confirm.
+ final int intersectSizeLowerBound = getIntersectLowerBound(length, lengthProbe);
+ int intersectSize = 0;
+ // Count the rest of the overlap only if matching every remaining token could still reach the bound.
+ if (token < tokenProbe) {
+ if (intersectionSizePrefix + length - prefixLengthX >= intersectSizeLowerBound) {
+ intersectSize = intersectionSizePrefix
+ + SimilarityMetric.getIntersectSize(tokensX, startX + prefixLengthX, lengthX - prefixLengthX,
+ tokensY, startY + intersectionSizePrefix, lengthY - intersectionSizePrefix);
+ }
+ } else {
+ if (intersectionSizePrefix + lengthProbe - prefixLengthY >= intersectSizeLowerBound) {
+ intersectSize = intersectionSizePrefix
+ + SimilarityMetric.getIntersectSize(tokensX, startX + intersectionSizePrefix, lengthX
+ - intersectionSizePrefix, tokensY, startY + prefixLengthY, lengthY - prefixLengthY);
+ }
+ }
+ // Overlap / union is returned only when the overlap meets the bound; 0 means the pair was filtered out.
+ if (intersectSize >= intersectSizeLowerBound) {
+ return ((float) intersectSize) / (length + lengthProbe - intersectSize);
+ }
+ return 0;
+ }
+
+ /**
+ * Whole-array form: delegates to the windowed overload with start 0 and the full array lengths.
+ * @param tokensX tokens of the first record
+ * @param prefixLengthX prefix length used for tokensX
+ * @param tokensY tokens of the second record
+ * @param prefixLengthY prefix length used for tokensY
+ * @param intersectionSizePrefix overlap counted so far; it is added to the overlap found by the scan
+ * @return similarity if it is above or equal to the similarity threshold, 0 otherwise
+ */
+ public float passSimilarityFilter(final int[] tokensX, final int prefixLengthX, final int[] tokensY,
+ final int prefixLengthY, final int intersectionSizePrefix) {
+ // final int length = tokensX.length;
+ // final int token = tokensX[Math.min(prefixLengthX, tokensX.length) -
+ // 1];
+ // final int lengthProbe = tokensY.length;
+ // final int tokenProbe = tokensY[prefixLengthY - 1];
+ //
+ // final int intersectSizeLowerBound = getIntersectLowerBound(length,
+ // lengthProbe);
+ // int intersectSize = 0;
+ //
+ // if (token < tokenProbe) {
+ // if (intersectionSizePrefix + length - prefixLengthX >=
+ // intersectSizeLowerBound) {
+ // intersectSize = intersectionSizePrefix
+ // + SimilarityMetric.getIntersectSize(tokensX,
+ // prefixLengthX, tokensY, intersectionSizePrefix);
+ // }
+ // } else {
+ // if (intersectionSizePrefix + lengthProbe - prefixLengthY >=
+ // intersectSizeLowerBound) {
+ // intersectSize = intersectionSizePrefix
+ // + SimilarityMetric.getIntersectSize(tokensX,
+ // intersectionSizePrefix, tokensY, prefixLengthY);
+ // }
+ // }
+ //
+ // if (intersectSize >= intersectSizeLowerBound) {
+ // return ((float) intersectSize)
+ // / (length + lengthProbe - intersectSize);
+ // }
+ // return 0;
+ return passSimilarityFilter(tokensX, 0, tokensX.length, prefixLengthX, tokensY, 0, tokensY.length,
+ prefixLengthY, intersectionSizePrefix);
+ }
+
+ public boolean passSuffixFilter(int[] tokensX, int tokensStartX, int tokensLengthX, int positionX, int[] tokensY,
+ int tokensStartY, int tokensLengthY, int positionY) {
+ // Hamming budget: both lengths, minus twice the required overlap, minus the two positions.
+ final int total = tokensLengthX + tokensLengthY;
+ final int required = (int) Math.ceil(simThr100 / (100 + simThr100) * total);
+ final int budget = total - 2 * required - (positionX + positionY);
+ return getSuffixFilter(tokensX, tokensStartX + positionX + 1, tokensLengthX - positionX - 1, tokensY,
+ tokensStartY + positionY + 1, tokensLengthY - positionY - 1, budget, 1) <= budget;
+ }
+
+ public boolean passSuffixFilter(int[] tokensX, int positionX, int[] tokensY, int positionY) {
+ // Whole-array form of the windowed overload. Earlier inline version, kept for reference:
+ // int hammingMax = tokensX.length + tokensY.length
+ // - 2
+ // * (int) Math.ceil(simThr100 / (100 + simThr100)
+ // * (tokensX.length + tokensY.length))
+ // - (positionX + 1 + positionY + 1 - 2);
+ // int hamming = getSuffixFilter(tokensX, positionX + 1, tokensX.length
+ // - positionX - 1, tokensY, positionY + 1, tokensY.length
+ // - positionY - 1, hammingMax, 1);
+ // return hamming <= hammingMax;
+ return passSuffixFilter(tokensX, 0, tokensX.length, positionX, tokensY, 0, tokensY.length, positionY);
+ }
+
+ public void reset(float similarityThreshold) {
+ simThr = similarityThreshold;
+ simThr100 = simThr * 100; // the same threshold scaled by 100, as read by the overlap formulas
+ }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetric.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetric.java
new file mode 100644
index 0000000..415e785
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetric.java
@@ -0,0 +1,184 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.similarity;
+
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.Tokenizer;
+
+public abstract class SimilarityMetric {
+
+ public static int getIntersectSize(IListIterator tokensX, IListIterator tokensY) {
+ int matches = 0;
+ while (tokensX.hasNext() && tokensY.hasNext()) { // the iterators are left advanced, not reset
+ final int order = tokensX.compare(tokensY);
+ if (order == 0) {
+ matches++;
+ }
+ if (order <= 0) {
+ tokensX.next();
+ }
+ if (order >= 0) {
+ tokensY.next();
+ }
+ }
+ return matches;
+ }
+
+ public static int getIntersectSize(int[] tokensX, int startX, int lengthX, int[] tokensY, int startY, int lengthY) {
+ // Counts values shared by tokensX[startX, startX + lengthX) and tokensY[startY, startY + lengthY).
+ // The single forward pass assumes both windows are in ascending order - TODO confirm with callers.
+ int i = 0;
+ int j = 0;
+ int shared = 0;
+ while (i < lengthX && j < lengthY) {
+ final int x = tokensX[startX + i];
+ final int y = tokensY[startY + j];
+ if (x == y) {
+ shared++;
+ i++;
+ j++;
+ } else if (x < y) {
+ i++;
+ } else {
+ j++;
+ }
+ }
+ return shared;
+ }
+
+ public static int getIntersectSize(int[] tokensX, int startX, int[] tokensY, int startY) {
+ // Intersects tokensX[startX..] with tokensY[startY..]; the earlier inline version is kept for reference:
+ // int intersectSize = 0;
+ // while (startX < tokensX.length && startY < tokensY.length) {
+ // int tokenX = tokensX[startX];
+ // int tokenY = tokensY[startY];
+ // if (tokenX > tokenY) {
+ // startY++;
+ // } else if (tokenX < tokenY) {
+ // startX++;
+ // } else {
+ // intersectSize++;
+ // startX++;
+ // startY++;
+ // }
+ // }
+ // return intersectSize;
+ // The six-argument overload expects window lengths, so pass what remains after each start offset.
+ return getIntersectSize(tokensX, startX, tokensX.length - startX, tokensY, startY, tokensY.length - startY);
+ }
+
+ public static int getIntersectSize(int[] tokensX, int[] tokensY) { // whole-array form
+ return getIntersectSize(tokensX, 0, tokensX.length, tokensY, 0, tokensY.length);
+ }
+
+ public static PartialIntersect getPartialIntersectSize(int[] tokensX, int startX, int lengthX, int[] tokensY,
+ int startY, int lengthY, int tokenStop) {
+ final PartialIntersect result = new PartialIntersect(); // a fresh holder for every call
+ getPartialIntersectSize(tokensX, startX, lengthX, tokensY, startY, lengthY, tokenStop, result);
+ return result;
+ }
+
+ public static void getPartialIntersectSize(int[] tokensX, int startX, int lengthX, int[] tokensY, int startY,
+ int lengthY, int tokenStop, PartialIntersect parInter) {
+ // Only parInter's flag is cleared; its stop fields and intersectSize change only when tokenStop is matched.
+ parInter.reset();
+ int i = 0;
+ int j = 0;
+ int matches = 0;
+ while (i < lengthX && j < lengthY) {
+ final int x = tokensX[startX + i];
+ final int y = tokensY[startY + j];
+ if (x < y) {
+ i++;
+ } else if (x > y) {
+ j++;
+ } else {
+ matches++;
+ if (!parInter.isSet()) {
+ parInter.posXStart = i;
+ parInter.posYStart = j;
+ parInter.set();
+ }
+ if (x == tokenStop) {
+ parInter.posXStop = i;
+ parInter.posYStop = j;
+ parInter.intersectSize = matches;
+ }
+ i++;
+ j++;
+ }
+ }
+ }
+
+ public static PartialIntersect getPartialIntersectSize(int[] tokensX, int[] tokensY, int tokenStop) { // whole arrays
+ return getPartialIntersectSize(tokensX, 0, tokensX.length, tokensY, 0, tokensY.length, tokenStop);
+ }
+
+ // @SuppressWarnings("unchecked")
+ // public static int getIntersectSize(DataBag tokensX, DataBag tokensY) {
+ // int intersectSize = 0;
+ //
+ // Iterator<Tuple> iteratorX = tokensX.iterator();
+ // Iterator<Tuple> iteratorY = tokensY.iterator();
+ //
+ // Tuple nextX = null;
+ // Tuple nextY = null;
+ //
+ // while ((nextX != null || iteratorX.hasNext())
+ // && (nextY != null || iteratorY.hasNext())) {
+ // if (nextX == null) {
+ // nextX = iteratorX.next();
+ // }
+ // if (nextY == null) {
+ // nextY = iteratorY.next();
+ // }
+ //
+ // int cmp = nextX.compareTo(nextY);
+ // if (cmp > 0) {
+ // nextY = null;
+ // } else if (cmp < 0) {
+ // nextX = null;
+ // } else {
+ // intersectSize++;
+ // nextX = null;
+ // nextY = null;
+ // }
+ // }
+ //
+ // return intersectSize;
+ // }
+
+ // public abstract float getSimilarity(DataBag tokensX, DataBag tokensY);
+
+ // public abstract float getSimilarity(DataBag tokensX, int lengthX,
+ // DataBag tokensY, int lengthY);
+
+ public float getSimilarity(IListIterator tokensX, IListIterator tokensY) {
+ // overlap / (|X| + |Y| - overlap); the iterators come back advanced by the intersection pass
+ final int common = SimilarityMetric.getIntersectSize(tokensX, tokensY);
+ final int union = tokensX.size() + tokensY.size() - common;
+ return (float) common / union;
+ }
+
+ public abstract float getSimilarity(int[] tokensX, int startX, int lengthX, int[] tokensY, int startY, int lengthY);
+ // NOTE(review): start/length presumably select windows as in getIntersectSize - confirm in the subclasses.
+ public abstract float getSimilarity(int[] tokensX, int[] tokensY);
+
+ public abstract float getSimilarity(String stringX, String stringY, Tokenizer tokenizer);
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
new file mode 100644
index 0000000..b99d6f7
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
@@ -0,0 +1,216 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.similarity;
+
+import java.util.Arrays;
+
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.StringUtils;
+
+public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
+
+ private final int utf8SizeIndicatorSize = 2;
+
+ // dp implementation only needs 2 rows
+ private final int rows = 2;
+ private int cols;
+ private int[][] matrix;
+
+ // for letter count filtering
+ private final int[] fsLcCount = new int[128];
+ private final int[] ssLcCount = new int[128];
+
+ public SimilarityMetricEditDistance() {
+ cols = 100; // arbitrary default value
+ matrix = new int[rows][cols]; // replaced by a wider matrix when a distance method needs more columns
+ }
+
+ @Override
+ public float getSimilarity(IListIterator firstList, IListIterator secondList) {
+ int flLen = firstList.size();
+ int slLen = secondList.size();
+ // Despite the method name, the value returned is the edit distance between the two lists.
+ // reuse existing matrix if possible
+ if (slLen >= cols) {
+ cols = slLen + 1;
+ matrix = new int[rows][cols];
+ }
+
+ // init matrix
+ for (int i = 0; i <= slLen; i++) {
+ matrix[0][i] = i;
+ }
+
+ int currRow = 1;
+ int prevRow = 0;
+
+ // expand dynamic programming matrix row by row
+ for (int i = 1; i <= flLen; i++) {
+ matrix[currRow][0] = i;
+ // secondList is rewound for every row; firstList is not reset here and is read from its current position
+ secondList.reset();
+ for (int j = 1; j <= slLen; j++) {
+ // cheapest of: cell above + 1, cell to the left + 1, diagonal cell + (0 if the items compare equal)
+ matrix[currRow][j] = Math.min(Math.min(matrix[prevRow][j] + 1, matrix[currRow][j - 1] + 1),
+ matrix[prevRow][j - 1] + (firstList.compare(secondList) == 0 ? 0 : 1));
+
+ secondList.next();
+ }
+
+ firstList.next();
+ // swap the two rows
+ int tmp = currRow;
+ currRow = prevRow;
+ prevRow = tmp;
+ }
+ // after the final swap the last computed row is prevRow
+ return matrix[prevRow][slLen];
+ }
+
+ @Override
+ public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh) {
+
+ // simThresh is interpreted as a maximum edit distance and truncated to an int.
+ final int maxDistance = (int) simThresh;
+ final int firstSize = firstList.size();
+ final int secondSize = secondList.size();
+
+ // length filter: skip the matrix when the sizes alone differ by more than the budget
+ if (Math.abs(firstSize - secondSize) > maxDistance) {
+ return -1;
+ }
+
+ // -1 marks a pair whose distance exceeds the threshold
+ final float distance = getSimilarity(firstList, secondList);
+ if (distance > maxDistance) {
+ return -1;
+ }
+ return distance;
+ }
+
+ // faster implementation for common case of string edit distance
+ public int UTF8StringEditDistance(byte[] bytes, int fsStart, int ssStart) {
+ // Both strings are read from the same byte array, starting at fsStart and ssStart.
+ int fsLen = StringUtils.getStrLen(bytes, fsStart);
+ int ssLen = StringUtils.getStrLen(bytes, ssStart);
+
+ // reuse existing matrix if possible
+ if (ssLen >= cols) {
+ cols = ssLen + 1;
+ matrix = new int[rows][cols];
+ }
+ // character data begins utf8SizeIndicatorSize (2) bytes after each start offset
+ int fsDataStart = fsStart + utf8SizeIndicatorSize;
+ int ssDataStart = ssStart + utf8SizeIndicatorSize;
+
+ // init matrix
+ for (int i = 0; i <= ssLen; i++) {
+ matrix[0][i] = i;
+ }
+
+ int currRow = 1;
+ int prevRow = 0;
+
+ // expand dynamic programming matrix row by row
+ int fsPos = fsDataStart;
+ for (int i = 1; i <= fsLen; i++) {
+ matrix[currRow][0] = i;
+ char fsChar = StringUtils.toLowerCase(StringUtils.charAt(bytes, fsPos));
+
+ int ssPos = ssDataStart;
+ for (int j = 1; j <= ssLen; j++) {
+ char ssChar = StringUtils.toLowerCase(StringUtils.charAt(bytes, ssPos));
+ // characters are compared after lower-casing, so the distance ignores letter case
+ matrix[currRow][j] = Math.min(Math.min(matrix[prevRow][j] + 1, matrix[currRow][j - 1] + 1),
+ matrix[prevRow][j - 1] + (fsChar == ssChar ? 0 : 1));
+
+ ssPos += StringUtils.charSize(bytes, ssPos);
+ }
+
+ fsPos += StringUtils.charSize(bytes, fsPos);
+ // swap the two rows
+ int tmp = currRow;
+ currRow = prevRow;
+ prevRow = tmp;
+ }
+ // after the final swap the last computed row is prevRow
+ return matrix[prevRow][ssLen];
+ }
+
+ public int UTF8StringEditDistance(byte[] bytes, int fsStart, int ssStart, int edThresh) {
+ // Returns the edit distance when it is at most edThresh, otherwise -1; two filters run before the matrix.
+ int fsUtfLen = StringUtils.getUTFLen(bytes, fsStart);
+ int ssUtfLen = StringUtils.getUTFLen(bytes, ssStart);
+ // NOTE(review): this filter uses getUTFLen while the matrix uses getStrLen - check multi-byte input.
+ // length filter
+ if (Math.abs(fsUtfLen - ssUtfLen) > edThresh) {
+ return -1;
+ }
+
+ // initialize letter count filtering
+ Arrays.fill(fsLcCount, 0);
+ Arrays.fill(ssLcCount, 0);
+
+ // compute letter counts for first string (only lower-cased characters below 128 are counted)
+ int fsPos = fsStart + utf8SizeIndicatorSize;
+ int fsEnd = fsPos + fsUtfLen;
+ while (fsPos < fsEnd) {
+ char c = StringUtils.toLowerCase(StringUtils.charAt(bytes, fsPos));
+ if (c < 128) {
+ fsLcCount[c]++;
+ }
+ fsPos += StringUtils.charSize(bytes, fsPos);
+ }
+
+ // compute letter counts for second string (only lower-cased characters below 128 are counted)
+ int ssPos = ssStart + utf8SizeIndicatorSize;
+ int ssEnd = ssPos + ssUtfLen;
+ while (ssPos < ssEnd) {
+ char c = StringUtils.toLowerCase(StringUtils.charAt(bytes, ssPos));
+ if (c < 128) {
+ ssLcCount[c]++;
+ }
+ ssPos += StringUtils.charSize(bytes, ssPos);
+ }
+
+ // apply filter: reject once either string has more than edThresh surplus occurrences in total
+ int gtSum = 0;
+ int ltSum = 0;
+ for (int i = 0; i < 128; i++) {
+ if (fsLcCount[i] > ssLcCount[i]) {
+ gtSum += fsLcCount[i] - ssLcCount[i];
+ if (gtSum > edThresh) {
+ return -1;
+ }
+ } else {
+ ltSum += ssLcCount[i] - fsLcCount[i];
+ if (ltSum > edThresh) {
+ return -1;
+ }
+ }
+ }
+ // both filters passed: compute the exact distance
+ int ed = UTF8StringEditDistance(bytes, fsStart, ssStart);
+ if (ed > edThresh) {
+ return -1;
+ } else {
+ return ed;
+ }
+ }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricFactory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricFactory.java
new file mode 100644
index 0000000..a66328b
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricFactory.java
@@ -0,0 +1,29 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.similarity;
+
+public class SimilarityMetricFactory { // resolves a metric name to its SimilarityMetric; only "Jaccard" is known
+    public static SimilarityMetric getSimilarityMetric(String similarityMetric) {
+        if (!"Jaccard".equals(similarityMetric)) { // literal first: a null name is rejected here, not dereferenced
+            throw new RuntimeException("Unknown fuzzy metric \"" + similarityMetric + "\".");
+        }
+        return new SimilarityMetricJaccard();
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
new file mode 100644
index 0000000..b0c0638
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
@@ -0,0 +1,118 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.similarity;
+
+import java.util.Set;
+import java.util.TreeSet;
+
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.Tokenizer;
+
+/**
+ * Jaccard similarity: the size of the intersection divided by the combined size minus the
+ * size of the intersection, offered for several token representations.
+ */
+public class SimilarityMetricJaccard extends SimilarityMetric implements IGenericSimilarityMetric {
+
+    /**
+     * Jaccard value from precomputed sizes.
+     *
+     * @param intersectSize number of tokens the two sides share
+     * @param lengthX number of tokens on the first side
+     * @param lengthY number of tokens on the second side
+     * @return {@code intersectSize / (lengthX + lengthY - intersectSize)} as a float
+     */
+    public static float getSimilarity(int intersectSize, int lengthX, int lengthY) {
+        final int unionSize = lengthX + lengthY - intersectSize;
+        return ((float) intersectSize) / unionSize;
+    }
+
+    /**
+     * Jaccard value of two token arrays: shared tokens are counted once each (set
+     * intersection), while the denominator is built from the raw array lengths.
+     */
+    public static float getSimilarityBag(int[] tokensX, int[] tokensY) {
+        final Set<Integer> shared = new TreeSet<Integer>();
+        for (int i = 0; i < tokensX.length; i++) {
+            shared.add(tokensX[i]);
+        }
+        final Set<Integer> other = new TreeSet<Integer>();
+        for (int i = 0; i < tokensY.length; i++) {
+            other.add(tokensY[i]);
+        }
+        shared.retainAll(other);
+        final int common = shared.size();
+        return ((float) common) / (tokensX.length + tokensY.length - common);
+    }
+
+    /** Jaccard value of two token lists, using their reported sizes for the denominator. */
+    @Override
+    public float getSimilarity(IListIterator tokensX, IListIterator tokensY) {
+        final int common = SimilarityMetric.getIntersectSize(tokensX, tokensY);
+        final int combined = tokensX.size() + tokensY.size();
+        return (float) common / (combined - common);
+    }
+
+    /**
+     * Thresholded variant: applies a length filter first and only then computes the similarity.
+     *
+     * @return the similarity when it is not below {@code simThresh}, otherwise -1
+     */
+    @Override
+    public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh) {
+        // Length filter: the second list must be neither too short nor too long for the threshold.
+        final int minLength = (int) Math.ceil(simThresh * firstList.size());
+        if (!(minLength <= secondList.size()
+                && secondList.size() <= 1.0f / simThresh * firstList.size())) {
+            return -1f;
+        }
+        final float similarity = getSimilarity(firstList, secondList);
+        return similarity < simThresh ? -1f : similarity;
+    }
+
+    /** Jaccard value of two int-token ranges; the two range lengths form the denominator. */
+    @Override
+    public float getSimilarity(int[] tokensX, int startX, int lengthX, int[] tokensY, int startY, int lengthY) {
+        final int common = SimilarityMetric.getIntersectSize(tokensX, startX, lengthX, tokensY, startY, lengthY);
+        return (float) common / (lengthX + lengthY - common);
+    }
+
+    /** Jaccard value of two complete int-token arrays. */
+    @Override
+    public float getSimilarity(int[] tokensX, int[] tokensY) {
+        return getSimilarity(tokensX, 0, tokensX.length, tokensY, 0, tokensY.length);
+    }
+
+    /** Jaccard value of the distinct tokens that the tokenizer produces for each string. */
+    @Override
+    public float getSimilarity(String stringX, String stringY, Tokenizer tokenizer) {
+        final Set<String> shared = new TreeSet<String>();
+        for (String token : tokenizer.tokenize(stringX)) {
+            shared.add(token);
+        }
+        final Set<String> other = new TreeSet<String>();
+        for (String token : tokenizer.tokenize(stringY)) {
+            other.add(token);
+        }
+        final int distinctX = shared.size();
+        final int distinctY = other.size();
+        shared.retainAll(other);
+        return ((float) shared.size()) / (distinctX + distinctY - shared.size());
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/AbstractUTF8StringBinaryTokenizer.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/AbstractUTF8StringBinaryTokenizer.java
new file mode 100644
index 0000000..0e3c28f
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/AbstractUTF8StringBinaryTokenizer.java
@@ -0,0 +1,77 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import edu.uci.ics.asterix.fuzzyjoin.IntArray;
+
+/**
+ * Shared state and reset logic for tokenizers that walk a length-prefixed UTF-8 string held in
+ * a byte array. Subclasses supply {@code hasNext()} and {@code next()}.
+ */
+public abstract class AbstractUTF8StringBinaryTokenizer implements IBinaryTokenizer {
+    protected byte[] data;
+    protected int start;
+    protected int length; // end offset of the input: reset() stores start + length here
+    protected int tokenLength;
+    protected int index; // current read position within data
+    protected int utf8Length;
+
+    // Starts and lengths of the tokens seen so far; both stay null when token counts are ignored.
+    protected final IntArray tokensStart;
+    protected final IntArray tokensLength;
+    protected final IToken token;
+    protected final boolean ignoreTokenCount;
+    protected final boolean sourceHasTypeTag;
+
+    /**
+     * @param ignoreTokenCount when true, the seen-token arrays are not allocated
+     * @param sourceHasTypeTag when true, {@code reset} skips one leading type-tag byte
+     * @param tokenFactory creates the single token instance returned by {@code getToken}
+     */
+    public AbstractUTF8StringBinaryTokenizer(boolean ignoreTokenCount, boolean sourceHasTypeTag,
+            ITokenFactory tokenFactory) {
+        this.ignoreTokenCount = ignoreTokenCount;
+        this.sourceHasTypeTag = sourceHasTypeTag;
+        this.tokensStart = ignoreTokenCount ? null : new IntArray();
+        this.tokensLength = ignoreTokenCount ? null : new IntArray();
+        this.token = tokenFactory.createToken();
+    }
+
+    @Override
+    public IToken getToken() {
+        return token;
+    }
+
+    /** Points the tokenizer at a new input and clears the per-input state. */
+    @Override
+    public void reset(byte[] data, int start, int length) {
+        this.start = start;
+        index = sourceHasTypeTag ? start + 1 : start; // step over the type tag, if any
+        utf8Length = StringUtils.getUTFLen(data, index);
+        index += 2; // step over the utf8 length indicator
+        this.data = data;
+        this.length = length + start;
+        tokenLength = 0;
+        if (!ignoreTokenCount) {
+            tokensStart.reset();
+            tokensLength.reset();
+        }
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/AbstractUTF8Token.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/AbstractUTF8Token.java
new file mode 100644
index 0000000..d478f36
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/AbstractUTF8Token.java
@@ -0,0 +1,103 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+/** Base token over a span of UTF-8 data in a caller-supplied byte array, with optional type tags. */
+public abstract class AbstractUTF8Token implements IToken {
+    public static final int GOLDEN_RATIO_32 = 0x09e3779b9;
+    protected int length;
+    protected int tokenLength;
+    protected int start;
+    protected int tokenCount;
+    protected byte[] data;
+    protected final byte tokenTypeTag;
+    protected final byte countTypeTag;
+
+    public AbstractUTF8Token() {
+        this((byte) -1, (byte) -1); // -1 tags are never written by the handle methods below
+    }
+
+    public AbstractUTF8Token(byte tokenTypeTag, byte countTypeTag) {
+        this.tokenTypeTag = tokenTypeTag;
+        this.countTypeTag = countTypeTag;
+    }
+
+    @Override
+    public byte[] getData() {
+        return data;
+    }
+    @Override
+    public int getLength() {
+        return length;
+    }
+
+    /** Adds up StringUtils.getModifiedUTF8Len of the lower-cased form of {@code size} characters from the start offset. */
+    public int getLowerCaseUTF8Len(int size) {
+        int total = 0;
+        int cursor = start;
+        for (int left = size; left > 0; left--) {
+            final char lowered = StringUtils.toLowerCase(StringUtils.charAt(data, cursor));
+            total += StringUtils.getModifiedUTF8Len(lowered);
+            cursor += StringUtils.charSize(data, cursor);
+        }
+        return total;
+    }
+
+    @Override
+    public int getStart() {
+        return start;
+    }
+    @Override
+    public int getTokenLength() {
+        return tokenLength;
+    }
+
+    /** Writes the count type tag to {@code dos} when the tag is positive; otherwise writes nothing. */
+    public void handleCountTypeTag(DataOutput dos) throws IOException {
+        if (countTypeTag > 0) {
+            dos.write(countTypeTag);
+        }
+    }
+
+    /** Writes the token type tag to {@code dos} when the tag is positive; otherwise writes nothing. */
+    public void handleTokenTypeTag(DataOutput dos) throws IOException {
+        if (tokenTypeTag > 0) {
+            dos.write(tokenTypeTag);
+        }
+    }
+
+    @Override
+    public void reset(byte[] data, int start, int length, int tokenLength, int tokenCount) {
+        this.data = data; // all five arguments are stored unchanged
+        this.start = start;
+        this.length = length;
+        this.tokenLength = tokenLength;
+        this.tokenCount = tokenCount;
+    }
+
+    @Override
+    public void serializeTokenCount(DataOutput dos) throws IOException {
+        handleCountTypeTag(dos); // optional tag first, then the count itself
+        dos.writeInt(tokenCount);
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/AbstractUTF8TokenFactory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/AbstractUTF8TokenFactory.java
new file mode 100644
index 0000000..5ade0dc
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/AbstractUTF8TokenFactory.java
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+/** Base class for token factories; it only stores the two type tags that subclasses hand to their tokens. */
+public abstract class AbstractUTF8TokenFactory implements ITokenFactory {
+    private static final long serialVersionUID = 1L;
+    protected final byte tokenTypeTag;
+    protected final byte countTypeTag;
+
+    public AbstractUTF8TokenFactory() {
+        this((byte) -1, (byte) -1); // no-argument form: both tags are -1
+    }
+
+    public AbstractUTF8TokenFactory(byte tokenTypeTag, byte countTypeTag) {
+        this.tokenTypeTag = tokenTypeTag;
+        this.countTypeTag = countTypeTag;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizer.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizer.java
new file mode 100644
index 0000000..5a12c00
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizer.java
@@ -0,0 +1,79 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+public class DelimitedUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
+
+ public DelimitedUTF8StringBinaryTokenizer(boolean ignoreTokenCount, boolean sourceHasTypeTag,
+ ITokenFactory tokenFactory) {
+ super(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
+ }
+
+ @Override
+ public boolean hasNext() {
+ // skip delimiters
+ while (index < length && isSeparator(StringUtils.charAt(data, index))) {
+ index += StringUtils.charSize(data, index);
+ }
+ return index < length;
+ }
+
+ private boolean isSeparator(char c) {
+ return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER || Character.getType(c) == Character.OTHER_NUMBER);
+ }
+
+ @Override
+ public void next() {
+ tokenLength = 0;
+ int currentTokenStart = index;
+ while (index < length && !isSeparator(StringUtils.charAt(data, index))) {
+ index += StringUtils.charSize(data, index);
+ tokenLength++;
+ }
+ int tokenCount = 1;
+ if (tokenLength > 0 && !ignoreTokenCount) {
+ // search if we got the same token before
+ for (int i = 0; i < tokensStart.length(); ++i) {
+ if (tokenLength == tokensLength.get(i)) {
+ int tokenStart = tokensStart.get(i);
+ tokenCount++; // assume we found it
+ int offset = 0;
+ int currLength = 0;
+ while (currLength < tokenLength) {
+ // case insensitive comparison
+ if (StringUtils.toLowerCase(StringUtils.charAt(data, currentTokenStart + offset)) != StringUtils
+ .toLowerCase(StringUtils.charAt(data, tokenStart + offset))) {
+ tokenCount--;
+ break;
+ }
+ offset += StringUtils.charSize(data, currentTokenStart + offset);
+ currLength++;
+ }
+ }
+ }
+ // add the new token to the list of seen tokens
+ tokensStart.add(currentTokenStart);
+ tokensLength.add(tokenLength);
+ }
+
+ // set token
+ token.reset(data, currentTokenStart, index, tokenLength, tokenCount);
+ }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizerFactory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizerFactory.java
new file mode 100644
index 0000000..cb002df
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizerFactory.java
@@ -0,0 +1,40 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+/** Factory whose createTokenizer() builds a new DelimitedUTF8StringBinaryTokenizer from the constructor's three settings. */
+public class DelimitedUTF8StringBinaryTokenizerFactory implements IBinaryTokenizerFactory {
+    private static final long serialVersionUID = 1L;
+    private final boolean ignoreTokenCount;
+    private final boolean sourceHasTypeTag;
+    private final ITokenFactory tokenFactory;
+
+    public DelimitedUTF8StringBinaryTokenizerFactory(boolean ignoreTokenCount, boolean sourceHasTypeTag,
+            ITokenFactory tokenFactory) {
+        this.ignoreTokenCount = ignoreTokenCount;
+        this.sourceHasTypeTag = sourceHasTypeTag;
+        this.tokenFactory = tokenFactory;
+    }
+
+    @Override
+    public IBinaryTokenizer createTokenizer() {
+        return new DelimitedUTF8StringBinaryTokenizer(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramToken.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramToken.java
new file mode 100644
index 0000000..461638f
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramToken.java
@@ -0,0 +1,62 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+/** N-gram token that serializes as a 32-bit hash of its pre chars, lower-cased characters, post chars and count. */
+public class HashedUTF8NGramToken extends UTF8NGramToken {
+    public HashedUTF8NGramToken(byte tokenTypeTag, byte countTypeTag) {
+        super(tokenTypeTag, countTypeTag);
+    }
+
+    /**
+     * Calls handleTokenTypeTag, then writes the hash as an int. The hash starts at GOLDEN_RATIO_32;
+     * every pre char, regular character and post char is XOR-ed in and the result multiplied by
+     * GOLDEN_RATIO_32, and the token count is added at the very end.
+     */
+    @Override
+    public void serializeToken(DataOutput dos) throws IOException {
+        handleTokenTypeTag(dos);
+
+        int hash = GOLDEN_RATIO_32;
+        for (int left = numPreChars; left > 0; left--) {
+            hash ^= PRECHAR;
+            hash *= GOLDEN_RATIO_32;
+        }
+
+        // regular characters: the token length minus the pre and post chars, read from the data
+        int pos = start;
+        for (int left = tokenLength - numPreChars - numPostChars; left > 0; left--) {
+            hash ^= StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+            hash *= GOLDEN_RATIO_32;
+            pos += StringUtils.charSize(data, pos);
+        }
+
+        for (int left = numPostChars; left > 0; left--) {
+            hash ^= POSTCHAR;
+            hash *= GOLDEN_RATIO_32;
+        }
+
+        hash += tokenCount;
+        dos.writeInt(hash);
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramTokenFactory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramTokenFactory.java
new file mode 100644
index 0000000..9b6f835
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramTokenFactory.java
@@ -0,0 +1,38 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+/** Factory whose tokens are {@link HashedUTF8NGramToken}s carrying this factory's two type tags. */
+public class HashedUTF8NGramTokenFactory extends AbstractUTF8TokenFactory {
+    private static final long serialVersionUID = 1L;
+
+    public HashedUTF8NGramTokenFactory() {
+        super();
+    }
+
+    public HashedUTF8NGramTokenFactory(byte tokenTypeTag, byte countTypeTag) {
+        super(tokenTypeTag, countTypeTag);
+    }
+
+    @Override
+    public IToken createToken() {
+        return new HashedUTF8NGramToken(tokenTypeTag, countTypeTag);
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/HashedUTF8WordToken.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/HashedUTF8WordToken.java
new file mode 100644
index 0000000..c7d5f3a
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/HashedUTF8WordToken.java
@@ -0,0 +1,84 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+/** Word token identified and serialized by a 32-bit hash of its lower-cased characters plus its token count. */
+public class HashedUTF8WordToken extends UTF8WordToken {
+    private int hash = 0;
+
+    public HashedUTF8WordToken(byte tokenTypeTag, byte countTypeTag) {
+        super(tokenTypeTag, countTypeTag);
+    }
+
+    /**
+     * Equal means: same token length and identical characters (case-sensitive, token count ignored).
+     * NOTE(review): hashCode() also folds in the token count, so equal tokens may hash differently -- confirm intent.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (!(o instanceof IToken)) { // instanceof is false for null as well
+            return false;
+        }
+        final IToken other = (IToken) o;
+        if (other.getTokenLength() != tokenLength) {
+            return false;
+        }
+        int offset = 0;
+        for (int compared = 0; compared < tokenLength; compared++) {
+            final char theirs = StringUtils.charAt(other.getData(), other.getStart() + offset);
+            if (theirs != StringUtils.charAt(data, start + offset)) {
+                return false;
+            }
+            offset += StringUtils.charSize(data, start + offset);
+        }
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        return hash;
+    }
+
+    /** Rebinds the token and precomputes its hash (JAQL-like string hashing over lower-cased characters). */
+    @Override
+    public void reset(byte[] data, int start, int length, int tokenLength, int tokenCount) {
+        super.reset(data, start, length, tokenLength, tokenCount);
+        hash = GOLDEN_RATIO_32;
+        int pos = start;
+        for (int left = tokenLength; left > 0; left--) {
+            hash ^= StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+            hash *= GOLDEN_RATIO_32;
+            pos += StringUtils.charSize(data, pos);
+        }
+        hash += tokenCount;
+    }
+
+    /** Writes the token type tag when it is positive, then the precomputed hash as an int. */
+    @Override
+    public void serializeToken(DataOutput dos) throws IOException {
+        if (tokenTypeTag > 0) {
+            dos.write(tokenTypeTag);
+        }
+        dos.writeInt(hash);
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/HashedUTF8WordTokenFactory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/HashedUTF8WordTokenFactory.java
new file mode 100644
index 0000000..aec26bc
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/HashedUTF8WordTokenFactory.java
@@ -0,0 +1,38 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+/** Factory whose tokens are {@link HashedUTF8WordToken}s carrying this factory's two type tags. */
+public class HashedUTF8WordTokenFactory extends AbstractUTF8TokenFactory {
+    private static final long serialVersionUID = 1L;
+
+    public HashedUTF8WordTokenFactory() {
+        super();
+    }
+
+    public HashedUTF8WordTokenFactory(byte tokenTypeTag, byte countTypeTag) {
+        super(tokenTypeTag, countTypeTag);
+    }
+
+    @Override
+    public IToken createToken() {
+        return new HashedUTF8WordToken(tokenTypeTag, countTypeTag);
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/IBinaryTokenizer.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/IBinaryTokenizer.java
new file mode 100644
index 0000000..2403e2f
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/IBinaryTokenizer.java
@@ -0,0 +1,30 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+public interface IBinaryTokenizer {
+    public IToken getToken(); // token object that the tokenizer hands out
+    /** Reports whether another token can be produced from the current input. */
+    public boolean hasNext();
+    /** Moves on to the next token; the delimited implementation loads it into the object from getToken(). */
+    public void next();
+    /** Switches to a new input inside {@code data}; see the implementations for the expected layout. */
+    public void reset(byte[] data, int start, int length);
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/IBinaryTokenizerFactory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/IBinaryTokenizerFactory.java
new file mode 100644
index 0000000..eecd98c
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/IBinaryTokenizerFactory.java
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import java.io.Serializable;
+
+public interface IBinaryTokenizerFactory extends Serializable {
+    public IBinaryTokenizer createTokenizer(); // returns a tokenizer; the delimited factory builds a new one per call
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/INGramToken.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/INGramToken.java
new file mode 100644
index 0000000..ae55837
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/INGramToken.java
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+public interface INGramToken {
+    public int getNumPostChars(); // presumably the post-char count given to setNumPrePostChars -- confirm in implementors
+    /** Presumably the pre-char count given to {@link #setNumPrePostChars} -- implementors are not visible here. */
+    public int getNumPreChars();
+    /** Sets both counts at once; presumably they feed the numPreChars / numPostChars that HashedUTF8NGramToken hashes -- TODO confirm. */
+    public void setNumPrePostChars(int numPreChars, int numPostChars);
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/IToken.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/IToken.java
new file mode 100644
index 0000000..aa6dc86
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/IToken.java
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+public interface IToken {
+    public byte[] getData(); // byte array the token refers to
+    /** Value given to reset() as length. NOTE(review): the delimited tokenizer passes an end offset here -- confirm meaning. */
+    public int getLength();
+    /** Offset in getData() at which the token begins. */
+    public int getStart();
+    /** Token length as given to reset(); the delimited tokenizer counts characters, not bytes. */
+    public int getTokenLength();
+    /** Rebinds the token to a new span of {@code data} together with its token length and count. */
+    public void reset(byte[] data, int start, int length, int tokenLength, int tokenCount);
+    /** Writes the token itself to {@code dos}; the format is up to the implementation. */
+    public void serializeToken(DataOutput dos) throws IOException;
+    /** Writes the token's count to {@code dos}. */
+    public void serializeTokenCount(DataOutput dos) throws IOException;
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/ITokenFactory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/ITokenFactory.java
new file mode 100644
index 0000000..e5a95a1
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/ITokenFactory.java
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import java.io.Serializable;
+
+public interface ITokenFactory extends Serializable {
+    public IToken createToken(); // returns a token; the hashed factories in this package build a new one per call
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/NGramTokenizer.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/NGramTokenizer.java
new file mode 100644
index 0000000..e909a24
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/NGramTokenizer.java
@@ -0,0 +1,90 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+public class NGramTokenizer implements Tokenizer {
+
+    /** Serialization version; the class is serializable through {@link Tokenizer}. */
+    private static final long serialVersionUID = 1L;
+
+    /** Small manual demo: prints the grams of a sample word. */
+    public static void main(String args[]) {
+        String sample = "hadoopoop";
+        Tokenizer demo = new NGramTokenizer();
+        System.out.println(sample + ":" + demo.tokenize(sample));
+    }
+
+    /** Number of characters in each gram. */
+    private final int gramLength;
+
+    /** Character appended after the input so that trailing grams are full length. */
+    private final char QGRAMENDPADDING = '$';
+
+    /** Character prepended before the input so that leading grams are full length. */
+    private final char QGRAMSTARTPADDING = '$';
+
+    /** Creates a tokenizer that produces 3-grams. */
+    public NGramTokenizer() {
+        gramLength = 3;
+    }
+
+    /** Creates a tokenizer that produces grams of {@code gramLength} characters. */
+    public NGramTokenizer(int gramLength) {
+        this.gramLength = gramLength;
+    }
+
+    /** Surrounds {@code input} with {@code gramLength - 1} padding characters on each side. */
+    private StringBuffer getAdjustedString(String input) {
+        final StringBuffer padded = new StringBuffer();
+        int remaining = gramLength - 1;
+        while (remaining-- > 0) {
+            padded.append(QGRAMSTARTPADDING);
+        }
+        padded.append(input);
+        remaining = gramLength - 1;
+        while (remaining-- > 0) {
+            padded.append(QGRAMENDPADDING);
+        }
+        return padded;
+    }
+
+    /**
+     * Slides a window of {@code gramLength} characters over the padded input and emits each
+     * gram with its running occurrence number appended, so repeated grams yield distinct tokens.
+     */
+    public List<String> tokenize(String input) {
+        final StringBuffer padded = getAdjustedString(input);
+        final int windowCount = padded.length() - (gramLength - 1);
+        final ArrayList<String> result = new ArrayList<String>();
+        final HashMap<String, Integer> seen = new HashMap<String, Integer>();
+        for (int pos = 0; pos < windowCount; pos++) {
+            final String gram = padded.substring(pos, pos + gramLength);
+            final Integer previous = seen.get(gram);
+            final int occurrence = (previous == null) ? 1 : previous + 1;
+            seen.put(gram, occurrence);
+            result.add(gram + occurrence);
+        }
+        return result;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/NGramUTF8StringBinaryTokenizer.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/NGramUTF8StringBinaryTokenizer.java
new file mode 100644
index 0000000..362126f
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/NGramUTF8StringBinaryTokenizer.java
@@ -0,0 +1,116 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+public class NGramUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
+
+    // Number of characters per gram.
+    private int gramLength;
+    // Whether padded grams are produced for the beginning and the end of the string.
+    private boolean usePrePost;
+    // Number of grams handed out by next() since the last reset().
+    private int gramNum;
+    // Number of grams the current string yields; computed in reset().
+    private int totalGrams;
+    // The inherited token, viewed through the n-gram interface so the padding counts can be set.
+    private final INGramToken concreteToken;
+
+    public NGramUTF8StringBinaryTokenizer(int gramLength, boolean usePrePost, boolean ignoreTokenCount,
+            boolean sourceHasTypeTag, ITokenFactory tokenFactory) {
+        super(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
+        this.gramLength = gramLength;
+        this.usePrePost = usePrePost;
+        concreteToken = (INGramToken) token;
+    }
+
+    /** Returns true while next() can still produce a gram for the current input. */
+    @Override
+    public boolean hasNext() {
+        return gramNum < totalGrams;
+    }
+
+    // Produces the next gram; the read position only advances once no leading padding is needed.
+    @Override
+    public void next() {
+        int currentTokenStart = index;
+        int tokenCount = 1;
+        int numPreChars = 0;
+        int numPostChars = 0;
+        if (usePrePost) {
+            numPreChars = Math.max(gramLength - gramNum - 1, 0);
+            numPostChars = (gramNum > totalGrams - gramLength) ? gramLength - totalGrams + gramNum : 0;
+        }
+        gramNum++;
+        concreteToken.setNumPrePostChars(numPreChars, numPostChars);
+        if (numPreChars == 0) {
+            index += StringUtils.charSize(data, index);
+        }
+        // compute token count; pre and post grams are ignored for duplicate detection
+        // NOTE(review): the scan begins at 'start' whereas reset() counts characters from 'index' -- confirm
+        if (!ignoreTokenCount && numPreChars == 0 && numPostChars == 0) {
+            int tmpIndex = start;
+            while (tmpIndex < currentTokenStart) {
+                tokenCount++; // assume found
+                // separate byte positions: chars that are equal ignoring case may differ in encoded size
+                int curPos = currentTokenStart;
+                int prevPos = tmpIndex;
+                for (int j = 0; j < gramLength; j++) {
+                    if (StringUtils.toLowerCase(StringUtils.charAt(data, curPos)) != StringUtils
+                            .toLowerCase(StringUtils.charAt(data, prevPos))) {
+                        tokenCount--;
+                        break;
+                    }
+                    curPos += StringUtils.charSize(data, curPos);
+                    prevPos += StringUtils.charSize(data, prevPos);
+                }
+                tmpIndex += StringUtils.charSize(data, tmpIndex);
+            }
+        }
+        token.reset(data, currentTokenStart, length, gramLength, tokenCount);
+    }
+
+    @Override
+    public void reset(byte[] data, int start, int length) {
+        super.reset(data, start, length);
+        gramNum = 0;
+        // Count the characters (not bytes) in the utf8Length bytes that begin at index.
+        int numChars = 0;
+        int pos = index;
+        int end = pos + utf8Length;
+        while (pos < end) {
+            numChars++;
+            pos += StringUtils.charSize(data, pos);
+        }
+        // usePrePost: one gram per character plus gramLength - 1 leading padded grams; else full windows only.
+        if (usePrePost) {
+            totalGrams = numChars + gramLength - 1;
+        } else {
+            totalGrams = numChars - gramLength + 1;
+        }
+    }
+    // Changes the gram length; totalGrams is only recomputed by the next reset().
+    public void setGramlength(int gramLength) {
+        this.gramLength = gramLength;
+    }
+    // Switches padded grams on or off; totalGrams is only recomputed by the next reset().
+    public void setPrePost(boolean usePrePost) {
+        this.usePrePost = usePrePost;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/StringUtils.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/StringUtils.java
new file mode 100644
index 0000000..48d61e7
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/StringUtils.java
@@ -0,0 +1,216 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+public class StringUtils {
+    /**
+     * Decodes the character whose encoding starts at b[s]; one-, two- and three-byte
+     * sequences are understood.
+     *
+     * @param s index of the first byte of the character
+     * @throws IllegalArgumentException if b[s] cannot start a one- to three-byte sequence
+     */
+    public static char charAt(byte[] b, int s) {
+        final int lead = b[s] & 0xff;
+        final int highNibble = lead >> 4;
+        if (highNibble <= 7) {
+            // 0xxxxxxx: the byte is the character itself
+            return (char) lead;
+        }
+        if (highNibble == 12 || highNibble == 13) {
+            // 110xxxxx 10xxxxxx
+            return (char) (((lead & 0x1F) << 6) | ((b[s + 1]) & 0x3F));
+        }
+        if (highNibble == 14) {
+            // 1110xxxx 10xxxxxx 10xxxxxx
+            return (char) (((lead & 0x0F) << 12) | (((b[s + 1]) & 0x3F) << 6) | (((b[s + 2]) & 0x3F) << 0));
+        }
+        throw new IllegalArgumentException();
+    }
+
+    /**
+     * Returns how many bytes the character starting at b[s] occupies (1, 2 or 3).
+     *
+     * @throws IllegalStateException if b[s] cannot start a one- to three-byte sequence
+     */
+    public static int charSize(byte[] b, int s) {
+        final int highNibble = (b[s] & 0xff) >> 4;
+        if (highNibble <= 7) {
+            // single byte: 0xxxxxxx
+            return 1;
+        }
+        if (highNibble == 12 || highNibble == 13) {
+            // lead byte 110xxxxx
+            return 2;
+        }
+        if (highNibble == 14) {
+            // lead byte 1110xxxx
+            return 3;
+        }
+        // continuation bytes and four-byte lead bytes are not accepted here
+        throw new IllegalStateException();
+    }
+
+    /** Returns the number of bytes (1-3) that writeCharAsModifiedUTF8 emits for {@code c}. */
+    public static int getModifiedUTF8Len(char c) {
+        if (c > 0x07FF) {
+            return 3;
+        }
+        // chars are unsigned, so only the upper bounds need checking;
+        // note that the NUL character counts as a single byte here
+        return (c > 0x007F) ? 2 : 1;
+    }
+
+    // Counts the characters of the string at b[s]: a 2-byte length prefix followed by that many bytes.
+    public static int getStrLen(byte[] b, int s) {
+        final int firstChar = s + 2;
+        final int limit = firstChar + getUTFLen(b, s);
+        int chars = 0;
+        for (int pos = firstChar; pos < limit; pos += charSize(b, pos)) {
+            chars++;
+        }
+        return chars;
+    }
+
+    public static int getUTFLen(byte[] b, int s) {
+        return ((b[s] & 0xff) << 8) | (b[s + 1] & 0xff); // unsigned 16-bit value, high byte first
+    }
+
+    public static char toLowerCase(char c) {
+        switch (c) {
+            case 'A':
+                return 'a';
+            case 'B':
+                return 'b';
+            case 'C':
+                return 'c';
+            case 'D':
+                return 'd';
+            case 'E':
+                return 'e';
+            case 'F':
+                return 'f';
+            case 'G':
+                return 'g';
+            case 'H':
+                return 'h';
+            case 'I':
+                return 'i';
+            case 'J':
+                return 'j';
+            case 'K':
+                return 'k';
+            case 'L':
+                return 'l';
+            case 'M':
+                return 'm';
+            case 'N':
+                return 'n';
+            case 'O':
+                return 'o';
+            case 'P':
+                return 'p';
+            case 'Q':
+                return 'q';
+            case 'R':
+                return 'r';
+            case 'S':
+                return 's';
+            case 'T':
+                return 't';
+            case 'U':
+                return 'u';
+            case 'V':
+                return 'v';
+            case 'W':
+                return 'w';
+            case 'X':
+                return 'x';
+            case 'Y':
+                return 'y';
+            case 'Z':
+                return 'z';
+            case 'Ä':
+                return 'ä';
+            case 'Ǟ':
+                return 'ǟ';
+            case 'Ë':
+                return 'ë';
+            case 'Ḧ':
+                return 'ḧ';
+            case 'Ï':
+                return 'ï';
+            case 'Ḯ':
+                return 'ḯ';
+            case 'Ö':
+                return 'ö';
+            case 'Ȫ':
+                return 'ȫ';
+            case 'Ṏ':
+                return 'ṏ';
+            case 'Ü':
+                return 'ü';
+            case 'Ǖ':
+                return 'ǖ';
+            case 'Ǘ':
+                return 'ǘ';
+            case 'Ǚ':
+                return 'ǚ';
+            case 'Ǜ':
+                return 'ǜ';
+            case 'Ṳ':
+                return 'ṳ';
+            case 'Ṻ':
+                return 'ṻ';
+            case 'Ẅ':
+                return 'ẅ';
+            case 'Ẍ':
+                return 'ẍ';
+            case 'Ÿ':
+                return 'ÿ';
+            default:
+                // every character that is not listed above is handed to
+                // java.lang.Character.toLowerCase
+                return Character.toLowerCase(c);
+        }
+    }
+
+    /** Writes {@code c} to {@code dos} as one, two or three bytes, depending on its value. */
+    public static void writeCharAsModifiedUTF8(char c, DataOutput dos) throws IOException {
+        if (c <= 0x007F) {
+            dos.writeByte(c); // single byte; chars are unsigned so no lower bound is needed
+            return;
+        }
+        final boolean threeBytes = c > 0x07FF;
+        if (threeBytes) {
+            dos.writeByte((byte) (0xE0 | ((c >> 12) & 0x0F)));
+        }
+        dos.writeByte((byte) ((threeBytes ? 0x80 : 0xC0) | ((c >> 6) & 0x3F)));
+        dos.writeByte((byte) (0x80 | (c & 0x3F)));
+    }
+
+    public static void writeUTF8Len(int len, DataOutput dos) throws IOException {
+        dos.write((len >>> 8) & 0xFF); // high byte first; only the low 16 bits of len are written
+        dos.write(len & 0xFF);
+    }
+}
\ No newline at end of file
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/Token.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/Token.java
new file mode 100644
index 0000000..6c6d365
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/Token.java
@@ -0,0 +1,118 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import java.io.Serializable;
+
+public class Token implements Serializable {
+    // A token is the window [start, start + length) of a character sequence together with a
+    // count. Instances are mutable and can be re-pointed with set().
+
+    private static final long serialVersionUID = 1L;
+
+    private CharSequence data;
+    private int start;
+    private int length;
+    private int count;
+
+    /** Cached hash code; 0 means "not computed yet" and is restored by set(). */
+    private int hash;
+
+    public Token() {
+    }
+
+    public Token(CharSequence data, int start, int length, int count) {
+        set(data, start, length, count);
+    }
+
+    // Equal when both tokens cover the same characters AND carry the same count. The count
+    // is compared because hashCode() includes it; leaving it out would allow two equal
+    // tokens to report different hash codes, which breaks hash-based collections.
+    @Override
+    public boolean equals(Object o) {
+        if (!(o instanceof Token)) {
+            return false; // also covers null
+        }
+        Token t = (Token) o;
+        if (t.length != length || t.count != count) {
+            return false;
+        }
+        for (int i = 0; i < length; i++) {
+            if (t.data.charAt(t.start + i) != data.charAt(start + i)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    public CharSequence getCharSequence() {
+        return data; // the whole underlying sequence, not only the token's window
+    }
+
+    public int getCount() {
+        return count;
+    }
+
+    public int getLength() {
+        return length;
+    }
+
+    public int getStart() {
+        return start;
+    }
+
+    @Override
+    public int hashCode() {
+        int h = hash;
+        if (h == 0 && length > 0) {
+            for (int i = 0; i < length; i++) {
+                h = 31 * h + data.charAt(start + i);
+            }
+            h = 31 * h + count;
+            hash = h;
+        }
+        return h;
+    }
+
+    /** Same value as getLength(). */
+    public int length() {
+        return length;
+    }
+
+    /** Re-points this token at data[start, start + length) and drops the cached hash. */
+    public void set(CharSequence data, int start, int length, int count) {
+        this.data = data;
+        this.start = start;
+        this.length = length;
+        this.count = count;
+        hash = 0;
+    }
+
+    /** Makes this token cover the whole of {@code data}. */
+    public void set(String data, int count) {
+        set(data, 0, data.length(), count);
+    }
+
+    /** Renders the token as "(text, count)". */
+    @Override
+    public String toString() {
+        return "(" + data.subSequence(start, start + length) + ", " + count + ")";
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/Tokenizer.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/Tokenizer.java
new file mode 100644
index 0000000..312c626
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/Tokenizer.java
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import java.io.Serializable;
+import java.util.List;
+
+public interface Tokenizer extends Serializable {
+    List<String> tokenize(String text); // splits text into string tokens; the token format is up to the implementation
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/TokenizerBuffered.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/TokenizerBuffered.java
new file mode 100644
index 0000000..76dc588
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/TokenizerBuffered.java
@@ -0,0 +1,30 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+public interface TokenizerBuffered {
+    void advance(); // moves on to the next token
+    // in WordTokenizerBuffered: true once the current token is empty, i.e. the input is used up
+    boolean end();
+    // the current token; WordTokenizerBuffered hands out the same object every time
+    Token getToken();
+    // restarts tokenization; WordTokenizerBuffered also positions itself on the first token
+    void reset();
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/TokenizerBufferedFactory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/TokenizerBufferedFactory.java
new file mode 100644
index 0000000..8be793c
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/TokenizerBufferedFactory.java
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+public class TokenizerBufferedFactory {
+    public static TokenizerBuffered getTokenizer(String tokenizer, StringBuilder buffer) {
+        if (!tokenizer.equals("Word")) { // "Word" is the only name known here
+            throw new RuntimeException("Unknown tokenizer \"" + tokenizer + "\".");
+        }
+        return new WordTokenizerBuffered(buffer);
+    }
+    // A separator is any char that is neither a letter or digit nor of type OTHER_LETTER / OTHER_NUMBER.
+    public static boolean isSeparator(char c) {
+        return !Character.isLetterOrDigit(c) && Character.getType(c) != Character.OTHER_LETTER && Character.getType(c) != Character.OTHER_NUMBER;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/TokenizerFactory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/TokenizerFactory.java
new file mode 100644
index 0000000..dad43e9
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/TokenizerFactory.java
@@ -0,0 +1,31 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+public class TokenizerFactory {
+    // "NGram" -> default NGramTokenizer (both separators are unused); "Word" -> WordTokenizer.
+    public static Tokenizer getTokenizer(String tokenizer, String wordSeparator, char tokenSeparator) {
+        final boolean wantsNGram = tokenizer.equals("NGram");
+        if (!wantsNGram && !tokenizer.equals("Word")) {
+            throw new RuntimeException("Unknown tokenizer \"" + tokenizer + "\".");
+        }
+        return wantsNGram ? new NGramTokenizer() : new WordTokenizer(wordSeparator, tokenSeparator);
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/UTF8NGramToken.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/UTF8NGramToken.java
new file mode 100644
index 0000000..07a5e0d
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/UTF8NGramToken.java
@@ -0,0 +1,83 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+public class UTF8NGramToken extends AbstractUTF8Token implements INGramToken {
+    /** Character that serializeToken() writes once per leading padding position. */
+    public final static char PRECHAR = '#';
+    /** Character that serializeToken() writes once per trailing padding position. */
+    public final static char POSTCHAR = '$';
+
+    protected int numPreChars;
+    protected int numPostChars;
+
+    public UTF8NGramToken(byte tokenTypeTag, byte countTypeTag) {
+        super(tokenTypeTag, countTypeTag);
+    }
+
+    @Override
+    public int getNumPostChars() {
+        return numPostChars; // number of POSTCHAR padding characters at the end of the gram
+    }
+
+    @Override
+    public int getNumPreChars() {
+        return numPreChars; // number of PRECHAR padding characters at the start of the gram
+    }
+
+    @Override
+    public void serializeToken(DataOutput dos) throws IOException {
+        handleTokenTypeTag(dos);
+
+        // characters taken from the underlying string (everything that is not padding)
+        int numRegChars = tokenLength - numPreChars - numPostChars;
+
+        // PRECHAR and POSTCHAR are ASCII, so each padding character takes exactly one byte
+        int tokenUTF8Len = getLowerCaseUTF8Len(numRegChars) + numPreChars + numPostChars;
+
+        // write utf8 length indicator
+        StringUtils.writeUTF8Len(tokenUTF8Len, dos);
+
+        // pre chars
+        for (int i = 0; i < numPreChars; i++) {
+            StringUtils.writeCharAsModifiedUTF8(PRECHAR, dos);
+        }
+        // regular chars, lower-cased one at a time
+        int pos = start;
+        for (int i = 0; i < numRegChars; i++) {
+            char c = StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+            StringUtils.writeCharAsModifiedUTF8(c, dos);
+            pos += StringUtils.charSize(data, pos);
+        }
+
+        // post chars
+        for (int i = 0; i < numPostChars; i++) {
+            StringUtils.writeCharAsModifiedUTF8(POSTCHAR, dos);
+        }
+    }
+
+    public void setNumPrePostChars(int numPreChars, int numPostChars) {
+        this.numPreChars = numPreChars;
+        this.numPostChars = numPostChars;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/UTF8NGramTokenFactory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/UTF8NGramTokenFactory.java
new file mode 100644
index 0000000..4417b9c
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/UTF8NGramTokenFactory.java
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+public class UTF8NGramTokenFactory extends AbstractUTF8TokenFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Runs only the superclass no-argument constructor (invoked implicitly). */
+    public UTF8NGramTokenFactory() {
+    }
+
+    public UTF8NGramTokenFactory(byte tokenTypeTag, byte countTypeTag) {
+        super(tokenTypeTag, countTypeTag);
+    }
+
+    /** Creates a new {@link UTF8NGramToken} from this factory's two type tags. */
+    @Override
+    public IToken createToken() {
+        return new UTF8NGramToken(tokenTypeTag, countTypeTag);
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/UTF8WordToken.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/UTF8WordToken.java
new file mode 100644
index 0000000..aacbfd8
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/UTF8WordToken.java
@@ -0,0 +1,44 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+public class UTF8WordToken extends AbstractUTF8Token {
+
+    public UTF8WordToken(byte tokenTypeTag, byte countTypeTag) {
+        super(tokenTypeTag, countTypeTag);
+    }
+
+    /** Writes the token lower-cased: type tag handling first, then a 2-byte length, then the characters. */
+    @Override
+    public void serializeToken(DataOutput dos) throws IOException {
+        handleTokenTypeTag(dos);
+        // presumably the byte length of the lower-cased token (helper lives in the superclass)
+        StringUtils.writeUTF8Len(getLowerCaseUTF8Len(tokenLength), dos);
+        int remaining = tokenLength;
+        for (int pos = start; remaining > 0; remaining--) {
+            final char lowered = StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+            StringUtils.writeCharAsModifiedUTF8(lowered, dos);
+            pos += StringUtils.charSize(data, pos);
+        }
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/UTF8WordTokenFactory.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/UTF8WordTokenFactory.java
new file mode 100644
index 0000000..666f6bb
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/UTF8WordTokenFactory.java
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+public class UTF8WordTokenFactory extends AbstractUTF8TokenFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Runs only the superclass no-argument constructor (invoked implicitly). */
+    public UTF8WordTokenFactory() {
+    }
+
+    public UTF8WordTokenFactory(byte tokenTypeTag, byte countTypeTag) {
+        super(tokenTypeTag, countTypeTag);
+    }
+
+    /** Creates a new {@link UTF8WordToken} from this factory's two type tags. */
+    @Override
+    public IToken createToken() {
+        return new UTF8WordToken(tokenTypeTag, countTypeTag);
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/WordTokenizer.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/WordTokenizer.java
new file mode 100644
index 0000000..963ad33
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/WordTokenizer.java
@@ -0,0 +1,68 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+public class WordTokenizer implements Tokenizer {
+
+    /**
+     * Serialization version; the class is serializable through {@link Tokenizer}.
+     */
+    private static final long serialVersionUID = 1L;
+
+    public static void main(String args[]) {
+        String sample = "hadoop_rocks_in_java";
+        Tokenizer demo = new WordTokenizer("_", '_');
+        System.out.println(sample + ":" + demo.tokenize(sample));
+    }
+
+    private final String wordSeparator; // handed to String.split, i.e. treated as a regular expression
+    private final char tokenSeparator; // placed between a word and its occurrence number
+
+    public WordTokenizer() {
+        this(" ", '_');
+    }
+
+    public WordTokenizer(String wordSeparator, char tokenSeparator) {
+        this.wordSeparator = wordSeparator;
+        this.tokenSeparator = tokenSeparator;
+    }
+
+    // Splits input on wordSeparator, skips empty pieces and tags every word with its
+    // running occurrence number, so repeated words yield distinct tokens.
+    public List<String> tokenize(String input) {
+        final ArrayList<String> result = new ArrayList<String>();
+        final HashMap<String, Integer> seen = new HashMap<String, Integer>();
+        final String[] words = input.split(wordSeparator);
+        for (int i = 0; i < words.length; i++) {
+            final String word = words[i];
+            if (word.length() > 0) {
+                final Integer previous = seen.get(word);
+                final int occurrence = (previous == null) ? 1 : previous + 1;
+                seen.put(word, occurrence);
+                result.add(word + tokenSeparator + occurrence);
+            }
+        }
+        return result;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/WordTokenizerBuffered.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/WordTokenizerBuffered.java
new file mode 100644
index 0000000..5a64d24
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenizer/WordTokenizerBuffered.java
@@ -0,0 +1,92 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenizer;
+
+import edu.uci.ics.asterix.fuzzyjoin.IntArray;
+
+public class WordTokenizerBuffered implements TokenizerBuffered {
+
+    private final StringBuilder buffer; // text being tokenized; advance() lower-cases it in place
+    private int index; // read position inside buffer
+    private final Token token; // single instance that is re-pointed at every new token
+
+    private final IntArray tokensStart, tokensLength; // start/length of every token seen since reset()
+
+    public WordTokenizerBuffered(StringBuilder buffer) {
+        this.buffer = buffer;
+        token = new Token();
+        tokensStart = new IntArray();
+        tokensLength = new IntArray();
+        reset();
+    }
+
+    @Override
+    public void advance() {
+        // skip leading separators, then consume one token while lower-casing the buffer in place
+        while (index < buffer.length() && TokenizerBufferedFactory.isSeparator(buffer.charAt(index))) {
+            index++;
+        }
+        final int start = index;
+        while (index < buffer.length() && !TokenizerBufferedFactory.isSeparator(buffer.charAt(index))) {
+            buffer.setCharAt(index, Character.toLowerCase(buffer.charAt(index)));
+            index++;
+        }
+        final int length = index - start;
+        int count = 1;
+        if (length > 0) {
+            // count = 1 + number of earlier tokens made of exactly the same characters
+            for (int i = 0; i < tokensStart.length(); ++i) {
+                if (length != tokensLength.get(i)) {
+                    continue;
+                }
+                final int earlier = tokensStart.get(i);
+                int j = 0;
+                while (j < length && buffer.charAt(start + j) == buffer.charAt(earlier + j)) {
+                    j++;
+                }
+                if (j == length) {
+                    count++;
+                }
+            }
+            tokensStart.add(start);
+            tokensLength.add(length);
+        }
+        token.set(buffer, start, length, count);
+    }
+
+    @Override
+    public boolean end() {
+        return token.length() <= 0; // an empty token means no further token was found
+    }
+
+    @Override
+    public Token getToken() {
+        return token; // always the same object; its contents change on advance()
+    }
+
+    @Override
+    public void reset() {
+        index = 0;
+        tokensStart.reset();
+        tokensLength.reset();
+        advance();
+    }
+
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/IntTokenCountRank.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/IntTokenCountRank.java
new file mode 100644
index 0000000..d8a9185
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/IntTokenCountRank.java
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenorder;
+
+import java.io.Serializable;
+
+public interface IntTokenCountRank extends Serializable { // ranks keyed by (token, count) pairs
+    int add(int token, int count); // registers the pair and returns an int rank for it
+
+    int getRank(int token, int count); // looks up the rank previously given to the pair
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/IntTokenCountRankFrequency.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/IntTokenCountRankFrequency.java
new file mode 100644
index 0000000..acefb51
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/IntTokenCountRankFrequency.java
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenorder;
+
+import java.util.HashMap;
+
+import edu.uci.ics.asterix.fuzzyjoin.IntPair;
+
+public class IntTokenCountRankFrequency implements IntTokenCountRank {
+    // Hands out consecutive ranks, starting at 0, in the order (token, count) pairs are added.
+    private static final long serialVersionUID = 1L;
+
+    // rank of every registered (token, count) pair
+    private final HashMap<IntPair, Integer> ranksMap = new HashMap<IntPair, Integer>();
+    // scratch key that getRank() refills for each lookup instead of allocating a new pair
+    private final IntPair tmpPair = new IntPair();
+    // rank that the next add() will hand out
+    private int crtRank = 0;
+
+    /** Maps the (token, count) pair to the next unused rank and returns that rank. */
+    @Override
+    public int add(int token, int count) {
+        final int assigned = crtRank++;
+        ranksMap.put(new IntPair(token, count), assigned);
+        return assigned;
+    }
+
+    /** Returns the rank stored for the (token, count) pair, or -1 when there is none. */
+    @Override
+    public int getRank(int token, int count) {
+        tmpPair.set(token, count);
+        final Integer found = ranksMap.get(tmpPair);
+        return found == null ? -1 : found;
+    }
+
+    /** Renders the next rank followed by the whole pair-to-rank map. */
+    @Override
+    public String toString() {
+        return "[" + crtRank + ",\n " + ranksMap + "\n]";
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/IntTokenRank.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/IntTokenRank.java
new file mode 100644
index 0000000..a70a4d9
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/IntTokenRank.java
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenorder;
+
+import java.io.Serializable;
+
+public interface IntTokenRank extends Serializable { // ranks keyed by int tokens
+    int add(int token); // registers the token and returns an int rank for it
+
+    int getRank(int token); // looks up the rank previously given to the token
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/IntTokenRankFrequency.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/IntTokenRankFrequency.java
new file mode 100644
index 0000000..da83a69
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/IntTokenRankFrequency.java
@@ -0,0 +1,54 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenorder;
+
+import java.util.HashMap;
+
+public class IntTokenRankFrequency implements IntTokenRank {
+    // Hands out consecutive ranks, starting at 0, in the order tokens are added.
+    private static final long serialVersionUID = 1L;
+
+    // rank of every registered token
+    private final HashMap<Integer, Integer> ranksMap = new HashMap<Integer, Integer>();
+    // rank that the next add() will hand out
+    private int crtRank = 0;
+
+    /** Maps {@code token} to the next unused rank and returns that rank. */
+    @Override
+    public int add(int token) {
+        final int assigned = crtRank++;
+        // a token that is added again simply gets the newer rank
+        ranksMap.put(token, assigned);
+        return assigned;
+    }
+
+    /** Returns the rank stored for {@code token}, or -1 when there is none. */
+    @Override
+    public int getRank(int token) {
+        final Integer found = ranksMap.get(token);
+        return found == null ? -1 : found;
+    }
+
+    /** Renders the next rank followed by the whole token-to-rank map. */
+    @Override
+    public String toString() {
+        return "[" + crtRank + ",\n " + ranksMap + "\n]";
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/TokenLoad.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/TokenLoad.java
new file mode 100644
index 0000000..da6e47c
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/TokenLoad.java
@@ -0,0 +1,62 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenorder;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Serializable;
+
+import edu.uci.ics.asterix.fuzzyjoin.FuzzyJoinConfig;
+
+public class TokenLoad implements Serializable {
+    private final String path; // file holding one token per line, read as UTF-8
+    private final TokenRank rank; // receives every token that is read
+    public TokenLoad(String path, TokenRank rank) {
+        this.path = path;
+        this.rank = rank;
+    }
+
+    public void loadTokenRank() { // plain load: factor 1 adds no synthetic tokens
+        loadTokenRank(1);
+    }
+
+    // Adds every line of the file to rank, plus factor - 1 synthetic tokens per line (the text before
+    // the separator followed by i). The reader is always closed; IOException becomes RuntimeException.
+    public void loadTokenRank(int factor) {
+        try {
+            BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
+            try {
+                String token;
+                while ((token = in.readLine()) != null) {
+                    rank.add(token);
+                    for (int i = 1; i < factor; i++) { // only used when increasing the token dictionary
+                        rank.add(token.split(FuzzyJoinConfig.TOKEN_SEPARATOR_REGEX)[0] + i);
+                    }
+                }
+            } finally {
+                in.close(); // previously the reader was never closed (file handle leak)
+            }
+        } catch (IOException ioe) {
+            throw new RuntimeException(ioe);
+        }
+    }
+}
\ No newline at end of file
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/TokenRank.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/TokenRank.java
new file mode 100644
index 0000000..6222bea
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/TokenRank.java
@@ -0,0 +1,31 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenorder;
+
+import java.io.Serializable;
+import java.util.Collection;
+
+public interface TokenRank extends Serializable {
+    int add(String token); // registers the token and returns an int rank for it
+    // rank of the token; TokenRankFrequency returns null when the token was never added
+    Integer getRank(String token);
+    // ranks of the given tokens; TokenRankFrequency returns them sorted and skips unknown tokens
+    Collection<Integer> getTokenRanks(Iterable<String> tokens);
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/TokenRankBufferedFrequency.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/TokenRankBufferedFrequency.java
new file mode 100644
index 0000000..ab7bf2f
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/TokenRankBufferedFrequency.java
@@ -0,0 +1,75 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenorder;
+
+import java.util.Collection;
+import java.util.HashMap;
+
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.Token;
+
+public class TokenRankBufferedFrequency implements TokenRank {
+    // Hands out consecutive ranks, starting at 0, to Token keys in the order they are added.
+    private static final long serialVersionUID = 1L;
+
+    private final HashMap<Token, Integer> ranksMap = new HashMap<Token, Integer>();
+    private int crtRank = 0; // rank that the next add() will hand out
+
+    // Registers a token written as text_COUNT: the characters after the last '_' are folded into a
+    // decimal count, the characters before it are the token text.
+    // NOTE(review): digits are not validated and a string without '_' makes substring(0, -1) throw.
+    public int add(String stringWithCount) {
+        final int sep = stringWithCount.lastIndexOf('_');
+        int count = 0;
+        for (char digit : stringWithCount.substring(sep + 1).toCharArray()) {
+            count = count * 10 + (digit - '0');
+        }
+        return add(stringWithCount.substring(0, sep), count);
+    }
+
+    public int add(String string, int count) { // the whole string is the token text
+        return add(new Token(string, 0, string.length(), count));
+    }
+
+    public int add(Token token) { // maps token to the next unused rank and returns that rank
+        final int assigned = crtRank++;
+        ranksMap.put(token, assigned);
+        return assigned;
+    }
+
+    @Override
+    public Integer getRank(String token) { // string-based lookup is unsupported here
+        throw new UnsupportedOperationException();
+    }
+
+    /** Returns the rank stored for {@code token}, or null when there is none. */
+    public Integer getRank(Token token) {
+        return ranksMap.get(token);
+    }
+
+    @Override
+    public Collection<Integer> getTokenRanks(Iterable<String> tokens) { // unsupported as well
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public String toString() {
+        return "[" + crtRank + ",\n " + ranksMap + "\n]";
+    }
+}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/TokenRankFrequency.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/TokenRankFrequency.java
new file mode 100644
index 0000000..0ae6c35
--- /dev/null
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/tokenorder/TokenRankFrequency.java
@@ -0,0 +1,61 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tokenorder;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.TreeSet;
+
+public class TokenRankFrequency implements TokenRank {
+    // Hands out consecutive ranks, starting at 0, to token strings in the order they are added.
+    private static final long serialVersionUID = 1L;
+
+    private final HashMap<String, Integer> ranksMap = new HashMap<String, Integer>();
+    private int crtRank = 0; // rank that the next add() will hand out
+
+    public int add(String token) { // maps token to the next unused rank and returns that rank
+        final int assigned = crtRank++;
+        ranksMap.put(token, assigned);
+        return assigned;
+    }
+
+    public Integer getRank(String token) { // null when the token was never added
+        return ranksMap.get(token);
+    }
+
+    // Collects the ranks of the given tokens into a sorted, duplicate-free collection;
+    // tokens without a rank are skipped.
+    public Collection<Integer> getTokenRanks(Iterable<String> tokens) {
+        final TreeSet<Integer> ranks = new TreeSet<Integer>();
+        for (String token : tokens) {
+            final Integer rank = getRank(token);
+            if (rank == null) {
+                continue;
+            }
+            ranks.add(rank);
+        }
+        return ranks;
+    }
+
+    @Override
+    public String toString() {
+        return "[" + crtRank + ",\n " + ranksMap + "\n]";
+    }
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/FuzzyJoinTest.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/FuzzyJoinTest.java
new file mode 100644
index 0000000..f5a8cec9
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/FuzzyJoinTest.java
@@ -0,0 +1,65 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tests;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.util.ArrayList;
+
+import org.junit.Test;
+
+import edu.uci.ics.asterix.fuzzyjoin.FuzzyJoinMemory;
+import edu.uci.ics.asterix.fuzzyjoin.ResultSelfJoin;
+import edu.uci.ics.asterix.fuzzyjoin.tests.dataset.AbstractDataset;
+import edu.uci.ics.asterix.fuzzyjoin.tests.dataset.AbstractDataset.Directory;
+import edu.uci.ics.asterix.fuzzyjoin.tests.dataset.DBLPSmallDataset;
+
+public class FuzzyJoinTest {
+    private static final AbstractDataset dataset = new DBLPSmallDataset();
+    private static final String base = "data/";
+
+    // Self-joins the SSJOININ records in memory, writes one "ridX ridY similarity" line per result
+    // to the SSJOINOUT part file and compares that file with the expected SSJOINOUT file.
+    @Test
+    public void test() throws Exception {
+        ArrayList<int[]> records = new ArrayList<int[]>();
+        ArrayList<Integer> rids = new ArrayList<Integer>();
+        ArrayList<ResultSelfJoin> results = new ArrayList<ResultSelfJoin>();
+        dataset.createDirecotries(new String[] { base });
+        FuzzyJoinMemory fj = new FuzzyJoinMemory(dataset.getThreshold());
+        FuzzyJoinMemory.readRecords(base + dataset.getPathPart0(Directory.SSJOININ), records, rids);
+        for (int[] record : records) {
+            results.addAll(fj.selfJoinAndAddRecord(record));
+        }
+
+        BufferedWriter out = new BufferedWriter(new FileWriter(base + dataset.getPathPart0(Directory.SSJOINOUT)));
+        try {
+            for (ResultSelfJoin result : results) {
+                // fixed locale: the decimal separator must not depend on the machine running the test
+                out.write(String.format(java.util.Locale.US, "%d %d %.3f\n", rids.get(result.indexX),
+                        rids.get(result.indexY), result.similarity));
+            }
+        } finally {
+            out.close(); // release the file even when a write fails
+        }
+        FuzzyJoinTestUtil.verifyDirectory(base + dataset.getPathPart0(Directory.SSJOINOUT),
+                base + dataset.getPathExpected(Directory.SSJOINOUT));
+    }
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/FuzzyJoinTestUtil.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/FuzzyJoinTestUtil.java
new file mode 100644
index 0000000..db44850
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/FuzzyJoinTestUtil.java
@@ -0,0 +1,63 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tests;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.HashSet;
+
+import org.junit.Assert;
+
+public class FuzzyJoinTestUtil {
+    public static void verifyDirectory(String pathTest, String pathCorrect) throws IOException {
+        verifyDirectory(pathTest, pathCorrect, false); // noDup defaults to false
+    }
+
+    // Asserts that every line of the file pathCorrect occurs in the file pathTest and that the
+    // number of distinct lines in pathTest equals the number of lines in pathCorrect. Both readers
+    // are closed on every path. NOTE(review): noDup is not used by this implementation.
+    public static void verifyDirectory(String pathTest, String pathCorrect, boolean noDup) throws IOException {
+        HashSet<String> buffer = new HashSet<String>();
+        String line;
+        // buffer Test
+        BufferedReader input = new BufferedReader(new FileReader(pathTest));
+        try {
+            while ((line = input.readLine()) != null) {
+                buffer.add(line);
+            }
+        } finally {
+            input.close();
+        }
+        int countCorrect = 0; // probe Correct
+        input = new BufferedReader(new FileReader(new File(pathCorrect)));
+        try {
+            while ((line = input.readLine()) != null) {
+                Assert.assertTrue(buffer.contains(line));
+                countCorrect++;
+            }
+        } finally {
+            input.close();
+        }
+        // check counts (expected value first, as Assert.assertEquals documents)
+        Assert.assertEquals(countCorrect, buffer.size());
+    }
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/NGramTokenizerTest.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/NGramTokenizerTest.java
new file mode 100644
index 0000000..e65bb25
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/NGramTokenizerTest.java
@@ -0,0 +1,239 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tests;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.AbstractUTF8Token;
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.HashedUTF8NGramTokenFactory;
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.IToken;
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.NGramUTF8StringBinaryTokenizer;
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.UTF8NGramTokenFactory;
+
+/**
+ * Checks NGramUTF8StringBinaryTokenizer output against grams computed directly from the test
+ * string, for string tokens and for hashed tokens with or without the occurrence number.
+ */
+public class NGramTokenizerTest {
+
+    // padding characters expected around the string when the tokenizer runs with prePost == true
+    private char PRECHAR = '#';
+    private char POSTCHAR = '$';
+
+    // text under test and its DataOutput.writeUTF encoding (filled in by init())
+    private String str = "Jürgen S. Generic's Car";
+    private byte[] inputBuffer;
+
+    // gram size handed both to the tokenizer and to getExpectedGrams
+    private int gramLength = 3;
+
+    /**
+     * Appends to {@code grams} every substring of length {@code gramLength} of the lower-cased
+     * {@code s}, in order of position. With {@code prePost} the string is first padded with
+     * gramLength - 1 PRECHARs in front and gramLength - 1 POSTCHARs behind.
+     */
+    private void getExpectedGrams(String s, int gramLength, ArrayList<String> grams, boolean prePost) {
+        String tmp = s.toLowerCase();
+        if (prePost) {
+            StringBuilder padded = new StringBuilder();
+            for (int i = 0; i < gramLength - 1; i++) {
+                padded.append(PRECHAR);
+            }
+            padded.append(tmp);
+            for (int i = 0; i < gramLength - 1; i++) {
+                padded.append(POSTCHAR);
+            }
+            tmp = padded.toString();
+        }
+        for (int i = 0; i + gramLength <= tmp.length(); i++) {
+            grams.add(tmp.substring(i, i + gramLength));
+        }
+    }
+
+    /** Serializes {@code str} with DataOutput.writeUTF into {@code inputBuffer} before each test. */
+    @Before
+    public void init() throws Exception {
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        DataOutput dos = new DataOutputStream(baos);
+        dos.writeUTF(str);
+        inputBuffer = baos.toByteArray();
+    }
+
+    /**
+     * Runs the tokenizer with a hashed token factory and {@code false} as third constructor
+     * argument and expects, gram by gram, tokenHash(gram, n) where n is the 1-based occurrence
+     * number of that gram in the string.
+     * NOTE(review): the number of tokens produced is not asserted.
+     */
+    void runTestNGramTokenizerWithCountedHashedUTF8Tokens(boolean prePost) throws IOException {
+        HashedUTF8NGramTokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
+        NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(gramLength, prePost, false,
+                false, tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+
+        ArrayList<String> expectedGrams = new ArrayList<String>();
+        getExpectedGrams(str, gramLength, expectedGrams, prePost);
+        ArrayList<Integer> expectedHashedGrams = new ArrayList<Integer>();
+        HashMap<String, Integer> gramCounts = new HashMap<String, Integer>();
+        for (String s : expectedGrams) {
+            Integer seen = gramCounts.get(s);
+            int count = (seen == null) ? 1 : seen + 1;
+            // write the new count back: incrementing only a local copy left the map at 1, so a
+            // third or later occurrence of a gram was still hashed with count 2
+            gramCounts.put(s, count);
+            expectedHashedGrams.add(tokenHash(s, count));
+        }
+
+        int tokenCount = 0;
+        while (tokenizer.hasNext()) {
+            tokenizer.next();
+
+            // serialize hashed token
+            ByteArrayOutputStream tokenBaos = new ByteArrayOutputStream();
+            DataOutput tokenDos = new DataOutputStream(tokenBaos);
+            IToken token = tokenizer.getToken();
+            token.serializeToken(tokenDos);
+
+            // deserialize token: the test reads it back as a single int
+            DataInput in = new DataInputStream(new ByteArrayInputStream(tokenBaos.toByteArray()));
+            Integer hashedGram = in.readInt();
+
+            Assert.assertEquals(expectedHashedGrams.get(tokenCount), hashedGram);
+            tokenCount++;
+        }
+    }
+
+    /**
+     * Runs the tokenizer with a hashed token factory and {@code true} as third constructor
+     * argument and expects, gram by gram, tokenHash(gram, 1) however often the gram occurs.
+     * NOTE(review): the number of tokens produced is not asserted.
+     */
+    void runTestNGramTokenizerWithHashedUTF8Tokens(boolean prePost) throws IOException {
+        HashedUTF8NGramTokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
+        NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(gramLength, prePost, true, false,
+                tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+
+        ArrayList<String> expectedGrams = new ArrayList<String>();
+        getExpectedGrams(str, gramLength, expectedGrams, prePost);
+        ArrayList<Integer> expectedHashedGrams = new ArrayList<Integer>();
+        // every occurrence is hashed with count 1
+        for (String s : expectedGrams) {
+            expectedHashedGrams.add(tokenHash(s, 1));
+        }
+
+        int tokenCount = 0;
+        while (tokenizer.hasNext()) {
+            tokenizer.next();
+
+            // serialize hashed token
+            ByteArrayOutputStream tokenBaos = new ByteArrayOutputStream();
+            DataOutput tokenDos = new DataOutputStream(tokenBaos);
+            IToken token = tokenizer.getToken();
+            token.serializeToken(tokenDos);
+
+            // deserialize token: the test reads it back as a single int
+            DataInput in = new DataInputStream(new ByteArrayInputStream(tokenBaos.toByteArray()));
+            Integer hashedGram = in.readInt();
+
+            Assert.assertEquals(expectedHashedGrams.get(tokenCount), hashedGram);
+            tokenCount++;
+        }
+    }
+
+    /**
+     * Runs the tokenizer with a plain UTF-8 token factory and expects each token to read back,
+     * via readUTF, as the expected gram at the same position.
+     * NOTE(review): the number of tokens produced is not asserted.
+     */
+    void runTestNGramTokenizerWithUTF8Tokens(boolean prePost) throws IOException {
+        UTF8NGramTokenFactory tokenFactory = new UTF8NGramTokenFactory();
+        NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(gramLength, prePost, true, false,
+                tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+
+        ArrayList<String> expectedGrams = new ArrayList<String>();
+        getExpectedGrams(str, gramLength, expectedGrams, prePost);
+
+        int tokenCount = 0;
+        while (tokenizer.hasNext()) {
+            tokenizer.next();
+
+            // serialize string token
+            ByteArrayOutputStream tokenBaos = new ByteArrayOutputStream();
+            DataOutput tokenDos = new DataOutputStream(tokenBaos);
+            IToken token = tokenizer.getToken();
+            token.serializeToken(tokenDos);
+
+            // deserialize token
+            DataInput in = new DataInputStream(new ByteArrayInputStream(tokenBaos.toByteArray()));
+            String strGram = in.readUTF();
+
+            Assert.assertEquals(expectedGrams.get(tokenCount), strGram);
+            tokenCount++;
+        }
+    }
+
+    /** Counted hashed tokens, first without and then with pre/post padding. */
+    @Test
+    public void testNGramTokenizerWithCountedHashedUTF8Tokens() throws Exception {
+        runTestNGramTokenizerWithCountedHashedUTF8Tokens(false);
+        runTestNGramTokenizerWithCountedHashedUTF8Tokens(true);
+    }
+
+    /** Hashed tokens with a fixed count of 1, first without and then with pre/post padding. */
+    @Test
+    public void testNGramTokenizerWithHashedUTF8Tokens() throws Exception {
+        runTestNGramTokenizerWithHashedUTF8Tokens(false);
+        runTestNGramTokenizerWithHashedUTF8Tokens(true);
+    }
+
+    /** String tokens, first without and then with pre/post padding. */
+    @Test
+    public void testNGramTokenizerWithUTF8Tokens() throws IOException {
+        runTestNGramTokenizerWithUTF8Tokens(false);
+        runTestNGramTokenizerWithUTF8Tokens(true);
+    }
+
+    /**
+     * Reference hash used for the expected values: starts from GOLDEN_RATIO_32, xors in each
+     * char and multiplies by GOLDEN_RATIO_32 after each one, then adds {@code tokenCount}.
+     */
+    public int tokenHash(String token, int tokenCount) {
+        int h = AbstractUTF8Token.GOLDEN_RATIO_32;
+        for (int i = 0; i < token.length(); i++) {
+            h ^= token.charAt(i);
+            h *= AbstractUTF8Token.GOLDEN_RATIO_32;
+        }
+        return h + tokenCount;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/WordTokenizerTest.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/WordTokenizerTest.java
new file mode 100644
index 0000000..8fd05da
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/WordTokenizerTest.java
@@ -0,0 +1,214 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Alexander Behm <abehm (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tests;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import junit.framework.Assert;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.AbstractUTF8Token;
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.DelimitedUTF8StringBinaryTokenizer;
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.HashedUTF8WordTokenFactory;
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.IToken;
+import edu.uci.ics.asterix.fuzzyjoin.tokenizer.UTF8WordTokenFactory;
+
+/**
+ * Checks DelimitedUTF8StringBinaryTokenizer output against a hand-written word list, for string
+ * tokens and for hashed tokens with or without the occurrence number.
+ */
+public class WordTokenizerTest {
+
+    // text under test and its DataOutput.writeUTF encoding (filled in by init())
+    private String text = "Hello World, I would like to inform you of the importance of Foo Bar. Yes, Foo Bar. Jürgen.";
+    private byte[] inputBuffer;
+
+    // expected words, their hashes with count 1, and their hashes with the occurrence number
+    private ArrayList<String> expectedUTF8Tokens = new ArrayList<String>();
+    private ArrayList<Integer> expectedHashedUTF8Tokens = new ArrayList<Integer>();
+    private ArrayList<Integer> expectedCountedHashedUTF8Tokens = new ArrayList<Integer>();
+
+    /**
+     * Serializes {@code text} with DataOutput.writeUTF and fills the three expected lists: the
+     * lower-cased words, their hashes with count 1, and their hashes with the 1-based occurrence
+     * number of each word as count.
+     */
+    @Before
+    public void init() throws IOException {
+        // serialize text into bytes
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        DataOutput dos = new DataOutputStream(baos);
+        dos.writeUTF(text);
+        inputBuffer = baos.toByteArray();
+
+        // init expected string tokens
+        expectedUTF8Tokens.add("hello");
+        expectedUTF8Tokens.add("world");
+        expectedUTF8Tokens.add("i");
+        expectedUTF8Tokens.add("would");
+        expectedUTF8Tokens.add("like");
+        expectedUTF8Tokens.add("to");
+        expectedUTF8Tokens.add("inform");
+        expectedUTF8Tokens.add("you");
+        expectedUTF8Tokens.add("of");
+        expectedUTF8Tokens.add("the");
+        expectedUTF8Tokens.add("importance");
+        expectedUTF8Tokens.add("of");
+        expectedUTF8Tokens.add("foo");
+        expectedUTF8Tokens.add("bar");
+        expectedUTF8Tokens.add("yes");
+        expectedUTF8Tokens.add("foo");
+        expectedUTF8Tokens.add("bar");
+        expectedUTF8Tokens.add("jürgen");
+
+        // hashed tokens ignoring token count
+        for (String word : expectedUTF8Tokens) {
+            expectedHashedUTF8Tokens.add(tokenHash(word, 1));
+        }
+
+        // hashed tokens using token count
+        HashMap<String, Integer> tokenCounts = new HashMap<String, Integer>();
+        for (String word : expectedUTF8Tokens) {
+            Integer seen = tokenCounts.get(word);
+            int count = (seen == null) ? 1 : seen + 1;
+            // write the new count back: incrementing only a local copy left the map at 1, so a
+            // third or later occurrence of a word was still hashed with count 2
+            tokenCounts.put(word, count);
+            expectedCountedHashedUTF8Tokens.add(tokenHash(word, count));
+        }
+    }
+
+    /**
+     * Tokenizes with a hashed token factory and {@code false} as first constructor argument and
+     * expects the hashes that include each word's occurrence number.
+     * NOTE(review): the number of tokens produced is not asserted.
+     */
+    @Test
+    public void testWordTokenizerWithCountedHashedUTF8Tokens() throws IOException {
+
+        HashedUTF8WordTokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
+        DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(false, false,
+                tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+
+        int tokenCount = 0;
+        while (tokenizer.hasNext()) {
+            tokenizer.next();
+
+            // serialize token
+            ByteArrayOutputStream tokenBaos = new ByteArrayOutputStream();
+            DataOutput tokenDos = new DataOutputStream(tokenBaos);
+            IToken token = tokenizer.getToken();
+            token.serializeToken(tokenDos);
+
+            // deserialize token: the test reads it back as a single int
+            DataInput in = new DataInputStream(new ByteArrayInputStream(tokenBaos.toByteArray()));
+            Integer hashedToken = in.readInt();
+
+            // expected value first, matching the other tests of this class
+            Assert.assertEquals(expectedCountedHashedUTF8Tokens.get(tokenCount), hashedToken);
+            tokenCount++;
+        }
+    }
+
+    /**
+     * Tokenizes with a hashed token factory and {@code true} as first constructor argument and
+     * expects tokenHash(word, 1) for every word. NOTE(review): the token total is not asserted.
+     */
+    @Test
+    public void testWordTokenizerWithHashedUTF8Tokens() throws IOException {
+
+        HashedUTF8WordTokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
+        DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+
+        int tokenCount = 0;
+        while (tokenizer.hasNext()) {
+            tokenizer.next();
+
+            // serialize token
+            ByteArrayOutputStream tokenBaos = new ByteArrayOutputStream();
+            DataOutput tokenDos = new DataOutputStream(tokenBaos);
+            IToken token = tokenizer.getToken();
+            token.serializeToken(tokenDos);
+
+            // deserialize token: the test reads it back as a single int
+            DataInput in = new DataInputStream(new ByteArrayInputStream(tokenBaos.toByteArray()));
+            Integer hashedToken = in.readInt();
+
+            Assert.assertEquals(expectedHashedUTF8Tokens.get(tokenCount), hashedToken);
+            tokenCount++;
+        }
+    }
+
+    /**
+     * Tokenizes with a plain UTF-8 token factory and expects each token to read back, via
+     * readUTF, as the expected word. NOTE(review): the token total is not asserted.
+     */
+    @Test
+    public void testWordTokenizerWithUTF8Tokens() throws IOException {
+
+        UTF8WordTokenFactory tokenFactory = new UTF8WordTokenFactory();
+        DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+
+        int tokenCount = 0;
+        while (tokenizer.hasNext()) {
+            tokenizer.next();
+
+            // serialize string token
+            ByteArrayOutputStream tokenBaos = new ByteArrayOutputStream();
+            DataOutput tokenDos = new DataOutputStream(tokenBaos);
+            IToken token = tokenizer.getToken();
+            token.serializeToken(tokenDos);
+
+            // deserialize token
+            DataInput in = new DataInputStream(new ByteArrayInputStream(tokenBaos.toByteArray()));
+            String strToken = in.readUTF();
+
+            Assert.assertEquals(expectedUTF8Tokens.get(tokenCount), strToken);
+            tokenCount++;
+        }
+    }
+
+    /**
+     * Reference hash used for the expected values: starts from GOLDEN_RATIO_32, xors in each
+     * char and multiplies by GOLDEN_RATIO_32 after each one, then adds {@code tokenCount}.
+     * The original source labels this method "JAQL".
+     */
+    public int tokenHash(String token, int tokenCount) {
+        int h = AbstractUTF8Token.GOLDEN_RATIO_32;
+        for (int i = 0; i < token.length(); i++) {
+            h ^= token.charAt(i);
+            h *= AbstractUTF8Token.GOLDEN_RATIO_32;
+        }
+        return h + tokenCount;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/AbstractDataset.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/AbstractDataset.java
new file mode 100644
index 0000000..5ca6c6d
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/AbstractDataset.java
@@ -0,0 +1,158 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tests.dataset;
+
+import java.io.File;
+import java.util.NoSuchElementException;
+
+/**
+ * Describes where the files of a fuzzy-join test dataset live: sub-directory names, part-file
+ * names and expected-output files. Subclasses supply the dataset name, path, record count,
+ * per-relation suffix and threshold.
+ */
+public abstract class AbstractDataset {
+    /** Kinds of dataset sub-directory that {@code getPathDirectory} can name. */
+    public enum Directory {
+        RAW_R, RAW_S, RECORDPAIRS,
+        RECORDS_R, RECORDS_S, RECORDSBULK_R, RECORDSBULK_S,
+        RIDPAIRS, SSJOININ, SSJOINOUT,
+        TOKENS, TOKENS_R, TOKENS_R_AQL,
+    }
+
+    /** Relation selector passed to {@link #getSuffix(Relation)}. */
+    public enum Relation {
+        R, S,
+    }
+
+    public static final String FILE_PART = "part-";
+    public static final String FILE_PART0 = FILE_PART + "00000";
+    public static final String FILE_EXPECTED = "expected.txt";
+    public static final String AQL = "aql";
+
+    public static final String PATH_RAW = "raw";
+    public static final String PATH_RECORDPAIRS = "recordpairs";
+    public static final String PATH_RECORDS = "records";
+    public static final String PATH_RECORDSBULK = "recordsbulk";
+    public static final String PATH_RIDPAIRS = "ridpairs";
+    public static final String PATH_SSJOININ = "ssjoin.in";
+    public static final String PATH_SSJOINOUT = "ssjoin.out";
+    public static final String PATH_TOKENS = "tokens";
+
+    /** Format of the copy number that ends every directory name: three digits, zero-padded. */
+    public static final String DIRECTORY_ID_FORMAT = "%03d";
+
+    /** Directories that {@code createDirecotries} makes for the requested copy, in creation order. */
+    private static final Directory[] PER_COPY_DIRECTORIES = { Directory.RECORDSBULK_R, Directory.RECORDSBULK_S,
+            Directory.RECORDS_R, Directory.RECORDS_S, Directory.TOKENS, Directory.TOKENS_R, Directory.TOKENS_R_AQL };
+
+    /** Equivalent to {@code createDirecotries(paths, 0)}. */
+    public void createDirecotries(String[] paths) {
+        createDirecotries(paths, 0);
+    }
+
+    /**
+     * Creates the ssjoin-output directory (always for copy 0) followed by the bulk-record, record
+     * and token directories for {@code crtCopy}. Every path is {@code paths[0]} directly followed
+     * by {@code getPathDirecotry(...)}; the other array elements are unused and the result of
+     * {@code mkdir()} is not checked.
+     */
+    public void createDirecotries(String[] paths, int crtCopy) {
+        final String root = paths[0];
+        new File(root + getPathDirecotry(Directory.SSJOINOUT, 0)).mkdir();
+        for (Directory directory : PER_COPY_DIRECTORIES) {
+            new File(root + getPathDirecotry(directory, crtCopy)).mkdir();
+        }
+    }
+
+    /** Dataset name; also the root of the paths built by getPathExpected and getPathPart0. */
+    public abstract String getName();
+
+    public abstract int getNoRecords();
+
+    /** Root of the paths built by getPathDirecotry and getPathPart. */
+    public abstract String getPath();
+
+    /** Returns the path of {@code directory} for copy {@code crtCopy}, rooted at {@link #getPath()}. */
+    public String getPathDirecotry(Directory directory, int crtCopy) {
+        return getPathDirectory(getPath(), directory, crtCopy);
+    }
+
+    /** Same layout rooted at {@link #getName()}, with ".expected" appended when {@code expected}. */
+    private String getPathDirectory(Directory directory, int crtCopy, boolean expected) {
+        final String rootSuffix = expected ? ".expected" : "";
+        return getPathDirectory(getName() + rootSuffix, directory, crtCopy);
+    }
+
+    /**
+     * Builds {@code path + "/" + name + "-" + copy}: the name is chosen by {@code directory} and
+     * the copy number is formatted with {@link #DIRECTORY_ID_FORMAT}.
+     *
+     * @throws NoSuchElementException if {@code directory} has no name mapping
+     */
+    public String getPathDirectory(String path, Directory directory, int crtCopy) {
+        return path + '/' + directoryName(directory) + "-" + String.format(DIRECTORY_ID_FORMAT, crtCopy);
+    }
+
+    /** Maps a directory kind to its name, appending the relation suffix (and "aql") where needed. */
+    private String directoryName(Directory directory) {
+        switch (directory) {
+            case SSJOININ: return PATH_SSJOININ;
+            case SSJOINOUT: return PATH_SSJOINOUT;
+            case RAW_R: return PATH_RAW + "." + getSuffix(Relation.R);
+            case RAW_S: return PATH_RAW + "." + getSuffix(Relation.S);
+            case RECORDSBULK_R: return PATH_RECORDSBULK + "." + getSuffix(Relation.R);
+            case RECORDSBULK_S: return PATH_RECORDSBULK + "." + getSuffix(Relation.S);
+            case RECORDS_R: return PATH_RECORDS + "." + getSuffix(Relation.R);
+            case RECORDS_S: return PATH_RECORDS + "." + getSuffix(Relation.S);
+            case TOKENS: return PATH_TOKENS;
+            case TOKENS_R: return PATH_TOKENS + "." + getSuffix(Relation.R);
+            case TOKENS_R_AQL: return PATH_TOKENS + "." + getSuffix(Relation.R) + "." + AQL;
+            case RIDPAIRS: return PATH_RIDPAIRS;
+            case RECORDPAIRS: return PATH_RECORDPAIRS;
+            default:
+                throw new NoSuchElementException();
+        }
+    }
+
+    /** Expected-output file of {@code directory} (copy 0) under the {@code <name>.expected} root. */
+    public String getPathExpected(Directory directory) {
+        return getPathDirectory(directory, 0, true) + '/' + FILE_EXPECTED;
+    }
+
+    /** Part-file prefix ({@code "part-"}) inside the {@link #getPath()}-rooted directory of {@code crtCopy}. */
+    public String getPathPart(Directory directory, int crtCopy) {
+        return getPathDirecotry(directory, crtCopy) + '/' + FILE_PART;
+    }
+
+    /** First part file ({@code "part-00000"}) of copy 0, rooted at {@link #getName()}. */
+    public String getPathPart0(Directory directory) {
+        return getPathDirectory(directory, 0, false) + '/' + FILE_PART0;
+    }
+
+    /** Copy-0 file rooted at the name: the expected-output file when {@code expected}, else the first part file. */
+    public String getPathPart0(Directory directory, boolean expected) {
+        return getPathDirectory(directory, 0, expected) + '/' + (expected ? FILE_EXPECTED : FILE_PART0);
+    }
+
+    /** Suffix inserted into the relation-specific directory names (raw, records, recordsbulk, tokens). */
+    public abstract String getSuffix(Relation relation);
+
+    public abstract float getThreshold();
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/AbstractTokenizableDataset.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/AbstractTokenizableDataset.java
new file mode 100644
index 0000000..5333cad
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/AbstractTokenizableDataset.java
@@ -0,0 +1,5 @@
+package edu.uci.ics.asterix.fuzzyjoin.tests.dataset;
+/** Dataset that additionally exposes a record-data string (its format is not defined here). */
+public abstract class AbstractTokenizableDataset extends AbstractDataset {
+ public abstract String getRecordData();
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/DBLPDataset.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/DBLPDataset.java
new file mode 100644
index 0000000..3783829
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/DBLPDataset.java
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tests.dataset;
+
+/** Descriptor of the "dblp" dataset: 1268017 records, threshold 0.8, suffix "dblp" for both relations. */
+public class DBLPDataset extends PublicationsDataset {
+    private static final String NAME = "dblp";
+    private static final int NO_RECORDS = 1268017;
+    private static final float THRESHOLD = .8f;
+    private static final String RECORD_DATA = "2,3";
+
+    public DBLPDataset() {
+        this(RECORD_DATA);
+    }
+
+    public DBLPDataset(String recordData) {
+        super(NAME, NO_RECORDS, THRESHOLD, recordData, NAME, NAME);
+    }
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/DBLPSmallDataset.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/DBLPSmallDataset.java
new file mode 100644
index 0000000..5eaebd2
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/DBLPSmallDataset.java
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tests.dataset;
+/** The "dblp-small" dataset: 100 records, threshold 0.5, record data "2,3", suffix "dblp" for both R and S. */
+public class DBLPSmallDataset extends PublicationsDataset {
+ public DBLPSmallDataset() {
+ super("dblp-small", 100, .5f, "2,3", "dblp", "dblp");
+ }
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/IntArrayBagSmallDataset.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/IntArrayBagSmallDataset.java
new file mode 100644
index 0000000..38aad37
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/IntArrayBagSmallDataset.java
@@ -0,0 +1,56 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tests.dataset;
+
+/** Descriptor of the "intarray-bag-small" dataset: 4 records, threshold 0.5, suffix "r" for both relations. */
+public class IntArrayBagSmallDataset extends AbstractDataset {
+    // Class-level constants: the values never vary between instances.
+    private static final int NO_RECORDS = 4;
+    private static final String NAME = "intarray-bag-small";
+    private static final String PATH = NAME;
+    private static final float THRESHOLD = .5f;
+    private static final String SUFFIX = "r";
+
+    @Override
+    public String getName() {
+        return NAME;
+    }
+
+    @Override
+    public int getNoRecords() {
+        return NO_RECORDS;
+    }
+
+    @Override
+    public String getPath() {
+        return PATH;
+    }
+
+    /** Returns "r" whatever {@code relation} is. */
+    @Override
+    public String getSuffix(Relation relation) {
+        return SUFFIX;
+    }
+
+    @Override
+    public float getThreshold() {
+        return THRESHOLD;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/IntArraySetSmallDataset.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/IntArraySetSmallDataset.java
new file mode 100644
index 0000000..7c8c80d
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/IntArraySetSmallDataset.java
@@ -0,0 +1,56 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tests.dataset;
+
+/** Descriptor of the "intarray-set-small" dataset: 4 records, threshold 0.5, suffix "r" for both relations. */
+public class IntArraySetSmallDataset extends AbstractDataset {
+    // Class-level constants: the values never vary between instances.
+    private static final int NO_RECORDS = 4;
+    private static final String NAME = "intarray-set-small";
+    private static final String PATH = NAME;
+    private static final float THRESHOLD = .5f;
+    private static final String SUFFIX = "r";
+
+    @Override
+    public String getName() {
+        return NAME;
+    }
+
+    @Override
+    public int getNoRecords() {
+        return NO_RECORDS;
+    }
+
+    @Override
+    public String getPath() {
+        return PATH;
+    }
+
+    /** Returns "r" whatever {@code relation} is. */
+    @Override
+    public String getSuffix(Relation relation) {
+        return SUFFIX;
+    }
+
+    @Override
+    public float getThreshold() {
+        return THRESHOLD;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/PUBDataset.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/PUBDataset.java
new file mode 100644
index 0000000..e8c2f2a
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/PUBDataset.java
@@ -0,0 +1,41 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tests.dataset;
+/** Descriptor of the "pub" dataset: relation R uses the suffix "dblp", relation S uses "csx". */
+public class PUBDataset extends PublicationsDataset {
+    private static final String DBLP_SUFFIX = "dblp";
+    private static final String CSX_SUFFIX = "csx";
+    private static final String NAME = "pub";
+    private static final int NO_RECORDS = 1385532;
+    private static final float THRESHOLD = .8f;
+    private static final String RECORD_DATA = "2,3";
+
+    public PUBDataset() {
+        this(THRESHOLD);
+    }
+
+    public PUBDataset(float threshold) {
+        this(threshold, RECORD_DATA);
+    }
+
+    public PUBDataset(float threshold, String recordData) {
+        super(NAME, NO_RECORDS, threshold, recordData, DBLP_SUFFIX, CSX_SUFFIX);
+    }
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/PUBSmallDataset.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/PUBSmallDataset.java
new file mode 100644
index 0000000..eed28e4
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/PUBSmallDataset.java
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tests.dataset;
+/** The "pub-small" dataset: 100 records, threshold 0.5, record data "2,3", suffix "dblp" for R and "csx" for S. */
+public class PUBSmallDataset extends PublicationsDataset {
+ public PUBSmallDataset() {
+ super("pub-small", 100, .5f, "2,3", "dblp", "csx");
+ }
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/PublicationsDataset.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/PublicationsDataset.java
new file mode 100644
index 0000000..e1653cd
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/PublicationsDataset.java
@@ -0,0 +1,80 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tests.dataset;
+
+import java.util.NoSuchElementException;
+
+/** Dataset descriptor configured entirely through its constructor; the path is always the name. */
+public class PublicationsDataset extends AbstractTokenizableDataset {
+    protected final String name;
+    protected final String path;
+    protected final int noRecords;
+    protected final float threshold;
+    protected final String recordData;
+    protected final String rSuffix;
+    protected final String sSuffix;
+
+    /** Stores the arguments unchanged; {@code rSuffix}/{@code sSuffix} are the suffixes of relations R/S. */
+    public PublicationsDataset(String name, int noRecords, float threshold, String recordData, String rSuffix,
+            String sSuffix) {
+        this.name = name;
+        this.path = name;
+        this.noRecords = noRecords;
+        this.threshold = threshold;
+        this.recordData = recordData;
+        this.rSuffix = rSuffix;
+        this.sSuffix = sSuffix;
+    }
+
+    @Override
+    public String getName() {
+        return name;
+    }
+
+    @Override
+    public int getNoRecords() {
+        return noRecords;
+    }
+
+    @Override
+    public String getPath() {
+        return path;
+    }
+
+    @Override
+    public String getRecordData() {
+        return recordData;
+    }
+
+    /** @throws NoSuchElementException for a relation other than R or S */
+    @Override
+    public String getSuffix(Relation relation) {
+        switch (relation) {
+            case R: return rSuffix;
+            case S: return sSuffix;
+            default: throw new NoSuchElementException();
+        }
+    }
+
+    @Override
+    public float getThreshold() {
+        return threshold;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/UsersVisitorsSmallDataset.java b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/UsersVisitorsSmallDataset.java
new file mode 100644
index 0000000..6463b2d
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/java/edu/uci/ics/asterix/fuzzyjoin/tests/dataset/UsersVisitorsSmallDataset.java
@@ -0,0 +1,67 @@
+/**
+ * Copyright 2010-2011 The Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under
+ * the License.
+ *
+ * Author: Rares Vernica <rares (at) ics.uci.edu>
+ */
+
+package edu.uci.ics.asterix.fuzzyjoin.tests.dataset;
+
+import java.util.NoSuchElementException;
+
+/**
+ * Descriptor of the "users-visitors-small" dataset: 4 records, threshold 0.5; R is "users", S is "visitors".
+ */
+public class UsersVisitorsSmallDataset extends AbstractDataset {
+    private static final int NO_RECORDS = 4;
+    private static final String NAME = "users-visitors-small";
+    private static final String USERS_SUFFIX = "users";
+    private static final String VISITORS_SUFFIX = "visitors";
+    private static final String PATH = NAME;
+    private static final float THRESHOLD = .5f;
+
+    @Override
+    public String getName() {
+        return NAME;
+    }
+
+    @Override
+    public int getNoRecords() {
+        return NO_RECORDS;
+    }
+
+    @Override
+    public String getPath() {
+        return PATH;
+    }
+
+    /** @throws NoSuchElementException for a relation other than R or S */
+    @Override
+    public String getSuffix(Relation relation) {
+        switch (relation) {
+            case R:
+                return USERS_SUFFIX;
+            case S:
+                return VISITORS_SUFFIX;
+            default:
+                throw new NoSuchElementException();
+        }
+    }
+
+    @Override
+    public float getThreshold() {
+        return THRESHOLD;
+    }
+}
diff --git a/asterix-fuzzyjoin/src/test/scripts/conf.sh b/asterix-fuzzyjoin/src/test/scripts/conf.sh
new file mode 100644
index 0000000..45e962c
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/scripts/conf.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Copyright 2010-2011 The Regents of the University of California
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you
+# may not use this file except in compliance with the License. You
+# may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# Author: Rares Vernica <rares (at) ics.uci.edu>
+
+### Directory holding the ppjoinplus/tokenizer binaries: http://www.cse.unsw.edu.au/~weiw/project/simjoin.html
+SSJOIN="${SSJOIN:-/home/rares/workspace/ssjoin-bin}" # set SSJOIN in the environment to override
+DATA=../data # relative to the directory the scripts are started from
+
+IN=ssjoin.in-000 # per-dataset sub-directory names used by the scripts that source this file
+OUT=ssjoin.out-000
diff --git a/asterix-fuzzyjoin/src/test/scripts/fuzzyjoin.sh b/asterix-fuzzyjoin/src/test/scripts/fuzzyjoin.sh
new file mode 100755
index 0000000..0cd6ccc
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/scripts/fuzzyjoin.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Copyright 2010-2011 The Regents of the University of California
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you
+# may not use this file except in compliance with the License. You
+# may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# Author: Rares Vernica <rares (at) ics.uci.edu>
+
+# Absolute directory of this script; unlike prefixing `pwd`, this also handles "../" invocations.
+DIR="$(cd "$(dirname "$0")" && pwd)"
+source "$DIR/conf.sh"
+
+ARGS=1 # Required number of arguments
+E_BADARGS=85 # Wrong number of arguments passed to script.
+if [ $# -lt "$ARGS" ]; then
+    echo "Usage: $(basename "$0") dataset"
+    echo "Example: $(basename "$0") dblp-small"
+    exit $E_BADARGS
+fi
+
+# Threshold handed to both programs: 0.50 for dblp-small, 0.80 for any other dataset.
+THR="0.80"
+if [ "$1" == "dblp-small" ]; then
+    THR="0.50"
+fi
+
+# Expected output: ppjoinplus result with 0.812 rewritten to 0.813 (presumably a rounding fix-up), sorted.
+mkdir -p "$DATA/$1.expected/$OUT" # -p: no error when the directory already exists
+"$SSJOIN/ppjoinplus" j "$THR" "$DATA/$1/$IN/part-00000" | \
+    sed 's/0\.812/0\.813/' | \
+    sort > "$DATA/$1.expected/$OUT/expected.txt"
+
+# Actual output: the fuzzyjoin-core jar run on the same input, sorted the same way.
+mkdir -p "$DATA/$1/$OUT"
+java -Xmx2g -jar "$DIR/../../../target/fuzzyjoin-core-0.0.1.jar" \
+    "$THR" "$DATA/$1/$IN/part-00000" | \
+    sort > "$DATA/$1/$OUT/part-00000"
+
+diff "$DATA/$1.expected/$OUT/expected.txt" "$DATA/$1/$OUT/part-00000"
diff --git a/asterix-fuzzyjoin/src/test/scripts/inmemory.sh b/asterix-fuzzyjoin/src/test/scripts/inmemory.sh
new file mode 100755
index 0000000..c9394d4
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/scripts/inmemory.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+#
+# Copyright 2010-2011 The Regents of the University of California
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you
+# may not use this file except in compliance with the License. You
+# may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# Author: Rares Vernica <rares (at) ics.uci.edu>
+
+# Usage: inmemory.sh [records-dir [fuzzyjoin-jar]] -- both arguments default to the paths below.
+DATA="${1:-/home/rares/data/fuzzyjoin/dblp/records-024}"
+FUZZYJOIN="${2:-/home/rares/fuzzyjoin/fuzzyjoin-core/target/fuzzyjoin-core-0.0.2-SNAPSHOT.jar}"
+
+# Steps 0-2 are commented out, so only their banners are printed; step 3 therefore needs an
+# existing $DATA/part-00000-tokenized file.
+echo "-- - Step 0: Project and append length - --"
+
+# java -cp $FUZZYJOIN edu.uci.ics.fuzzyjoin.FuzzyJoinAppendLength $DATA/part-00000 $DATA/part-00000-len
+
+date
+
+echo "== START =="
+
+echo "-- - Step 1: Sort by length - --"
+# time sort -n -k 5 -t ":" $DATA/part-00000-len > $DATA/part-00000-len-sorted
+
+echo "-- - Step 2: Tokenize - --"
+# time java -cp $FUZZYJOIN edu.uci.ics.fuzzyjoin.FuzzyJoinTokenize $DATA/part-00000-len-sorted $DATA/part-00000-tokens $DATA/part-00000-tokenized
+
+echo "-- - Step 3: RID pairs - --"
+time java -Xmx8g -cp "$FUZZYJOIN" edu.uci.ics.fuzzyjoin.FuzzyJoinMemory .8 \
+    "$DATA/part-00000-tokenized" > "$DATA/part-00000-ridpairs"
+
+echo "== END =="
+
+date
+
+### SSJoin ###
+# cut -d ":" -f 3,4 records-000/part-0000* > ! ssjoin.raw-000/part-00000
+# ~/workspace/ssjoin-bin/txtformat ssjoin.raw-000/part-00000 ssjoin.norm-000/part-00000 l
+# sed 's/_\+/ /g' ssjoin.norm-000/part-00000 > ! ssjoin.space-000/part-00000
+# ~/workspace/ssjoin-bin/tokenizer ssjoin.space-000/part-00000
+# ~/workspace/ssjoin-bin/ppjoinplus j .8 ssjoin.space-000/part-00000.bin > /dev/null
+# java -jar /fuzzyjoin/fuzzyjoin-core/target/fuzzyjoin-core-0.0.2-SNAPSHOT.jar .8 ssjoin.space-000/part-00000.bin > /dev/null
diff --git a/asterix-fuzzyjoin/src/test/scripts/tokenize.sh b/asterix-fuzzyjoin/src/test/scripts/tokenize.sh
new file mode 100755
index 0000000..5498d44
--- /dev/null
+++ b/asterix-fuzzyjoin/src/test/scripts/tokenize.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+#
+# Copyright 2010-2011 The Regents of the University of California
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you
+# may not use this file except in compliance with the License. You
+# may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# Author: Rares Vernica <rares (at) ics.uci.edu>
+
+DIR="$(cd "$(dirname "$0")" && pwd)" # absolute script directory, correct for "../" invocations too
+source "$DIR/conf.sh"
+
+ARGS=1 # Required number of arguments
+E_BADARGS=85 # Wrong number of arguments passed to script.
+if [ $# -lt "$ARGS" ]; then
+    echo "Usage: $(basename "$0") dataset [tokenizer-option]"
+    echo "Example: $(basename "$0") dblp-small"
+    exit $E_BADARGS
+fi
+
+# The optional second argument is handed to the tokenizer unchanged and omitted when absent.
+"$SSJOIN/tokenizer" "$DATA/$1/raw-000/part-00000" ${2:+"$2"}
+mkdir -p "$DATA/$1/$IN"
+mv "$DATA/$1/raw-000/part-00000.bin" "$DATA/$1/$IN/part-00000"
+
diff --git a/asterix-runtime/pom.xml b/asterix-runtime/pom.xml
index e5fe2f0..06ac7fa 100644
--- a/asterix-runtime/pom.xml
+++ b/asterix-runtime/pom.xml
@@ -137,9 +137,9 @@
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>edu.uci.ics.fuzzyjoin</groupId>
- <artifactId>fuzzyjoin-core</artifactId>
- <version>0.0.3</version>
+ <groupId>edu.uci.ics.asterix</groupId>
+ <artifactId>asterix-fuzzyjoin</artifactId>
+ <version>0.8.1-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AbstractAsterixListIterator.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AbstractAsterixListIterator.java
index d8676c0..56e5139 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AbstractAsterixListIterator.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AbstractAsterixListIterator.java
@@ -19,7 +19,7 @@
import edu.uci.ics.asterix.formats.nontagged.AqlBinaryComparatorFactoryProvider;
import edu.uci.ics.asterix.om.types.ATypeTag;
import edu.uci.ics.asterix.om.types.EnumDeserializer;
-import edu.uci.ics.fuzzyjoin.similarity.IListIterator;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.IListIterator;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
public abstract class AbstractAsterixListIterator implements IListIterator {
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/EditDistanceEvaluator.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
index 72faaf1..0a5f251 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
@@ -23,7 +23,7 @@
import edu.uci.ics.asterix.om.types.ATypeTag;
import edu.uci.ics.asterix.om.types.BuiltinType;
import edu.uci.ics.asterix.om.types.EnumDeserializer;
-import edu.uci.ics.fuzzyjoin.similarity.SimilarityMetricEditDistance;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityMetricEditDistance;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityFiltersCache.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityFiltersCache.java
index 5a857bf..7265ebb 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityFiltersCache.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityFiltersCache.java
@@ -18,8 +18,8 @@
import java.nio.ByteBuffer;
import java.util.Arrays;
-import edu.uci.ics.fuzzyjoin.similarity.SimilarityFilters;
-import edu.uci.ics.fuzzyjoin.similarity.SimilarityFiltersFactory;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFilters;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFiltersFactory;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardPrefixEvaluator.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardPrefixEvaluator.java
index 393f32b..b9b7dfa 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardPrefixEvaluator.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardPrefixEvaluator.java
@@ -27,10 +27,10 @@
import edu.uci.ics.asterix.om.types.ATypeTag;
import edu.uci.ics.asterix.om.types.BuiltinType;
import edu.uci.ics.asterix.om.types.EnumDeserializer;
-import edu.uci.ics.fuzzyjoin.IntArray;
-import edu.uci.ics.fuzzyjoin.similarity.PartialIntersect;
-import edu.uci.ics.fuzzyjoin.similarity.SimilarityFiltersJaccard;
-import edu.uci.ics.fuzzyjoin.similarity.SimilarityMetric;
+import edu.uci.ics.asterix.fuzzyjoin.IntArray;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.PartialIntersect;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFiltersJaccard;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityMetric;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardSortedCheckEvaluator.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardSortedCheckEvaluator.java
index 11f1e3d..15f788c 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardSortedCheckEvaluator.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardSortedCheckEvaluator.java
@@ -15,7 +15,7 @@
package edu.uci.ics.asterix.runtime.evaluators.common;
import edu.uci.ics.asterix.om.types.ATypeTag;
-import edu.uci.ics.fuzzyjoin.similarity.SimilarityMetricJaccard;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityMetricJaccard;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
index 67b9ccc..079df7a 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
@@ -15,7 +15,7 @@
package edu.uci.ics.asterix.runtime.evaluators.common;
import edu.uci.ics.asterix.om.types.ATypeTag;
-import edu.uci.ics.fuzzyjoin.similarity.SimilarityMetricJaccard;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityMetricJaccard;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/PrefixLenDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/PrefixLenDescriptor.java
index d803072..14657e9 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/PrefixLenDescriptor.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/PrefixLenDescriptor.java
@@ -29,7 +29,7 @@
import edu.uci.ics.asterix.om.types.EnumDeserializer;
import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
import edu.uci.ics.asterix.runtime.evaluators.common.SimilarityFiltersCache;
-import edu.uci.ics.fuzzyjoin.similarity.SimilarityFilters;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFilters;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/PrefixLenJaccardDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/PrefixLenJaccardDescriptor.java
index 0232f66..eba03e2 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/PrefixLenJaccardDescriptor.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/PrefixLenJaccardDescriptor.java
@@ -28,7 +28,7 @@
import edu.uci.ics.asterix.om.types.BuiltinType;
import edu.uci.ics.asterix.om.types.EnumDeserializer;
import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
-import edu.uci.ics.fuzzyjoin.similarity.SimilarityFiltersJaccard;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFiltersJaccard;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/SimilarityDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/SimilarityDescriptor.java
index 6ad7b4e..1fa2e53 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/SimilarityDescriptor.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/SimilarityDescriptor.java
@@ -32,10 +32,10 @@
import edu.uci.ics.asterix.om.types.EnumDeserializer;
import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
import edu.uci.ics.asterix.runtime.evaluators.common.SimilarityFiltersCache;
-import edu.uci.ics.fuzzyjoin.IntArray;
-import edu.uci.ics.fuzzyjoin.similarity.PartialIntersect;
-import edu.uci.ics.fuzzyjoin.similarity.SimilarityFilters;
-import edu.uci.ics.fuzzyjoin.similarity.SimilarityMetric;
+import edu.uci.ics.asterix.fuzzyjoin.IntArray;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.PartialIntersect;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityFilters;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityMetric;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/SpatialIntersectDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/SpatialIntersectDescriptor.java
index 9ebefd7..13f8d23 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/SpatialIntersectDescriptor.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/SpatialIntersectDescriptor.java
@@ -35,7 +35,7 @@
import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
import edu.uci.ics.asterix.runtime.evaluators.common.DoubleArray;
import edu.uci.ics.asterix.runtime.evaluators.common.SpatialUtils;
-import edu.uci.ics.fuzzyjoin.IntArray;
+import edu.uci.ics.asterix.fuzzyjoin.IntArray;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
diff --git a/pom.xml b/pom.xml
index 8a78880..c0dfb4e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -99,6 +99,7 @@
<module>asterix-installer</module>
<module>asterix-events</module>
<module>asterix-doc</module>
+ <module>asterix-fuzzyjoin</module>
</modules>
<repositories>