[ASTERIXDB-2773] Support inverted index with var-len pk
Currently, an inverted index can only be created on a dataset with a fixed-length primary key (e.g. int). This PR adds support for inverted indexes on datasets with a variable-length primary key (e.g. string).
Change-Id: I9ec3100615585be952806098180e92a0d1f87c0a
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/7804
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ian Maxon <imaxon@uci.edu>
diff --git a/asterixdb/asterix-app/data/dblp-small/dblp-small-nulls-string-as-primary-key.adm b/asterixdb/asterix-app/data/dblp-small/dblp-small-nulls-string-as-primary-key.adm
new file mode 100644
index 0000000..b092b64
--- /dev/null
+++ b/asterixdb/asterix-app/data/dblp-small/dblp-small-nulls-string-as-primary-key.adm
@@ -0,0 +1,100 @@
+{ "id": "2", "dblpid": "books/acm/kim95/Blakeley95", "title": "OQL[C++] Extending C++ with an Object Query Capability.", "authors": "José A. Blakeley", "misc": "2002-01-03 69-88 Modern Database Systems db/books/collections/kim95.html#Blakeley95 1995" }
+{ "id": "4", "dblpid": "books/acm/kim95/ChristodoulakisK95", "title": "Multimedia Information Systems Issues and Approaches.", "authors": "Stavros Christodoulakis Leonidas Koveos", "misc": "2002-01-03 318-337 1995 Modern Database Systems db/books/collections/kim95.html#ChristodoulakisK95" }
+{ "id": "6", "dblpid": "books/acm/kim95/DittrichD95", "title": "Where Object-Oriented DBMSs Should Do Better A Critique Based on Early Experiences.", "authors": "Angelika Kotz Dittrich Klaus R. Dittrich", "misc": "2002-01-03 238-254 1995 Modern Database Systems db/books/collections/kim95.html#DittrichD95" }
+{ "id": "8", "dblpid": "books/acm/kim95/Goodman95", "title": "An Object-Oriented DBMS War Story Developing a Genome Mapping Database in C++.", "authors": "Nathan Goodman", "misc": "2002-01-03 216-237 1995 Modern Database Systems db/books/collections/kim95.html#Goodman95" }
+{ "id": "10", "dblpid": "books/acm/kim95/KelleyGKRG95", "title": "Schema Architecture of the UniSQL/M Multidatabase System", "authors": "William Kelley Sunit K. Gala Won Kim Tom C. Reyes Bruce Graham", "misc": "2004-03-08 Modern Database Systems books/acm/Kim95 621-648 1995 db/books/collections/kim95.html#KelleyGKRG95" }
+{ "id": "12", "dblpid": "books/acm/kim95/Kim95", "authors": "Won Kim", "misc": "2002-01-03 5-17 1995 Modern Database Systems db/books/collections/kim95.html#Kim95" }
+{ "id": "14", "dblpid": "books/acm/kim95/Kim95b", "title": "Introduction to Part 2 Technology for Interoperating Legacy Databases.", "authors": "Won Kim", "misc": "2002-01-03 515-520 1995 Modern Database Systems db/books/collections/kim95.html#Kim95b" }
+{ "id": "16", "dblpid": "books/acm/kim95/KimG95", "authors": "Won Kim Jorge F. Garza", "misc": "2002-01-03 203-215 1995 Modern Database Systems db/books/collections/kim95.html#KimG95" }
+{ "id": "18", "dblpid": "books/acm/kim95/Kowalski95", "title": "The POSC Solution to Managing E&P Data.", "authors": "Vincent J. Kowalski", "misc": "2002-01-03 281-301 1995 Modern Database Systems db/books/collections/kim95.html#Kowalski95" }
+{ "id": "20", "dblpid": "books/acm/kim95/Lunt95", "title": "Authorization in Object-Oriented Databases.", "authors": "Teresa F. Lunt", "misc": "2002-01-03 130-145 1995 Modern Database Systems db/books/collections/kim95.html#Lunt95" }
+{ "id": "22", "dblpid": "books/acm/kim95/Motro95", "authors": "Amihai Motro", "misc": "2002-01-03 457-476 1995 Modern Database Systems db/books/collections/kim95.html#Motro95" }
+{ "id": "24", "dblpid": "books/acm/kim95/OzsuB95", "authors": "M. Tamer Özsu José A. Blakeley", "misc": "2002-01-03 146-174 1995 Modern Database Systems db/books/collections/kim95.html#OzsuB95" }
+{ "id": "26", "dblpid": "books/acm/kim95/Samet95", "title": "Spatial Data Structures.", "authors": "Hanan Samet", "misc": "2004-03-08 361-385 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#Samet95 1995" }
+{ "id": "28", "dblpid": "books/acm/kim95/ShanADDK95", "title": "Pegasus A Heterogeneous Information Management System.", "authors": "Ming-Chien Shan Rafi Ahmed Jim Davis Weimin Du William Kent", "misc": "2004-03-08 664-682 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#ShanADDK95 1995" }
+{ "id": "30", "dblpid": "books/acm/kim95/SoleyK95", "title": "The OMG Object Model.", "authors": "Richard Mark Soley William Kent", "misc": "2002-01-03 18-41 1995 Modern Database Systems db/books/collections/kim95.html#SoleyK95" }
+{ "id": "32", "dblpid": "books/acm/kim95/Thompson95", "title": "The Changing Database Standards Landscape.", "authors": "Craig W. Thompson", "misc": "2002-01-03 302-317 1995 Modern Database Systems db/books/collections/kim95.html#Thompson95" }
+{ "id": "34", "dblpid": "books/acm/Kim95", "title": "Modern Database Systems The Object Model, Interoperability, and Beyond.", "authors": "", "misc": "2004-03-08 Won Kim Modern Database Systems ACM Press and Addison-Wesley 1995 0-201-59098-0 db/books/collections/kim95.html" }
+{ "id": "36", "dblpid": "books/aw/kimL89/BjornerstedtH89", "title": "Version Control in an Object-Oriented Architecture.", "authors": "Anders Björnerstedt Christer Hulten", "misc": "2006-02-24 451-485 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#BjornerstedtH89" }
+{ "id": "38", "dblpid": "books/aw/kimL89/CareyDRS89", "title": "Storage Management in EXODUS.", "authors": "Michael J. Carey David J. DeWitt Joel E. Richardson Eugene J. Shekita", "misc": "2002-01-03 341-369 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#CareyDRS89" }
+{ "id": "40", "dblpid": "books/aw/kimL89/DiederichM89", "title": "Objects, Messages, and Rules in Database Design.", "authors": "Jim Diederich Jack Milton", "misc": "2002-01-03 177-197 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#DiederichM89" }
+{ "id": "42", "dblpid": "books/aw/kimL89/FishmanABCCDHHKLLMNRSW89", "title": "Overview of the Iris DBMS.", "authors": "Daniel H. Fishman Jurgen Annevelink David Beech E. C. Chow Tim Connors J. W. Davis Waqar Hasan C. G. Hoch William Kent S. Leichner Peter Lyngbæk Brom Mahbod Marie-Anne Neimat Tore Risch Ming-Chien Shan W. Kevin Wilkinson", "misc": "2002-01-03 219-250 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#FishmanABCCDHHKLLMNRSW89" }
+{ "id": "44", "dblpid": "books/aw/kimL89/KimKD89", "title": "Indexing Techniques for Object-Oriented Databases.", "authors": "Won Kim Kyung-Chang Kim Alfred G. Dale", "misc": "2002-01-03 371-394 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#KimKD89" }
+{ "id": "46", "dblpid": "books/aw/kimL89/Maier89", "title": "Making Database Systems Fast Enough for CAD Applications.", "authors": "David Maier", "misc": "2002-01-03 573-582 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Maier89" }
+{ "id": "48", "dblpid": "books/aw/kimL89/Moon89", "title": "The Common List Object-Oriented Programming Language Standard.", "authors": "David A. Moon", "misc": "2002-01-03 49-78 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Moon89" }
+{ "id": "50", "dblpid": "books/aw/kimL89/Nierstrasz89", "title": "A Survey of Object-Oriented Concepts.", "authors": "Oscar Nierstrasz", "misc": "2002-01-03 3-21 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Nierstrasz89" }
+{ "id": "52", "dblpid": "books/aw/kimL89/Russinoff89", "title": "Proteus A Frame-Based Nonmonotonic Inference System.", "authors": "David M. Russinoff", "misc": "2002-01-03 127-150 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#Russinoff89" }
+{ "id": "54", "dblpid": "books/aw/kimL89/SteinLU89", "authors": "Lynn Andrea Stein Henry Lieberman David Ungar", "misc": "2002-01-03 31-48 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#SteinLU89" }
+{ "id": "56", "dblpid": "books/aw/kimL89/TomlinsonS89", "title": "Concurrent Object-Oriented Programming Languages.", "authors": "Chris Tomlinson Mark Scheevel", "misc": "2002-01-03 79-124 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#TomlinsonS89" }
+{ "id": "58", "dblpid": "books/aw/kimL89/Wand89", "title": "A Proposal for a Formal Model of Objects.", "authors": "Yair Wand", "misc": "2002-01-03 537-559 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Wand89" }
+{ "id": "60", "dblpid": "books/aw/stonebraker86/RoweS86", "authors": "Lawrence A. Rowe Michael Stonebraker", "misc": "2002-01-03 63-82 1986 The INGRES Papers db/books/collections/Stonebraker86.html#RoweS86 db/books/collections/Stonebraker86/RoweS86.html ingres/P063.pdf" }
+{ "id": "62", "dblpid": "books/aw/stonebraker86/Stonebraker86a", "title": "Supporting Studies on Relational Systems (Introduction to Section 2).", "authors": "Michael Stonebraker", "misc": "2002-01-03 83-85 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86a db/books/collections/Stonebraker86/Stonebraker86a.html ingres/P083.pdf" }
+{ "id": "64", "dblpid": "books/aw/stonebraker86/Stonebraker86c", "authors": "Michael Stonebraker", "misc": "2002-01-03 187-196 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86c db/books/collections/Stonebraker86/Stonebraker86c.html ingres/P187.pdf" }
+{ "id": "66", "dblpid": "books/aw/stonebraker86/Stonebraker86e", "title": "Extended Semantics for the Relational Model (Introduction to Section 5).", "authors": "Michael Stonebraker", "misc": "2002-01-03 313-316 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86e db/books/collections/Stonebraker86/Stonebraker86e.html ingres/P313.pdf" }
+{ "id": "68", "dblpid": "books/aw/stonebraker86/X86", "title": "Title, Preface, Contents.", "authors": "", "misc": "2002-01-03 1986 The INGRES Papers db/books/collections/Stonebraker86.html#X86 db/books/collections/Stonebraker86/X86.html ingres/frontmatter.pdf" }
+{ "id": "70", "dblpid": "books/aw/Knuth86a", "title": "TeX The Program", "authors": "Donald E. Knuth", "misc": "2002-01-03 Addison-Wesley 1986 0-201-13437-3" }
+{ "id": "72", "dblpid": "books/aw/Lamport86", "title": "LaTeX User's Guide & Reference Manual", "authors": "Leslie Lamport", "misc": "2002-01-03 Addison-Wesley 1986 0-201-15790-X" }
+{ "id": "74", "dblpid": "books/aw/Lamport2002", "title": "Specifying Systems, The TLA+ Language and Tools for Hardware and Software Engineers", "authors": "Leslie Lamport", "misc": "2005-07-28 Addison-Wesley 2002 0-3211-4306-X http //research.microsoft.com/users/lamport/tla/book.html" }
+{ "id": "76", "dblpid": "books/aw/LewisBK01", "title": "Databases and Transaction Processing An Application-Oriented Approach", "authors": "Philip M. Lewis Arthur J. Bernstein Michael Kifer", "misc": "2002-01-03 Addison-Wesley 2001 0-201-70872-8" }
+{ "id": "78", "dblpid": "books/aw/LindholmY97", "title": "The Java Virtual Machine Specification", "authors": "Tim Lindholm Frank Yellin", "misc": "2002-01-28 Addison-Wesley 1997 0-201-63452-X" }
+{ "id": "80", "dblpid": "books/aw/Sedgewick83", "title": "Algorithms", "authors": "Robert Sedgewick", "misc": "2002-01-03 Addison-Wesley 1983 0-201-06672-6" }
+{ "id": "82", "dblpid": "conf/focs/AspnesW92", "title": "Randomized Consensus in Expected O(n log ^2 n) Operations Per Processor", "authors": "James Aspnes Orli Waarts", "misc": "2006-04-25 137-146 conf/focs/FOCS33 1992 FOCS db/conf/focs/focs92.html#AspnesW92" }
+{ "id": "84", "dblpid": "conf/stoc/Bloniarz80", "title": "A Shortest-Path Algorithm with Expected Time O(n^2 log n log ^* n)", "authors": "Peter A. Bloniarz", "misc": "2006-04-25 378-384 conf/stoc/STOC12 1980 STOC db/conf/stoc/stoc80.html#Bloniarz80" }
+{ "id": "86", "dblpid": "conf/focs/Megiddo82", "title": "Linear-Time Algorithms for Linear Programming in R^3 and Related Problems", "authors": "Nimrod Megiddo", "misc": "2006-04-25 329-338 conf/focs/FOCS23 1982 FOCS db/conf/focs/focs82.html#Megiddo82" }
+{ "id": "88", "dblpid": "conf/focs/MoffatT85", "title": "An All Pairs Shortest Path Algorithm with Expected Running Time O(n^2 log n)", "authors": "Alistair Moffat Tadao Takaoka", "misc": "2006-04-25 101-105 conf/focs/FOCS26 1985 FOCS db/conf/focs/focs85.html#MoffatT85" }
+{ "id": "90", "dblpid": "conf/hicss/SchonfeldL99", "title": "VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases ¾ Visual Search Engine.", "authors": "Dan Schonfeld Dan Lelescu", "misc": "2002-01-03 1999 HICSS http //computer.org/proceedings/hicss/0001/00013/00013006abs.htm db/conf/hicss/hicss1999-3.html#SchonfeldL99" }
+{ "id": "92", "dblpid": "conf/stacs/Laue08", "title": "Geometric Set Cover and Hitting Sets for Polytopes in R³.", "authors": "Sören Laue", "misc": "2008-03-04 2008 STACS 479-490 http //drops.dagstuhl.de/opus/volltexte/2008/1367 conf/stacs/2008 db/conf/stacs/stacs2008.html#Laue08" }
+{ "id": "94", "dblpid": "conf/awoc/IbarraJRC88", "title": "On Some Languages in NC.", "authors": "Oscar H. Ibarra Tao Jiang Bala Ravikumar Jik H. Chang", "misc": "2002-08-06 64-73 1988 conf/awoc/1988 AWOC db/conf/awoc/awoc88.html#IbarraJRC88" }
+{ "id": "96", "dblpid": "conf/focs/GalilHLSW82", "title": "An O(n^3 log n) Deterministic and an O(n^3) Probabilistic Isomorphism Test for Trivalent Graphs", "authors": "Zvi Galil Christoph M. Hoffmann Eugene M. Luks Claus-Peter Schnorr Andreas Weber", "misc": "2006-04-25 118-125 conf/focs/FOCS23 1982 FOCS db/conf/focs/focs82.html#GalilHLSW82" }
+{ "id": "98", "dblpid": "conf/focs/GalilT86", "title": "An O(n^2 (m + n log n) log n) Min-Cost Flow Algorithm", "authors": "Zvi Galil Éva Tardos", "misc": "2006-04-25 1-9 conf/focs/FOCS27 1986 FOCS db/conf/focs/focs86.html#GalilT86" }
+{ "id": "100", "dblpid": "series/synthesis/2009Brozos", "title": "The Geometry of Walker Manifolds", "authors": "Miguel Brozos-Vázquez Eduardo García-Río Peter Gilkey Stana Nikcevic Rámon Vázquez-Lorenzo", "misc": "2009-09-06 The Geometry of Walker Manifolds http //dx.doi.org/10.2200/S00197ED1V01Y200906MAS005 http //dx.doi.org/10.2200/S00197ED1V01Y200906MAS005 2009 Synthesis Lectures on Mathematics & Statistics Morgan & Claypool Publishers" }
+{ "id": "1", "dblpid": "books/acm/kim95/AnnevelinkACFHK95", "title": "Object SQL - A Language for the Design and Implementation of Object Databases.", "authors": "Jurgen Annevelink Rafiul Ahad Amelia Carlson Daniel H. Fishman Michael L. Heytens William Kent", "misc": "2002-01-03 42-68 1995 Modern Database Systems db/books/collections/kim95.html#AnnevelinkACFHK95" }
+{ "id": "3", "dblpid": "books/acm/kim95/BreitbartGS95", "title": "Transaction Management in Multidatabase Systems.", "authors": "Yuri Breitbart Hector Garcia-Molina Abraham Silberschatz", "misc": "2004-03-08 573-591 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#BreitbartGS95 1995" }
+{ "id": "5", "dblpid": "books/acm/kim95/DayalHW95", "title": "Active Database Systems.", "authors": "Umeshwar Dayal Eric N. Hanson Jennifer Widom", "misc": "2002-01-03 434-456 1995 Modern Database Systems db/books/collections/kim95.html#DayalHW95" }
+{ "id": "7", "dblpid": "books/acm/kim95/Garcia-MolinaH95", "title": "Distributed Databases.", "authors": "Hector Garcia-Molina Meichun Hsu", "misc": "2002-01-03 477-493 1995 Modern Database Systems db/books/collections/kim95.html#Garcia-MolinaH95" }
+{ "id": "9", "dblpid": "books/acm/kim95/Kaiser95", "title": "Cooperative Transactions for Multiuser Environments.", "authors": "Gail E. Kaiser", "misc": "2002-01-03 409-433 1995 Modern Database Systems db/books/collections/kim95.html#Kaiser95" }
+{ "id": "11", "dblpid": "books/acm/kim95/KemperM95", "title": "Physical Object Management.", "authors": "Alfons Kemper Guido Moerkotte", "misc": "2002-01-03 175-202 1995 Modern Database Systems db/books/collections/kim95.html#KemperM95" }
+{ "id": "13", "dblpid": "books/acm/kim95/Kim95a", "title": "Object-Oriented Database Systems Promises, Reality, and Future.", "authors": "Won Kim", "misc": "2002-01-03 255-280 1995 Modern Database Systems db/books/collections/kim95.html#Kim95a" }
+{ "id": "15", "dblpid": "books/acm/kim95/KimCGS95", "title": "On Resolving Schematic Heterogeneity in Multidatabase Systems.", "authors": "Won Kim Injun Choi Sunit K. Gala Mark Scheevel", "misc": "2002-01-03 521-550 1995 Modern Database Systems db/books/collections/kim95.html#KimCGS95" }
+{ "id": "17", "dblpid": "books/acm/kim95/KimK95", "title": "On View Support in Object-Oriented Databases Systems.", "authors": "Won Kim William Kelley", "misc": "2002-01-03 108-129 1995 Modern Database Systems db/books/collections/kim95.html#KimK95" }
+{ "id": "19", "dblpid": "books/acm/kim95/KriegerA95", "title": "C++ Bindings to an Object Database.", "authors": "David Krieger Tim Andrews", "misc": "2002-01-03 89-107 1995 Modern Database Systems db/books/collections/kim95.html#KriegerA95" }
+{ "id": "21", "dblpid": "books/acm/kim95/MengY95", "title": "Query Processing in Multidatabase Systems.", "authors": "Weiyi Meng Clement T. Yu", "misc": "2002-01-03 551-572 1995 Modern Database Systems db/books/collections/kim95.html#MengY95" }
+{ "id": "23", "dblpid": "books/acm/kim95/Omiecinski95", "title": "Parallel Relational Database Systems.", "authors": "Edward Omiecinski", "misc": "2002-01-03 494-512 1995 Modern Database Systems db/books/collections/kim95.html#Omiecinski95" }
+{ "id": "25", "dblpid": "books/acm/kim95/RusinkiewiczS95", "title": "Specification and Execution of Transactional Workflows.", "authors": "Marek Rusinkiewicz Amit P. Sheth", "misc": "2004-03-08 592-620 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#RusinkiewiczS95 1995" }
+{ "id": "27", "dblpid": "books/acm/kim95/SametA95", "title": "Spatial Data Models and Query Processing.", "authors": "Hanan Samet Walid G. Aref", "misc": "2002-01-03 338-360 1995 Modern Database Systems db/books/collections/kim95.html#SametA95" }
+{ "id": "29", "dblpid": "books/acm/kim95/Snodgrass95", "title": "Temporal Object-Oriented Databases A Critical Comparison.", "authors": "Richard T. Snodgrass", "misc": "2002-01-03 386-408 1995 Modern Database Systems db/books/collections/kim95.html#Snodgrass95" }
+{ "id": "31", "dblpid": "books/acm/kim95/Stout95", "title": "EDA/SQL.", "authors": "Ralph L. Stout", "misc": "2004-03-08 649-663 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#Stout95 1995" }
+{ "id": "33", "dblpid": "books/acm/kim95/BreitbartR95", "title": "Overview of the ADDS System.", "authors": "Yuri Breitbart Tom C. Reyes", "misc": "2009-06-12 683-701 Modern Database Systems books/acm/Kim95 db/books/collections/kim95.html#BreitbartR95 1995" }
+{ "id": "35", "dblpid": "books/ap/MarshallO79", "title": "Inequalities Theory of Majorization and Its Application.", "authors": "Albert W. Marshall Ingram Olkin", "misc": "2002-01-03 Academic Press 1979 0-12-473750-1" }
+{ "id": "37", "dblpid": "books/aw/kimL89/BretlMOPSSWW89", "title": "The GemStone Data Management System.", "authors": "Robert Bretl David Maier Allen Otis D. Jason Penney Bruce Schuchardt Jacob Stein E. Harold Williams Monty Williams", "misc": "2002-01-03 283-308 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#BretlMOPSSWW89" }
+{ "id": "39", "dblpid": "books/aw/kimL89/Decouchant89", "title": "A Distributed Object Manager for the Smalltalk-80 System.", "authors": "Dominique Decouchant", "misc": "2002-01-03 487-520 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Decouchant89" }
+{ "id": "41", "dblpid": "books/aw/kimL89/EllisG89", "title": "Active Objects Ealities and Possibilities.", "authors": "Clarence A. Ellis Simon J. Gibbs", "misc": "2002-01-03 561-572 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#EllisG89" }
+{ "id": "43", "dblpid": "books/aw/kimL89/KimBCGW89", "title": "Features of the ORION Object-Oriented Database System.", "authors": "Won Kim Nat Ballou Hong-Tai Chou Jorge F. Garza Darrell Woelk", "misc": "2002-01-03 251-282 Object-Oriented Concepts, Databases, and Applications ACM Press and Addison-Wesley 1989 db/books/collections/kim89.html#KimBCGW89" }
+{ "id": "45", "dblpid": "books/aw/kimL89/King89", "title": "My Cat Is Object-Oriented.", "authors": "Roger King", "misc": "2002-01-03 23-30 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#King89" }
+{ "id": "47", "dblpid": "books/aw/kimL89/MellenderRS89", "authors": "Fred Mellender Steve Riegel Andrew Straw", "misc": "2002-01-03 423-450 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#MellenderRS89" }
+{ "id": "49", "dblpid": "books/aw/kimL89/Moss89", "title": "Object Orientation as Catalyst for Language-Database Inegration.", "authors": "J. Eliot B. Moss", "misc": "2002-01-03 583-592 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#Moss89" }
+{ "id": "51", "dblpid": "books/aw/kimL89/NierstraszT89", "title": "Integrated Office Systems.", "authors": "Oscar Nierstrasz Dennis Tsichritzis", "misc": "2002-01-03 199-215 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#NierstraszT89" }
+{ "id": "53", "dblpid": "books/aw/kimL89/SkarraZ89", "title": "Concurrency Control and Object-Oriented Databases.", "authors": "Andrea H. Skarra Stanley B. Zdonik", "misc": "2002-01-03 395-421 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#SkarraZ89" }
+{ "id": "55", "dblpid": "books/aw/kimL89/TarltonT89", "title": "Pogo A Declarative Representation System for Graphics.", "authors": "Mark A. Tarlton P. Nong Tarlton", "misc": "2002-01-03 151-176 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#TarltonT89" }
+{ "id": "57", "dblpid": "books/aw/kimL89/TsichritzisN89", "title": "Directions in Object-Oriented Research.", "authors": "Dennis Tsichritzis Oscar Nierstrasz", "misc": "2002-01-03 523-536 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#TsichritzisN89" }
+{ "id": "59", "dblpid": "books/aw/kimL89/WeiserL89", "title": "OZ+ An Object-Oriented Database System.", "authors": "Stephen P. Weiser Frederick H. Lochovsky", "misc": "2002-01-03 309-337 1989 Object-Oriented Concepts, Databases, and Applications db/books/collections/kim89.html#WeiserL89" }
+{ "id": "61", "dblpid": "books/aw/stonebraker86/Stonebraker86", "title": "Design of Relational Systems (Introduction to Section 1).", "authors": "Michael Stonebraker", "misc": "2002-01-03 1-3 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86 db/books/collections/Stonebraker86/Stonebraker86.html ingres/P001.pdf" }
+{ "id": "63", "dblpid": "books/aw/stonebraker86/Stonebraker86b", "title": "Distributed Database Systems (Introduction to Section 3).", "authors": "Michael Stonebraker", "misc": "2002-01-03 183-186 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86b db/books/collections/Stonebraker86/Stonebraker86b.html ingres/P183.pdf" }
+{ "id": "65", "dblpid": "books/aw/stonebraker86/Stonebraker86d", "title": "User Interfaces for Database Systems (Introduction to Section 4).", "authors": "Michael Stonebraker", "misc": "2002-01-03 243-245 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86d db/books/collections/Stonebraker86/Stonebraker86d.html ingres/P243.pdf" }
+{ "id": "67", "dblpid": "books/aw/stonebraker86/Stonebraker86f", "authors": "Michael Stonebraker", "misc": "2002-01-03 393-394 1986 The INGRES Papers db/books/collections/Stonebraker86.html#Stonebraker86f db/books/collections/Stonebraker86/Stonebraker86f.html ingres/P393.pdf" }
+{ "id": "69", "dblpid": "books/aw/stonebraker86/X86a", "title": "References.", "authors": "", "misc": "2002-01-03 429-444 1986 The INGRES Papers db/books/collections/Stonebraker86.html#X86a db/books/collections/Stonebraker86/X86a.html ingres/P429.pdf" }
+{ "id": "71", "dblpid": "books/aw/AbiteboulHV95", "title": "Foundations of Databases.", "authors": "Serge Abiteboul Richard Hull Victor Vianu", "misc": "2002-01-03 Addison-Wesley 1995 0-201-53771-0 AHV/Toc.pdf ... ... journals/tods/AstrahanBCEGGKLMMPTWW76 books/bc/AtzeniA93 journals/tcs/AtzeniABM82 journals/jcss/AbiteboulB86 journals/csur/AtkinsonB87 conf/pods/AtzeniB87 journals/vldb/AbiteboulB95 conf/sigmod/AbiteboulB91 conf/dood/AtkinsonBDDMZ89 conf/vldb/AlbanoBGO93 ... conf/icdt/Abiteboul88 journals/ipl/Abiteboul89 conf/ds/Abrial74 journals/tods/AhoBU79 books/mk/minker88/AptBW88 conf/vldb/AroraC78 conf/stoc/AfratiC89 journals/tods/AlbanoCO85 conf/pods/AfratiCY91 conf/pods/AusielloDM85 conf/vldb/AbiteboulG85 journals/jacm/AjtaiG87 conf/focs/AjtaiG89 journals/tods/AbiteboulG91 ... ... journals/tods/AbiteboulH87 conf/sigmod/AbiteboulH88 ... conf/sigmod/AbiteboulK89 journals/tcs/AbiteboulKG91 journals/jcss/AbiteboulKRW95 conf/sigmod/AbiteboulLUW93 conf/pods/AtzeniP82 conf/pods/AfratiP87 conf/pods/AptP87 conf/wg/AndriesP91 conf/pods/AfratiPPRSU86 books/el/leeuwen90/Apt90 conf/ifip/Armstrong74 journals/siamcomp/AhoSSU81 journals/tods/AhoSU79 journals/siamcomp/AhoSU79 conf/pods/AbiteboulSV90 journals/is/AtzeniT93 conf/popl/AhoU79 conf/pods/AbiteboulV87 conf/jcdkb/AbiteboulV88 journals/jacm/AbiteboulV88 conf/pods/AbiteboulV88 journals/jacm/AbiteboulV89 journals/jcss/AbiteboulV90 journals/jcss/AbiteboulV91 conf/stoc/AbiteboulV91 journals/amai/AbiteboulV91 journals/jcss/AbiteboulV95 journals/jacm/AptE82 conf/coco/AbiteboulVV92 conf/iclp/AptB88 conf/oopsla/BobrowKKMSZ86 journals/tse/BatoryBGSTTW88 conf/mfcs/Bancilhon78 ... conf/db-workshops/Bancilhon85 books/el/leeuwen90/Barendregt90 ... 
journals/tods/BeeriB79 books/el/leeuwen90/BerstelB90 conf/icdt/BeneventanoB92 conf/vldb/BernsteinBC80 conf/vldb/BeeriBG78 conf/sigmod/BorgidaBMR89 journals/tods/BunemanC79 journals/jacm/BernsteinC81 conf/dbpl/BancilhonCD89 books/bc/tanselCGSS93/BaudinetCW93 conf/sigmod/BiskupDB79 journals/jacm/BeeriDFS84 books/mk/BancilhonDK92 conf/edbt/BryDM88 conf/pods/BunemanDW88 journals/jcss/BunemanDW91 journals/tods/Beeri80 journals/dke/Beeri90 ... journals/tods/Bernstein76 conf/lics/BidoitF87 journals/iandc/BidoitF91 conf/sigmod/BeeriFH77 conf/stoc/BeeriFMMUY81 journals/jacm/BeeriFMY83 journals/tods/BunemanFN82 journals/siamcomp/BernsteinG81 journals/iandc/BlassGK85 conf/ijcai/BrachmanGL85 journals/tods/BernsteinGWRR81 books/aw/BernsteinHG87 ... journals/tcs/Bidoit91 journals/tcs/Biskup80 conf/adbt/Biskup79 journals/tods/Biskup83 journals/tcs/BunemanJO91 journals/tods/BeeriK86 conf/pods/BeeriKBR87 conf/icdt/BidoitL90 journals/csur/BatiniL86 conf/sigmod/BlakeleyLT86 conf/vldb/BeeriM91 conf/sigmod/BlakeleyMG93 journals/siamcomp/BeeriMSU81 conf/pods/BancilhonMSU86 conf/pods/BeeriNRST87 journals/software/Borgida85 conf/icalp/BraP83 conf/fgcs/BalbinMR88 ... conf/pods/BeeriR87 journals/jlp/BalbinR87 conf/sigmod/BancilhonR86 books/mk/minker88/BancilhonR88 journals/jlp/BeeriR91 conf/vldb/BancilhonRS82 conf/pods/BeeriRSS92 conf/dood/Bry89 journals/tods/BancilhonS81 journals/cogsci/BrachmanS85 journals/tods/BergamaschiS92 conf/sigmod/BernsteinST75 conf/dbpl/TannenBN91 conf/icdt/TannenBW92 ... journals/jacm/BeeriV84 conf/icalp/BeeriV81 conf/adbt/BeeriV79 journals/siamcomp/BeeriV84 journals/iandc/BeeriV84 journals/jacm/BeeriV84 journals/tcs/BeeriV85 journals/ibmrd/ChamberlinAEGLMRW76 ... 
journals/iandc/Cardelli88 books/mk/Cattell94 conf/sigmod/CacaceCCTZ90 conf/vldb/CastilhoCF82 conf/adbt/CasanovaF82 conf/focs/CaiFI89 journals/jcss/CasanovaFP84 conf/stoc/CosmadakisGKV88 conf/dood/CorciuloGP93 books/sp/CeriGT90 conf/focs/ChandraH80 journals/jcss/ChandraH80 journals/jcss/ChandraH82 journals/jlp/ChandraH85 conf/popl/Chandra81 conf/adbt/Chang79 conf/pods/Chandra88 ... journals/tods/Chen76 conf/ride/ChenHM94 conf/icde/Chomicki92 conf/pods/Chomicki92 ... ... ... conf/stoc/CosmadakisK85 journals/acr/CosmadakisK86 ... journals/jcss/CosmadakisKS86 journals/jacm/CosmadakisKV90 ... conf/pods/CalvaneseL94 conf/adbt/Clark77 conf/stoc/ChandraLM81 conf/stoc/ChandraM77 conf/pods/ConsensM90 conf/sigmod/ConsensM93 conf/icdt/ConsensM90 journals/cacm/Codd70 conf/sigmod/Codd71a persons/Codd71a persons/Codd72 conf/ifip/Codd74 ... conf/sigmod/Codd79 journals/cacm/Codd82 ... conf/sigmod/Cohen89 journals/cacm/Cohen90 ... journals/jcss/Cook74 conf/pods/Cosmadakis83 conf/focs/Cosmadakis87 books/el/leeuwen90/Courcelle90a journals/jacm/CosmadakisP84 conf/edbt/CeriCGLLTZ88 ... conf/vldb/CeriT87 conf/vldb/CasanovaTF88 ... conf/pods/CasanovaV83 journals/siamcomp/ChandraV85 conf/pods/ChaudhuriV92 conf/pods/ChaudhuriV93 conf/pods/ChaudhuriV94 journals/csur/CardelliW85 conf/pods/ChenW89 conf/pods/CohenW89 conf/vldb/CeriW90 conf/vldb/CeriW91 conf/iclp/ChenW92 conf/vldb/CeriW93 ... conf/birthday/Dahlhaus87 conf/vldb/Date81 books/aw/Date86 ... conf/dbpl/Dayal89 journals/tods/DayalB82 journals/ibmrd/DelobelC73 conf/icde/DelcambreD89 ... journals/tods/Delobel78 journals/jacm/Demolombe92 journals/tods/DateF92 ... conf/vldb/DayalHL91 journals/jacm/Paola69a conf/caap/DahlhausM86 journals/acr/DAtriM86 journals/iandc/DahlhausM92 conf/sigmod/DerrMP93 conf/vldb/MaindrevilleS88 conf/pods/Dong92 conf/adbt/BraP82 ... conf/dbpl/DongS91 journals/iandc/DongS95 conf/dbpl/DongS93 conf/dbpl/DongS93 conf/icdt/DongT92 conf/vldb/DenninghoffV91 conf/pods/DenninghoffV93 ... ... books/acm/kim95/DayalHW95 ... 
conf/pods/EiterGM94 conf/pods/Escobar-MolanoHJ93 ... books/el/leeuwen90/Emerson90 books/bc/ElmasriN89 ... conf/icse/Eswaran76 conf/sigmod/EpsteinSW78 ... ... conf/vldb/Fagin77 journals/tods/Fagin77 conf/sigmod/Fagin79 journals/tods/Fagin81 journals/ipl/FaginV83 journals/jacm/Fagin82 journals/jacm/Fagin83 journals/tcs/Fagin93 books/sp/kimrb85/FurtadoC85 ... journals/jlp/Fitting85a journals/tcs/FischerJT83 journals/acr/FaginKUV86 conf/icdt/FernandezM92 journals/tods/FaginMU82 conf/vldb/FaloutsosNS91 ... journals/ai/Forgy82 ... conf/sigmod/Freytag87 ... journals/siamcomp/FischerT83 journals/siamcomp/FaginMUY83 conf/pods/FaginUV83 conf/icalp/FaginV84 ... ... ... ... conf/sigmod/GraefeD87 conf/ride/GatziuD94 conf/sigmod/GardarinM86 conf/sigmod/GyssensG88 journals/tcs/GinsburgH83a journals/jacm/GinsburgH86 ... books/bc/tanselCGSS93/Ginsburg93 books/fm/GareyJ79 journals/jacm/GrantJ82 conf/vldb/GehaniJ91 conf/vldb/GhandeharizadehHJCELLTZ93 journals/tods/GhandeharizadehHJ96 conf/vldb/GehaniJS92 ... conf/sigmod/GehaniJS92 ... conf/deductive/GuptaKM92 conf/pods/GurevichL82 conf/iclp/GelfondL88 conf/adbt/77 journals/csur/GallaireMN84 conf/pods/GrahneMR92 conf/sigmod/GuptaMS93 conf/lics/GaifmanMSV87 journals/jacm/GaifmanMSV93 journals/jacm/GrahamMV86 conf/csl/GradelO92 ... conf/pods/Gottlob87 conf/pods/GyssensPG90 conf/dood/GiannottiPSZ91 books/aw/GoldbergR83 journals/acr/GrahneR86 journals/ipl/Grant77 ... journals/iandc/Grandjean83 conf/vldb/Grahne84 ... journals/csur/Graefe93 books/sp/Greibach75 journals/tods/GoodmanS82 journals/jcss/GoodmanS84 conf/focs/GurevichS85 ... conf/pods/GrumbachS94 conf/sigmod/GangulyST90 ... journals/tcs/Gunter92 ... ... ... ... conf/pods/GrahamV84 conf/pods/GrumbachV91 conf/icde/GardarinV92 conf/sigmod/GraefeW89 ... journals/jacm/GinsburgZ82 conf/vldb/GottlobZ88 ... ... journals/sigmod/Hanson89 ... 
journals/cacm/Harel80 journals/tkde/HaasCLMWLLPCS90 conf/lics/Hella92 journals/iandc/Herrmann95 conf/pods/HirstH93 conf/vldb/HullJ91 conf/ewdw/HullJ90 journals/csur/HullK87 journals/tods/HudsonK89 conf/lics/HillebrandKM93 conf/nato/HillebrandKR93 conf/jcdkb/HsuLM88 journals/ipl/HoneymanLY80 journals/tods/HammerM81 conf/adbt/HenschenMN82 ... journals/jacm/HenschenN84 journals/jacm/Honeyman82 conf/sigmod/HullS89 conf/pods/HullS89 journals/acta/HullS94 journals/jcss/HullS93 conf/fodo/HullTY89 journals/jcss/Hull83 journals/jacm/Hull84 journals/tcs/Hull85 journals/siamcomp/Hull86 ... conf/vldb/Hulin89 ... journals/jacm/HullY84 conf/vldb/HullY90 conf/pods/HullY91 conf/sigmod/IoannidisK90 journals/jcss/ImielinskiL84 conf/adbt/Imielinski82 journals/jcss/Immerman82 journals/iandc/Immerman86 ... journals/siamcomp/Immerman87 conf/pods/ImielinskiN88 conf/vldb/IoannidisNSS92 conf/sigmod/ImielinskiNV91 conf/dood/ImielinskiNV91 conf/vldb/Ioannidis85 journals/jacm/Jacobs82 conf/dbpl/JacobsH91 journals/csur/JarkeK84 journals/jcss/JohnsonK84 conf/popl/JaffarL87 books/el/leeuwen90/Johnson90 journals/jacm/Joyner76 conf/pods/JaeschkeS82 ... books/mk/minker88/Kanellakis88 books/el/leeuwen90/Kanellakis90 conf/oopsla/KhoshafianC86 conf/edbt/KotzDM88 conf/jcdkb/Keller82 conf/pods/Keller85 journals/computer/Keller86 ... journals/tods/Kent79 ... journals/ngc/RohmerLK86 conf/tacs/KanellakisG94 conf/jcdkb/Kifer88 conf/pods/KanellakisKR90 conf/sigmod/KiferKS92 ... conf/icdt/KiferL86 books/aw/KimL89 ... journals/tods/Klug80 journals/jacm/Klug82 journals/jacm/Klug88 journals/jacm/KiferLW95 conf/kr/KatsunoM91 journals/ai/KatsunoM92 conf/jcdkb/KrishnamurthyN88 journals/csur/Knight89 ... journals/iandc/Kolaitis91 journals/ai/Konolige88 conf/ifip/Kowalski74 journals/jacm/Kowalski75 conf/bncod/Kowalski84 conf/vldb/KoenigP81 journals/tods/KlugP82 ... 
conf/pods/KolaitisP88 conf/pods/KiferRS88 conf/sigmod/KrishnamurthyRS88 books/mg/SilberschatzK91 conf/iclp/KempT88 conf/sigmod/KellerU84 conf/dood/Kuchenhoff91 ... journals/jlp/Kunen87 conf/iclp/Kunen88 conf/pods/Kuper87 conf/pods/Kuper88 conf/ppcp/Kuper93 conf/pods/KuperV84 conf/stoc/KolaitisV87 journals/tcs/KarabegV90 journals/iandc/KolaitisV90 conf/pods/KolaitisV90 journals/tods/KarabegV91 journals/iandc/KolaitisV92 journals/tcs/KuperV93 journals/tods/KuperV93 journals/tse/KellerW85 conf/pods/KiferW89 conf/jcdkb/Lang88 books/el/Leeuwen90 ... journals/jcss/Leivant89 ... journals/iandc/Leivant90 ... conf/db-workshops/Levesque82 journals/ai/Levesque84 conf/mfdbs/Libkin91 conf/er/Lien79 journals/jacm/Lien82 books/mk/minker88/Lifschitz88 ... journals/tcs/Lindell91 journals/tods/Lipski79 journals/jacm/Lipski81 journals/tcs/LeratL86 journals/cj/LeveneL90 books/sp/Lloyd87 conf/pods/LakshmananM89 conf/tlca/LeivantM93 conf/sigmod/LaverMG83 conf/pods/LiptonN90 journals/jcss/LucchesiO78 conf/sigmod/Lohman88 ... conf/ijcai/Lozinskii85 books/ph/LewisP81 ... conf/sigmod/LecluseRV88 journals/is/LipeckS87 journals/jlp/LloydST87 journals/tods/LingTK81 conf/sigmod/LyngbaekV87 conf/dood/LefebvreV89 conf/pods/LibkinW93 conf/dbpl/LibkinW93 journals/jacm/Maier80 books/cs/Maier83 ... conf/vldb/Makinouchi77 conf/icalp/Makowsky81 ... conf/icdt/Malvestuto86 conf/aaai/MacGregorB92 journals/tods/MylopoulosBW80 conf/sigmod/McCarthyD89 journals/csur/MishraE92 conf/sigmod/MumickFPR90 books/mk/Minker88 journals/jlp/Minker88 conf/vldb/MillerIR93 journals/is/MillerIR94 journals/iandc/Mitchell83 conf/pods/Mitchell83 conf/vldb/MendelzonM79 journals/tods/MaierMS79 journals/jcss/MaierMSU80 conf/pods/MendelzonMW94 journals/debu/MorrisNSUG87 journals/ai/Moore85 conf/vldb/Morgenstern83 conf/pods/Morris88 ... conf/pods/MannilaR85 ... journals/jlp/MinkerR90 books/aw/MannilaR92 journals/acr/MaierRW86 ... 
journals/tods/MarkowitzS92 conf/pods/Marchetti-SpaccamelaPS87 journals/jacm/MaierSY81 conf/iclp/MorrisUG86 journals/tods/MaierUV84 conf/iclp/MorrisUG86 journals/acta/MakowskyV86 books/bc/MaierW88 books/mk/minker88/ManchandraW88 conf/pods/Naughton86 conf/sigmod/NgFS91 ... conf/vldb/Nejdl87 conf/adbt/NicolasM77 conf/sigmod/Nicolas78 journals/acta/Nicolas82 conf/ds/76 conf/pods/NaqviK88 journals/tods/NegriPS91 conf/vldb/NaughtonRSU89 conf/pods/NaughtonS87 ... ... conf/vldb/Osborn79 ... journals/tods/OzsoyogluY87 conf/adbt/Paige82 ... books/cs/Papadimitriou86 ... journals/ipl/Paredaens78 ... books/sp/ParedaensBGG89 journals/ai/Andersen91 books/el/leeuwen90/Perrin90 journals/ins/Petrov89 conf/pods/ParedaensG88 conf/pods/PatnaikI94 conf/adbt/ParedaensJ79 journals/csur/PeckhamM88 ... ... conf/sigmod/ParkerP80 ... conf/iclp/Przymusinski88 conf/pods/Przymusinski89 ... conf/vldb/ParkerSV92 conf/aaai/PearlV87 journals/ai/PereiraW80a conf/pods/PapadimitriouY92 journals/tkde/QianW91 ... journals/jlp/Ramakrishnan91 conf/pods/RamakrishnanBS87 ... conf/adbt/Reiter77 journals/ai/Reiter80 conf/db-workshops/Reiter82 journals/jacm/Reiter86 journals/tods/Rissanen77 conf/mfcs/Rissanen78 conf/pods/Rissanen82 ... journals/ngc/RohmerLK86 journals/jacm/Robinson65 ... conf/pods/Ross89 ... ... conf/sigmod/RoweS79 conf/sigmod/RichardsonS91 journals/debu/RamamohanaraoSBPNTZD87 conf/vldb/RamakrishnanSS92 conf/sigmod/RamakrishnanSSS93 conf/pods/RamakrishnanSUV89 journals/jcss/RamakrishnanSUV93 journals/jlp/RamakrishnanU95 conf/sigmod/SelingerACLP79 conf/sigmod/Sagiv81 journals/tods/Sagiv83 books/mk/minker88/Sagiv88 conf/slp/Sagiv90 conf/sigmod/Sciore81 journals/jacm/Sciore82 conf/pods/Sciore83 journals/acr/Sciore86 journals/jacm/SagivDPF81 conf/pods/X89 ... 
journals/ai/SmithG85 books/mk/minker88/Shepherdson88 journals/tods/Shipman81 conf/pods/Shmueli87 conf/iclp/SekiI88 conf/sigmod/ShmueliI84 journals/tc/Sickel76 journals/jsc/Siekmann89 conf/sigmod/StonebrakerJGP90 conf/vldb/SimonKM92 journals/csur/ShethL90 conf/pods/SeibL91 conf/sigmod/SuLRD93 conf/adbt/SilvaM79 journals/sigmod/Snodgrass90 journals/sigmod/Soo91 conf/pods/SuciuP94 conf/sigmod/StonebrakerR86 conf/slp/SudarshanR93 conf/pods/SagivS86 journals/cacm/Stonebraker81 books/mk/Stonebraker88 journals/tkde/Stonebraker92 books/aw/Stroustrup91 journals/jacm/SadriU82 conf/vldb/Su91 conf/pods/SagivV89 journals/jacm/SagivW82 journals/tods/StonebrakerWKH76 journals/jacm/SagivY80 conf/pods/SaccaZ86 journals/tcs/SaccaZ88 ... conf/pods/SaccaZ90 ... ... books/bc/TanselCGJSS93 ... journals/acr/ThomasF86 ... ... ... ... journals/tcs/Topor87 ... books/mk/minker88/ToporS88 ... journals/siamcomp/TarjanY84 journals/csur/TeoreyYF86 journals/algorithmica/UllmanG88 conf/pods/Ullman82 books/cs/Ullman82 journals/tods/Ullman85 books/cs/Ullman88 conf/pods/Ullman89 books/cs/Ullman89 conf/sigmod/Gelder86 ... conf/pods/BusscheG92 conf/focs/BusscheGAG92 conf/pods/BusscheP91 conf/slp/Gelder86 conf/pods/Gelder89 conf/pods/GelderRS88 journals/jacm/GelderRS91 journals/tods/GelderT91 journals/ipl/Vardi81 conf/stoc/Vardi82 conf/focs/Vardi82 journals/acta/Vardi83 journals/jcss/Vardi84 conf/pods/Vardi85 conf/pods/Vardi86 journals/jcss/Vardi86 ... conf/pods/Vardi88 conf/sigmod/Vassiliou79 ... ... journals/jacm/EmdenK76 conf/nf2/SchollABBGPRV87 journals/jacm/Vianu87 journals/acta/Vianu87 conf/eds/Vieille86 conf/iclp/Vieille87 ... conf/eds/Vieille88 journals/tcs/Vieille89 ... journals/tcs/VianuV92 conf/sigmod/WidomF90 conf/icde/WangH92 conf/pos/WidjojoHW90 journals/computer/Wiederhold92 conf/pods/Wilkins86 conf/pods/Winslett88 conf/sigmod/WolfsonO90 conf/pods/Wong93 conf/sigmod/WolfsonS88 journals/ibmrd/WangW75 journals/tods/WongY76 conf/vldb/Yannakakis81 journals/csur/YuC84 ... 
journals/jcss/YannakakisP82 ... journals/tods/Zaniolo82 journals/jcss/Zaniolo84 ... conf/edbt/ZhouH90 journals/ibmsj/Zloof77 books/mk/ZdonikM90 db/books/dbtext/abiteboul95.html" }
+{ "id": "73", "dblpid": "books/aw/AhoHU74", "title": "The Design and Analysis of Computer Algorithms.", "authors": "Alfred V. Aho John E. Hopcroft Jeffrey D. Ullman", "misc": "2002-01-03 Addison-Wesley 1974 0-201-00029-6" }
+{ "id": "75", "dblpid": "books/aw/AhoHU83", "title": "Data Structures and Algorithms.", "authors": "Alfred V. Aho John E. Hopcroft Jeffrey D. Ullman", "misc": "2002-01-03 Addison-Wesley 1983 0-201-00023-7" }
+{ "id": "77", "dblpid": "books/aw/AhoKW88", "title": "The AWK Programming Language", "authors": "Alfred V. Aho Brian W. Kernighan Peter J. Weinberger", "misc": "2002-01-03 Addison-Wesley 1988" }
+{ "id": "79", "dblpid": "books/aw/AhoSU86", "title": "Compilers Princiles, Techniques, and Tools.", "authors": "Alfred V. Aho Ravi Sethi Jeffrey D. Ullman", "misc": "2002-01-03 Addison-Wesley 1986 0-201-10088-6" }
+{ "id": "81", "dblpid": "journals/siamcomp/AspnesW96", "authors": "James Aspnes Orli Waarts", "misc": "2002-01-03 1024-1044 1996 25 SIAM J. Comput. 5 db/journals/siamcomp/siamcomp25.html#AspnesW96" }
+{ "id": "83", "dblpid": "journals/siamcomp/Bloniarz83", "title": "A Shortest-Path Algorithm with Expected Time O(n² log n log* n).", "authors": "Peter A. Bloniarz", "misc": "2002-01-03 588-600 1983 12 SIAM J. Comput. 3 db/journals/siamcomp/siamcomp12.html#Bloniarz83" }
+{ "id": "85", "dblpid": "journals/siamcomp/Megiddo83a", "title": "Linear-Time Algorithms for Linear Programming in R³ and Related Problems.", "authors": "Nimrod Megiddo", "misc": "2002-01-03 759-776 1983 12 SIAM J. Comput. 4 db/journals/siamcomp/siamcomp12.html#Megiddo83a" }
+{ "id": "87", "dblpid": "journals/siamcomp/MoffatT87", "title": "An All Pairs Shortest Path Algorithm with Expected Time O(n² log n).", "authors": "Alistair Moffat Tadao Takaoka", "misc": "2002-01-03 1023-1031 1987 16 SIAM J. Comput. 6 db/journals/siamcomp/siamcomp16.html#MoffatT87" }
+{ "id": "89", "dblpid": "conf/icip/SchonfeldL98", "title": "VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases.", "authors": "Dan Schonfeld Dan Lelescu", "misc": "2002-11-05 123-127 1998 ICIP (3) db/conf/icip/icip1998-3.html#SchonfeldL98" }
+{ "id": "91", "dblpid": "journals/corr/abs-0802-2861", "title": "Geometric Set Cover and Hitting Sets for Polytopes in $R^3$", "authors": "Sören Laue", "misc": "2008-03-03 http //arxiv.org/abs/0802.2861 2008 CoRR abs/0802.2861 db/journals/corr/corr0802.html#abs-0802-2861 informal publication" }
+{ "id": "93", "dblpid": "journals/iandc/IbarraJCR91", "authors": "Oscar H. Ibarra Tao Jiang Jik H. Chang Bala Ravikumar", "misc": "2006-04-25 86-106 Inf. Comput. January 1991 90 1 db/journals/iandc/iandc90.html#IbarraJCR91" }
+{ "id": "95", "dblpid": "journals/jacm/GalilHLSW87", "title": "An O(n³log n) deterministic and an O(n³) Las Vegs isomorphism test for trivalent graphs.", "authors": "Zvi Galil Christoph M. Hoffmann Eugene M. Luks Claus-Peter Schnorr Andreas Weber", "misc": "2003-11-20 513-531 1987 34 J. ACM 3 http //doi.acm.org/10.1145/28869.28870 db/journals/jacm/jacm34.html#GalilHLSW87" }
+{ "id": "97", "dblpid": "journals/jacm/GalilT88", "title": "An O(n²(m + n log n)log n) min-cost flow algorithm.", "authors": "Zvi Galil Éva Tardos", "misc": "2003-11-20 374-386 1988 35 J. ACM 2 http //doi.acm.org/10.1145/42282.214090 db/journals/jacm/jacm35.html#GalilT88" }
+{ "id": "99", "dblpid": "series/synthesis/2009Weintraub", "title": "Jordan Canonical Form Theory and Practice", "authors": "Steven H. Weintraub", "misc": "2009-09-06 Jordan Canonical Form Theory and Practice http //dx.doi.org/10.2200/S00218ED1V01Y200908MAS006 http //dx.doi.org/10.2200/S00218ED1V01Y200908MAS006 2009 Synthesis Lectures on Mathematics & Statistics Morgan & Claypool Publishers" }
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
index 9a8895b..f912c76 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
@@ -92,7 +92,6 @@
import org.apache.asterix.external.operators.FeedIntakeOperatorNodePushable;
import org.apache.asterix.external.util.ExternalDataConstants;
import org.apache.asterix.external.util.ExternalDataUtils;
-import org.apache.asterix.formats.nontagged.TypeTraitProvider;
import org.apache.asterix.lang.common.base.IReturningStatement;
import org.apache.asterix.lang.common.base.IRewriterFactory;
import org.apache.asterix.lang.common.base.IStatementRewriter;
@@ -214,7 +213,6 @@
import org.apache.hyracks.algebricks.runtime.writers.PrinterBasedWriterFactory;
import org.apache.hyracks.api.client.IClusterInfoCollector;
import org.apache.hyracks.api.client.IHyracksClientConnection;
-import org.apache.hyracks.api.dataflow.value.ITypeTraits;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.exceptions.SourceLocation;
import org.apache.hyracks.api.exceptions.Warning;
@@ -1105,31 +1103,6 @@
validateIndexKeyFields(stmtCreateIndex, keySourceIndicators, aRecordType, metaRecordType, indexFields,
indexFieldTypes);
- // Checks whether a user is trying to create an inverted secondary index on a
- // dataset
- // with a variable-length primary key.
- // Currently, we do not support this. Therefore, as a temporary solution, we
- // print an
- // error message and stop.
- if (indexType == IndexType.SINGLE_PARTITION_WORD_INVIX
- || indexType == IndexType.SINGLE_PARTITION_NGRAM_INVIX
- || indexType == IndexType.LENGTH_PARTITIONED_WORD_INVIX
- || indexType == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX) {
- List<List<String>> partitioningKeys = ds.getPrimaryKeys();
- for (List<String> partitioningKey : partitioningKeys) {
- IAType keyType = aRecordType.getSubFieldType(partitioningKey);
- ITypeTraits typeTrait = TypeTraitProvider.INSTANCE.getTypeTrait(keyType);
-
- // If it is not a fixed length
- if (!typeTrait.isFixedLength()) {
- throw new CompilationException(ErrorCode.COMPILATION_ERROR, sourceLoc,
- "The keyword or ngram index " + indexName + " cannot be created on the dataset "
- + datasetName + " due to its variable-length primary key field "
- + partitioningKey);
- }
-
- }
- }
Index newIndex = new Index(dataverseName, datasetName, indexName, indexType, indexFields,
keySourceIndicators, indexFieldTypes, stmtCreateIndex.getGramLength(), overridesFieldTypes,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.1.ddl.sqlpp
new file mode 100644
index 0000000..4c9bae5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.1.ddl.sqlpp
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.sqlpp
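+ * Description : This test is intended to test deletion from a secondary ngram inverted index. NOTE(review): the type below declares id as bigint, so the string primary key named by this test is not exercised - confirm.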
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+
+use test;
+
+
+create type test.DBLPType as
+ closed {
+ id : bigint,
+ dblpid : string,
+ title : string,
+ authors : string,
+ misc : string
+};
+
+create dataset DBLP(DBLPType) primary key id;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.2.update.sqlpp
new file mode 100644
index 0000000..08e9fec
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.2.update.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.sqlpp
+ * Description : This test is intended to test deletion from secondary ngram inverted index.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+
+load dataset DBLP using localfs ((`path`=`asterix_nc1://data/dblp-small/dblp-small-id.txt`),(`format`=`delimited-text`),(`delimiter`=`:`)) pre-sorted;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.3.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.3.ddl.sqlpp
new file mode 100644
index 0000000..157d445
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.3.ddl.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.sqlpp
+ * Description : This test is intended to test deletion from secondary ngram inverted index.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+
+create index ngram_index on DBLP (title) type ngram (3);
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.4.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.4.update.sqlpp
new file mode 100644
index 0000000..abf1233
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.4.update.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.sqlpp
+ * Description : This test is intended to test deletion from secondary ngram inverted index.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+
+delete from DBLP
+ where id > 50;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.5.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.5.query.sqlpp
new file mode 100644
index 0000000..d3c8253
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.5.query.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.sqlpp
+ * Description : This test is intended to test deletion from secondary ngram inverted index.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+
+select element o
+from DBLP as o
+where test.contains(o.title,'Multimedia')
+order by o.id
+;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.1.ddl.sqlpp
new file mode 100644
index 0000000..808c7d4
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.1.ddl.sqlpp
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.sqlpp
+ * Description : This test is intended to test insertion from correlated secondary ngram inverted indexes
+ * that are built on nullable fields, on datasets whose primary key is a string.
+ * Expected Result : Success
+ * Date : June 21 2017
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+
+use test;
+
+
+create type test.DBLPType as
+ closed {
+ id : string,
+ dblpid : string,
+ title : string?,
+ authors : string,
+ misc : string
+};
+
+create dataset DBLP(DBLPType) primary key id
+with {
+ "merge-policy": {
+ "name": "correlated-prefix",
+ "parameters": { "max-mergable-component-size": 16384, "max-tolerance-component-count": 3 }
+ }
+};
+
+create dataset DBLP1(DBLPType) primary key id
+with {
+ "merge-policy": {
+ "name": "correlated-prefix",
+ "parameters": { "max-mergable-component-size": 16384, "max-tolerance-component-count": 3 }
+ }
+};
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.2.update.sqlpp
new file mode 100644
index 0000000..2a7ce84
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.2.update.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+
+load dataset DBLP using localfs ((`path`=`asterix_nc1://data/dblp-small/dblp-small-nulls-string-as-primary-key.adm`),(`format`=`adm`));
+
+insert into DBLP1
+select element {'id':o.id,'dblpid':o.dblpid,'title':o.title,'authors':o.authors,'misc':o.misc}
+from DBLP as o
+where test.contains(o.title,'Multimedia')
+order by o.id
+;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.3.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.3.ddl.sqlpp
new file mode 100644
index 0000000..2854258
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.3.ddl.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+
+create index ngram_index on DBLP (title) type ngram (3);
+
+create index ngram_index1 on DBLP1 (title) type ngram (3);
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.4.get.http b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.4.get.http
new file mode 100644
index 0000000..f3a6e46
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.4.get.http
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/connector?datasetName=DBLP&dataverseName=test
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.5.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.5.query.sqlpp
new file mode 100644
index 0000000..cfa4899
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key.5.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+/* Return every DBLP1 record whose title contains 'Multimedia', ordered by id. */
+select element o
+from DBLP1 as o
+where test.contains(o.title,'Multimedia')
+order by o.id
+;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.1.ddl.sqlpp
new file mode 100644
index 0000000..aacf42b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.1.ddl.sqlpp
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.sqlpp
+ * Description : Tests insertion driven by a secondary ngram inverted index built on a nullable field (title), using datasets whose primary key (id) is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+
+use test;
+
+/* Closed record type: id is a string, and title is the only optional (nullable) field. */
+create type test.DBLPType as
+ closed {
+ id : string,
+ dblpid : string,
+ title : string?,
+ authors : string,
+ misc : string
+};
+/* Two datasets of the same type, both keyed on the string field id. */
+create dataset DBLP(DBLPType) primary key id;
+
+create dataset DBLP1(DBLPType) primary key id;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.2.update.sqlpp
new file mode 100644
index 0000000..0dd89bb
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.2.update.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.sqlpp
+ * Description : Tests insertion driven by a secondary ngram inverted index built on a nullable field (title), using datasets whose primary key (id) is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+/* Bulk-load DBLP from the local ADM file (its ids are string values); DBLP1 is not loaded in this step. */
+load dataset DBLP using localfs ((`path`=`asterix_nc1://data/dblp-small/dblp-small-nulls-string-as-primary-key.adm`),(`format`=`adm`));
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.3.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.3.ddl.sqlpp
new file mode 100644
index 0000000..b6edf47
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.3.ddl.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.sqlpp
+ * Description : Tests insertion driven by a secondary ngram inverted index built on a nullable field (title), using datasets whose primary key (id) is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+/* Create an ngram (3) inverted index on title for each dataset; this step runs after the load of DBLP. */
+create index ngram_index on DBLP (title) type ngram (3);
+
+create index ngram_index1 on DBLP1 (title) type ngram (3);
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.4.get.http b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.4.get.http
new file mode 100644
index 0000000..f3a6e46
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.4.get.http
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/connector?datasetName=DBLP&dataverseName=test
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.5.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.5.update.sqlpp
new file mode 100644
index 0000000..e027679
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.5.update.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.sqlpp
+ * Description : Tests insertion driven by a secondary ngram inverted index built on a nullable field (title), using datasets whose primary key (id) is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+/* Copy into DBLP1 every DBLP record whose title contains 'Multimedia', rebuilding each record field by field. */
+insert into DBLP1
+select element {'id':o.id,'dblpid':o.dblpid,'title':o.title,'authors':o.authors,'misc':o.misc}
+from DBLP as o
+where test.contains(o.title,'Multimedia')
+order by o.id
+;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.6.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.6.query.sqlpp
new file mode 100644
index 0000000..2a02c8e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.6.query.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.sqlpp
+ * Description : Tests insertion driven by a secondary ngram inverted index built on a nullable field (title), using datasets whose primary key (id) is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+/* Read back from DBLP1 the records whose title contains 'Multimedia', ordered by id. */
+select element o
+from DBLP1 as o
+where test.contains(o.title,'Multimedia')
+order by o.id
+;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.1.ddl.sqlpp
new file mode 100644
index 0000000..369a3a9
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.1.ddl.sqlpp
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.sqlpp
+ * Description : Tests insertion driven by a secondary ngram inverted index, using datasets whose primary key (id) is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+
+use test;
+
+/* Closed record type: every field, including id and title, is a required string. */
+create type test.DBLPType as
+ closed {
+ id : string,
+ dblpid : string,
+ title : string,
+ authors : string,
+ misc : string
+};
+/* Two datasets of the same type, both keyed on the string field id. */
+create dataset DBLP(DBLPType) primary key id;
+
+create dataset DBLP1(DBLPType) primary key id;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.2.update.sqlpp
new file mode 100644
index 0000000..826bcdb
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.2.update.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.sqlpp
+ * Description : Tests insertion driven by a secondary ngram inverted index, using datasets whose primary key (id) is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+/* No pre-sorted hint: id is a string key here, and a file ordered numerically by id (presumably the case for dblp-small-id.txt; verify) is not in string order. */
+load dataset DBLP using localfs ((`path`=`asterix_nc1://data/dblp-small/dblp-small-id.txt`),(`format`=`delimited-text`),(`delimiter`=`:`));
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.3.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.3.ddl.sqlpp
new file mode 100644
index 0000000..d034b0f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.3.ddl.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.sqlpp
+ * Description : Tests insertion driven by a secondary ngram inverted index, using datasets whose primary key (id) is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+/* Create an ngram (3) inverted index on title for each dataset; this step runs after the load of DBLP. */
+create index ngram_index on DBLP (title) type ngram (3);
+
+create index ngram_index1 on DBLP1 (title) type ngram (3);
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.4.get.http b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.4.get.http
new file mode 100644
index 0000000..f3a6e46
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.4.get.http
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/connector?datasetName=DBLP&dataverseName=test
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.5.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.5.update.sqlpp
new file mode 100644
index 0000000..9e7c24e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.5.update.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.sqlpp
+ * Description : Tests insertion driven by a secondary ngram inverted index, using datasets whose primary key (id) is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+/* Copy into DBLP1 every DBLP record whose title contains 'Multimedia', rebuilding each record field by field. */
+insert into DBLP1
+select element {'id':o.id,'dblpid':o.dblpid,'title':o.title,'authors':o.authors,'misc':o.misc}
+from DBLP as o
+where test.contains(o.title,'Multimedia')
+order by o.id
+;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.6.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.6.query.sqlpp
new file mode 100644
index 0000000..c0f9b7c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.6.query.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.sqlpp
+ * Description : Tests insertion driven by a secondary ngram inverted index, using datasets whose primary key (id) is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+/* Read back from DBLP1 the records whose title contains 'Multimedia', ordered by id. */
+select element o
+from DBLP1 as o
+where test.contains(o.title,'Multimedia')
+order by o.id
+;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.1.ddl.sqlpp
new file mode 100644
index 0000000..17633e8
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.1.ddl.sqlpp
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.sqlpp
+ * Description : Tests insertion from a correlated secondary keyword inverted index, using datasets whose primary key (id) is a string.
+ * Expected Result : Success
+ * Date : June 21 2017
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+
+use test;
+
+/* Closed record type: every field, including id and title, is a required string. */
+create type test.DBLPType as
+ closed {
+ id : string,
+ dblpid : string,
+ title : string,
+ authors : string,
+ misc : string
+};
+/* Both datasets are keyed on the string field id and use the correlated-prefix merge policy with identical settings. */
+create dataset DBLP(DBLPType) primary key id
+with {
+ "merge-policy": {
+ "name": "correlated-prefix",
+ "parameters": { "max-mergable-component-size": 16384, "max-tolerance-component-count": 3 }
+ }
+};
+
+create dataset DBLP1(DBLPType) primary key id
+with {
+ "merge-policy": {
+ "name": "correlated-prefix",
+ "parameters": { "max-mergable-component-size": 16384, "max-tolerance-component-count": 3 }
+ }
+};
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.2.update.sqlpp
new file mode 100644
index 0000000..cf9a86a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.2.update.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+use test;
+
+/* No pre-sorted hint: id is a string key here, and a file ordered numerically by id (presumably the case for dblp-small-id.txt; verify) is not in string order. */
+load dataset DBLP using localfs ((`path`=`asterix_nc1://data/dblp-small/dblp-small-id.txt`),(`format`=`delimited-text`),(`delimiter`=`:`));
+/* Copy every DBLP record into DBLP1 (no filter), rebuilding each record field by field. */
+insert into DBLP1
+select element {'id':o.id,'dblpid':o.dblpid,'title':o.title,'authors':o.authors,'misc':o.misc}
+from DBLP as o
+order by o.id
+;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.3.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.3.ddl.sqlpp
new file mode 100644
index 0000000..ab2be5c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.3.ddl.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+/* Create a keyword inverted index on title for each dataset; this step runs after both datasets have been populated. */
+create index keyword_index on DBLP (title) type keyword;
+
+create index keyword_index1 on DBLP1 (title) type keyword;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.4.get.http b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.4.get.http
new file mode 100644
index 0000000..f3a6e46
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.4.get.http
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/connector?datasetName=DBLP&dataverseName=test
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.5.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.5.query.sqlpp
new file mode 100644
index 0000000..34f332c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key.5.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+
+select element o
+from DBLP1 as o
+with jacc as test.`similarity-jaccard-check`(test.`word-tokens`(o.title),test.`word-tokens`('Transactions for Cooperative Environments'),0.500000f)
+where jacc[0]
+;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.1.ddl.sqlpp
new file mode 100644
index 0000000..eea34f1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.1.ddl.sqlpp
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.sqlpp
+ * Description : Tests insertion into a dataset that has a secondary keyword inverted index built on a nullable field, where the primary key is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+
+use test;
+
+
+create type test.DBLPType as
+ closed {
+ id : string,
+ dblpid : string,
+ title : string?,
+ authors : string,
+ misc : string
+};
+
+create dataset DBLP(DBLPType) primary key id;
+
+create dataset DBLP1(DBLPType) primary key id;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.2.update.sqlpp
new file mode 100644
index 0000000..da667ae
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.2.update.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.sqlpp
+ * Description : Tests insertion into a dataset that has a secondary keyword inverted index built on a nullable field, where the primary key is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+
+load dataset DBLP using localfs ((`path`=`asterix_nc1://data/dblp-small/dblp-small-nulls-string-as-primary-key.adm`),(`format`=`adm`));
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.3.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.3.ddl.sqlpp
new file mode 100644
index 0000000..c95f616
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.3.ddl.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.sqlpp
+ * Description : Tests insertion into a dataset that has a secondary keyword inverted index built on a nullable field, where the primary key is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+
+create index keyword_index on DBLP (title) type keyword;
+
+create index keyword_index1 on DBLP1 (title) type keyword;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.4.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.4.update.sqlpp
new file mode 100644
index 0000000..abd20e2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.4.update.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.sqlpp
+ * Description : Tests insertion into a dataset that has a secondary keyword inverted index built on a nullable field, where the primary key is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+
+insert into DBLP1
+select element {'id':o.id,'dblpid':o.dblpid,'title':o.title,'authors':o.authors,'misc':o.misc}
+from DBLP as o
+order by o.id
+;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.5.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.5.query.sqlpp
new file mode 100644
index 0000000..aadbeb6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.5.query.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.sqlpp
+ * Description : Tests insertion into a dataset that has a secondary keyword inverted index built on a nullable field, where the primary key is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+
+select element o
+from DBLP1 as o
+with jacc as test.`similarity-jaccard-check`(test.`word-tokens`(o.title),test.`word-tokens`('Transactions for Cooperative Environments'),0.500000f)
+where jacc[0]
+;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.1.ddl.sqlpp
new file mode 100644
index 0000000..a0227f9
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.1.ddl.sqlpp
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-word-secondary-index-string-as-primary-key.sqlpp
+ * Description : Tests insertion into a dataset that has a secondary keyword inverted index, where the primary key is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+
+use test;
+
+
+create type test.DBLPType as
+ closed {
+ id : string,
+ dblpid : string,
+ title : string,
+ authors : string,
+ misc : string
+};
+
+create dataset DBLP(DBLPType) primary key id;
+
+create dataset DBLP1(DBLPType) primary key id;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.2.update.sqlpp
new file mode 100644
index 0000000..b37beb2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.2.update.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-word-secondary-index-string-as-primary-key.sqlpp
+ * Description : Tests insertion into a dataset that has a secondary keyword inverted index, where the primary key is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+
+load dataset DBLP using localfs ((`path`=`asterix_nc1://data/dblp-small/dblp-small-id.txt`),(`format`=`delimited-text`),(`delimiter`=`:`)) pre-sorted;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.3.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.3.ddl.sqlpp
new file mode 100644
index 0000000..27c3e6c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.3.ddl.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-word-secondary-index-string-as-primary-key.sqlpp
+ * Description : Tests insertion into a dataset that has a secondary keyword inverted index, where the primary key is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+
+create index keyword_index on DBLP (title) type keyword;
+
+create index keyword_index1 on DBLP1 (title) type keyword;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.4.get.http b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.4.get.http
new file mode 100644
index 0000000..f3a6e46
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.4.get.http
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/connector?datasetName=DBLP&dataverseName=test
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.5.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.5.update.sqlpp
new file mode 100644
index 0000000..460e51e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.5.update.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-word-secondary-index-string-as-primary-key.sqlpp
+ * Description : Tests insertion into a dataset that has a secondary keyword inverted index, where the primary key is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+
+insert into DBLP1
+select element {'id':o.id,'dblpid':o.dblpid,'title':o.title,'authors':o.authors,'misc':o.misc}
+from DBLP as o
+order by o.id
+;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.6.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.6.query.sqlpp
new file mode 100644
index 0000000..cde417e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.6.query.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : scan-insert-inverted-index-word-secondary-index-string-as-primary-key.sqlpp
+ * Description : Tests insertion into a dataset that has a secondary keyword inverted index, where the primary key is a string.
+ * Expected Result : Success
+ * Date : March 31 2013
+ */
+
+use test;
+
+
+select element o
+from DBLP1 as o
+with jacc as test.`similarity-jaccard-check`(test.`word-tokens`(o.title),test.`word-tokens`('Transactions for Cooperative Environments'),0.500000f)
+where jacc[0]
+;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.1.ddl.sqlpp
new file mode 100644
index 0000000..8ae179e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.1.ddl.sqlpp
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse fuzzyjoin if exists;
+create dataverse fuzzyjoin;
+
+use fuzzyjoin;
+
+
+create type DBLPType as
+{
+ id : string,
+ dblpid : string,
+ title : string,
+ authors : string,
+ misc : string
+};
+
+create dataset DBLP(DBLPType) primary key id;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.2.update.sqlpp
new file mode 100644
index 0000000..51c5bb1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.2.update.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use fuzzyjoin;
+
+load dataset DBLP using localfs ((`path`=`asterix_nc1://data/dblp-small/dblp-small-id.txt`),(`format`=`delimited-text`),(`delimiter`=`:`)) pre-sorted;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.3.get.http b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.3.get.http
new file mode 100644
index 0000000..e9e914e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.3.get.http
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/connector?datasetName=DBLP&dataverseName=fuzzyjoin
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.4.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.4.query.sqlpp
new file mode 100644
index 0000000..0d3f4ca
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.4.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use fuzzyjoin;
+
+SELECT VALUE id FROM DBLP dblp where ftcontains(dblp.title, ["storage", "ExODuS"], {"mode" : "all"});
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.1.adm
new file mode 100644
index 0000000..a9a79c0
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key/scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key.1.adm
@@ -0,0 +1 @@
+{ "id": 4, "dblpid": "books/acm/kim95/ChristodoulakisK95", "title": "Multimedia Information Systems Issues and Approaches.", "authors": "Stavros Christodoulakis Leonidas Koveos", "misc": "2002-01-03 318-337 1995 Modern Database Systems db/books/collections/kim95.html#ChristodoulakisK95" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.4.adm
new file mode 100644
index 0000000..9671834
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.4.adm
@@ -0,0 +1 @@
+{"keys":"id","type":{"type":"org.apache.asterix.om.types.ARecordType","name":"DBLPType","open":false,"fields":[{"id":{"type":"AString"}},{"dblpid":{"type":"AString"}},{"title":{"type":"org.apache.asterix.om.types.AUnionType","fields":[{"type":"AString"},{"type":"ANULL"},{"type":"AMISSING"}]}},{"authors":{"type":"AString"}},{"misc":{"type":"AString"}}]},"splits":[{"ip":"127.0.0.1","path":"storage/partition_0/test/DBLP/0/DBLP"},{"ip":"127.0.0.1","path":"storage/partition_1/test/DBLP/0/DBLP"},{"ip":"127.0.0.1","path":"storage/partition_2/test/DBLP/0/DBLP"},{"ip":"127.0.0.1","path":"storage/partition_3/test/DBLP/0/DBLP"}]}
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.6.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.6.adm
new file mode 100644
index 0000000..24eea46
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key.6.adm
@@ -0,0 +1,3 @@
+{ "id": "4", "dblpid": "books/acm/kim95/ChristodoulakisK95", "title": "Multimedia Information Systems Issues and Approaches.", "authors": "Stavros Christodoulakis Leonidas Koveos", "misc": "2002-01-03 318-337 1995 Modern Database Systems db/books/collections/kim95.html#ChristodoulakisK95" }
+{ "id": "89", "dblpid": "conf/icip/SchonfeldL98", "title": "VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases.", "authors": "Dan Schonfeld Dan Lelescu", "misc": "2002-11-05 123-127 1998 ICIP (3) db/conf/icip/icip1998-3.html#SchonfeldL98" }
+{ "id": "90", "dblpid": "conf/hicss/SchonfeldL99", "title": "VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases ¾ Visual Search Engine.", "authors": "Dan Schonfeld Dan Lelescu", "misc": "2002-01-03 1999 HICSS http //computer.org/proceedings/hicss/0001/00013/00013006abs.htm db/conf/hicss/hicss1999-3.html#SchonfeldL99" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.4.adm
new file mode 100644
index 0000000..e5d5f6e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.4.adm
@@ -0,0 +1 @@
+{"keys":"id","type":{"type":"org.apache.asterix.om.types.ARecordType","name":"DBLPType","open":false,"fields":[{"id":{"type":"AString"}},{"dblpid":{"type":"AString"}},{"title":{"type":"AString"}},{"authors":{"type":"AString"}},{"misc":{"type":"AString"}}]},"splits":[{"ip":"127.0.0.1","path":"storage/partition_0/test/DBLP/0/DBLP"},{"ip":"127.0.0.1","path":"storage/partition_1/test/DBLP/0/DBLP"},{"ip":"127.0.0.1","path":"storage/partition_2/test/DBLP/0/DBLP"},{"ip":"127.0.0.1","path":"storage/partition_3/test/DBLP/0/DBLP"}]}
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.6.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.6.adm
new file mode 100644
index 0000000..24eea46
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key/scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key.6.adm
@@ -0,0 +1,3 @@
+{ "id": "4", "dblpid": "books/acm/kim95/ChristodoulakisK95", "title": "Multimedia Information Systems Issues and Approaches.", "authors": "Stavros Christodoulakis Leonidas Koveos", "misc": "2002-01-03 318-337 1995 Modern Database Systems db/books/collections/kim95.html#ChristodoulakisK95" }
+{ "id": "89", "dblpid": "conf/icip/SchonfeldL98", "title": "VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases.", "authors": "Dan Schonfeld Dan Lelescu", "misc": "2002-11-05 123-127 1998 ICIP (3) db/conf/icip/icip1998-3.html#SchonfeldL98" }
+{ "id": "90", "dblpid": "conf/hicss/SchonfeldL99", "title": "VORTEX Video Retrieval and Tracking from Compressed Multimedia Databases ¾ Visual Search Engine.", "authors": "Dan Schonfeld Dan Lelescu", "misc": "2002-01-03 1999 HICSS http //computer.org/proceedings/hicss/0001/00013/00013006abs.htm db/conf/hicss/hicss1999-3.html#SchonfeldL99" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.1.adm
new file mode 100644
index 0000000..40fd3b5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key.1.adm
@@ -0,0 +1 @@
+{ "id": "9", "dblpid": "books/acm/kim95/Kaiser95", "title": "Cooperative Transactions for Multiuser Environments.", "authors": "Gail E. Kaiser", "misc": "2002-01-03 409-433 1995 Modern Database Systems db/books/collections/kim95.html#Kaiser95" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.4.adm
new file mode 100644
index 0000000..e5d5f6e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.4.adm
@@ -0,0 +1 @@
+{"keys":"id","type":{"type":"org.apache.asterix.om.types.ARecordType","name":"DBLPType","open":false,"fields":[{"id":{"type":"AString"}},{"dblpid":{"type":"AString"}},{"title":{"type":"AString"}},{"authors":{"type":"AString"}},{"misc":{"type":"AString"}}]},"splits":[{"ip":"127.0.0.1","path":"storage/partition_0/test/DBLP/0/DBLP"},{"ip":"127.0.0.1","path":"storage/partition_1/test/DBLP/0/DBLP"},{"ip":"127.0.0.1","path":"storage/partition_2/test/DBLP/0/DBLP"},{"ip":"127.0.0.1","path":"storage/partition_3/test/DBLP/0/DBLP"}]}
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.6.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.6.adm
new file mode 100644
index 0000000..40fd3b5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/scan-insert-inverted-index-word-secondary-index-string-as-primary-key/scan-insert-inverted-index-word-secondary-index-string-as-primary-key.6.adm
@@ -0,0 +1 @@
+{ "id": "9", "dblpid": "books/acm/kim95/Kaiser95", "title": "Cooperative Transactions for Multiuser Environments.", "authors": "Gail E. Kaiser", "misc": "2002-01-03 409-433 1995 Modern Database Systems db/books/collections/kim95.html#Kaiser95" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.3.adm
new file mode 100644
index 0000000..c08b4db
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.3.adm
@@ -0,0 +1 @@
+{"keys":"id","type":{"type":"org.apache.asterix.om.types.ARecordType","name":"DBLPType","open":true,"fields":[{"id":{"type":"AString"}},{"dblpid":{"type":"AString"}},{"title":{"type":"AString"}},{"authors":{"type":"AString"}},{"misc":{"type":"AString"}}]},"splits":[{"ip":"127.0.0.1","path":"storage/partition_0/fuzzyjoin/DBLP/0/DBLP"},{"ip":"127.0.0.1","path":"storage/partition_1/fuzzyjoin/DBLP/0/DBLP"},{"ip":"127.0.0.1","path":"storage/partition_2/fuzzyjoin/DBLP/0/DBLP"},{"ip":"127.0.0.1","path":"storage/partition_3/fuzzyjoin/DBLP/0/DBLP"}]}
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.4.adm
new file mode 100644
index 0000000..bffa712
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/dblp-string-as-primary-key/dblp-string-as-primary-key.4.adm
@@ -0,0 +1 @@
+"38"
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 854028e..5eb0291 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -3970,7 +3970,6 @@
<test-case FilePath="ddl/create-index">
<compilation-unit name="create-inverted-index-with-variable-length-primary-key">
<output-dir compare="Text">create-inverted-index-with-variable-length-primary-key</output-dir>
- <expected-error>ASX1079: Compilation error: The keyword or ngram index MyIndex cannot be created on the dataset MyDataset due to its variable-length primary key field [myKey] (in line 33, at column 1)</expected-error>
</compilation-unit>
</test-case>
<test-case FilePath="ddl">
@@ -4536,6 +4535,12 @@
</compilation-unit>
</test-case>
<test-case FilePath="dml">
+ <compilation-unit name="scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key">
+ <output-dir compare="Text">scan-insert-inverted-index-ngram-secondary-index-string-as-primary-key</output-dir>
+ </compilation-unit>
+ </test-case>
+
+ <test-case FilePath="dml">
<compilation-unit name="scan-insert-inverted-index-ngram-correlated-secondary-index">
<output-dir compare="Text">scan-insert-inverted-index-ngram-secondary-index</output-dir>
</compilation-unit>
@@ -4546,26 +4551,51 @@
</compilation-unit>
</test-case>
<test-case FilePath="dml">
+ <compilation-unit name="scan-insert-inverted-index-word-secondary-index-string-as-primary-key">
+ <output-dir compare="Text">scan-insert-inverted-index-word-secondary-index-string-as-primary-key</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="dml">
<compilation-unit name="scan-insert-inverted-index-word-correlated-secondary-index">
<output-dir compare="Text">scan-insert-inverted-index-word-secondary-index</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="dml">
+ <compilation-unit name="scan-insert-inverted-index-word-correlated-secondary-index-string-as-primary-key">
+ <output-dir compare="Text">scan-insert-inverted-index-word-secondary-index-string-as-primary-key</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="dml">
<compilation-unit name="scan-insert-inverted-index-ngram-secondary-index-nullable">
<output-dir compare="Text">scan-insert-inverted-index-ngram-secondary-index-nullable</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="dml">
+ <compilation-unit name="scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key">
+ <output-dir compare="Text">scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="dml">
<compilation-unit name="scan-insert-inverted-index-ngram-correlated-secondary-index-nullable">
<output-dir compare="Text">scan-insert-inverted-index-ngram-secondary-index-nullable</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="dml">
+ <compilation-unit name="scan-insert-inverted-index-ngram-correlated-secondary-index-nullable-string-as-primary-key">
+ <output-dir compare="Text">scan-insert-inverted-index-ngram-secondary-index-nullable-string-as-primary-key</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="dml">
<compilation-unit name="scan-insert-inverted-index-word-secondary-index-nullable">
<output-dir compare="Text">scan-insert-inverted-index-word-secondary-index-nullable</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="dml">
+ <compilation-unit name="scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key">
+ <output-dir compare="Text">scan-insert-inverted-index-word-secondary-index-nullable-string-as-primary-key</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="dml">
<compilation-unit name="scan-insert-inverted-index-word-correlated-secondary-index-nullable">
<output-dir compare="Text">scan-insert-inverted-index-word-secondary-index-nullable</output-dir>
</compilation-unit>
@@ -4576,6 +4606,11 @@
</compilation-unit>
</test-case>
<test-case FilePath="dml">
+ <compilation-unit name="scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key">
+ <output-dir compare="Text">scan-delete-inverted-index-ngram-secondary-index-string-as-primary-key</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="dml">
<compilation-unit name="scan-delete-inverted-index-ngram-correlated-secondary-index">
<output-dir compare="Text">scan-delete-inverted-index-ngram-secondary-index</output-dir>
</compilation-unit>
@@ -4848,6 +4883,11 @@
<output-dir compare="Text">basic-1_1_3</output-dir>
</compilation-unit>
</test-case>
+ <test-case FilePath="fuzzyjoin">
+ <compilation-unit name="dblp-string-as-primary-key">
+ <output-dir compare="Text">dblp-string-as-primary-key</output-dir>
+ </compilation-unit>
+ </test-case>
</test-group>
<!--
<test-group name="flwor">
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java
index 87c7d87..2171974 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java
@@ -708,6 +708,15 @@
return ((InternalDatasetDetails) getDatasetDetails()).getPartitioningKey();
}
+ /**
+ @return the array of type traits that contains the following type traits in order
+ 1) the primary keys,
+ 2) the query record type,
+ 3) the metadata type trait if the dataset has metadata
+ */
+ // TODO: returning such an array can be confusing because it may or may not contain the metadata type trait.
+ // Instead of returning an array, create a new class that contains 1) a type trait array for the primary keys,
+ // 2) the record type trait, and 3) a nullable meta type trait.
public ITypeTraits[] getPrimaryTypeTraits(MetadataProvider metadataProvider, ARecordType recordType,
ARecordType metaType) throws AlgebricksException {
IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/InvertedIndexResourceFactoryProvider.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/InvertedIndexResourceFactoryProvider.java
index f1fe625..f2b7558 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/InvertedIndexResourceFactoryProvider.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/InvertedIndexResourceFactoryProvider.java
@@ -127,6 +127,9 @@
invertedIndexFieldsForNonBulkLoadOps, bloomFilterFalsePositiveRate);
}
+ // Returns an array of the type traits of the inverted list elements.
+ // It contains the type traits of the primary key(s) of the corresponding indexed rows,
+ // since those primary keys are the elements stored in the inverted list.
private static ITypeTraits[] getInvListTypeTraits(MetadataProvider metadataProvider, Dataset dataset,
ARecordType recordType, ARecordType metaType) throws AlgebricksException {
ITypeTraits[] primaryTypeTraits = dataset.getPrimaryTypeTraits(metadataProvider, recordType, metaType);
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
index 8dfad94..a93da48 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
@@ -158,6 +158,7 @@
public static final int ERROR_PRINTING_PLAN = 122;
public static final int INSUFFICIENT_MEMORY = 123;
public static final int PARSING_ERROR = 124;
+ public static final int INVALID_INVERTED_LIST_TYPE_TRAITS = 125;
// Compilation error codes.
public static final int RULECOLLECTION_NOT_INSTANCE_OF_LIST = 10000;
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
index c2eee62..01895a6 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
@@ -142,6 +142,7 @@
122 = Encountered an error while printing the plan
123 = Insufficient memory is provided for the join operators, please increase the join memory budget.
124 = Parsing error at %1$s line %2$s field %3$s: %4$s
+125 = Invalid inverted list type traits: %1$s
10000 = The given rule collection %1$s is not an instance of the List class.
10001 = Cannot compose partition constraint %1$s with %2$s
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/VarLengthTypeTrait.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/VarLengthTypeTrait.java
index 1b27784..b6e4d94 100644
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/VarLengthTypeTrait.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/VarLengthTypeTrait.java
@@ -39,6 +39,11 @@
@Override
public int getFixedLength() {
+ // This method should never be called: it asks for the fixed length of a variable-length field.
+ // A better way to handle it would be to throw an exception such as
+ // UnsupportedOperationException("try to get the fixed length of a variable length type trait").
+ // However, since this method is fundamental and pretty old, we keep it this way
+ // to avoid potential issues.
return 0;
}
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/buffermanager/SingleFrameBufferManager.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/buffermanager/SingleFrameBufferManager.java
new file mode 100644
index 0000000..2b4ffc8
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/buffermanager/SingleFrameBufferManager.java
@@ -0,0 +1,62 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.dataflow.std.buffermanager;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+// This SingleFrameBufferManager is used for **scan** purposes.
+// When scanning an inverted index, the pages are loaded from disk to memory one by one,
+// so this buffer manager hands out at most one (lazily allocated, reused) frame at a time.
+// Note: this buffer manager is NOT thread-safe.
+public class SingleFrameBufferManager implements ISimpleFrameBufferManager {
+    boolean isAcquired = false;
+    ByteBuffer buffer = null;
+
+    /** Returns the zeroed frame, or null if already acquired; throws if frameSize exceeds its capacity. */
+    @Override
+    public ByteBuffer acquireFrame(int frameSize) throws HyracksDataException {
+        if (buffer == null) {
+            buffer = ByteBuffer.allocate(frameSize);
+        }
+        if (isAcquired) {
+            return null;
+        }
+        if (buffer.capacity() < frameSize) {
+            throw new HyracksDataException("Frame size changed");
+        }
+        isAcquired = true;
+        buffer.clear();
+        Arrays.fill(buffer.array(), (byte) 0);
+        return buffer;
+    }
+
+    /** Makes the frame available again; tolerates a release before any frame was ever allocated. */
+    @Override
+    public void releaseFrame(ByteBuffer frame) {
+        if (buffer != null) {
+            buffer.clear();
+        }
+        isAcquired = false;
+    }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInPlaceInvertedIndex.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInPlaceInvertedIndex.java
index ed4287d..099eda5 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInPlaceInvertedIndex.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInPlaceInvertedIndex.java
@@ -30,7 +30,7 @@
*
* @throws HyracksDataException
*/
- InvertedListCursor createInvertedListCursor(IHyracksTaskContext ctx) throws HyracksDataException;
+ IInvertedListCursor createInvertedListCursor(IHyracksTaskContext ctx) throws HyracksDataException;
/**
* Creates an inverted-list-range-search cursor. This cursor is mainly used to conduct
@@ -38,7 +38,7 @@
*
* @throws HyracksDataException
*/
- InvertedListCursor createInvertedListRangeSearchCursor(IIndexCursorStats stats) throws HyracksDataException;
+ IInvertedListCursor createInvertedListRangeSearchCursor(IIndexCursorStats stats) throws HyracksDataException;
/**
* Opens an inverted list cursor
@@ -51,6 +51,6 @@
* the operation context under which the cursor is to be open
* @throws HyracksDataException
*/
- void openInvertedListCursor(InvertedListCursor listCursor, ITupleReference searchKey, IIndexOperationContext ictx)
+ void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey, IIndexOperationContext ictx)
throws HyracksDataException;
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java
index fe29c5f..f0ff2bb 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java
@@ -26,9 +26,9 @@
import org.apache.hyracks.storage.common.ISearchPredicate;
public interface IInvertedIndexAccessor extends IIndexAccessor {
- public InvertedListCursor createInvertedListCursor() throws HyracksDataException;
+ public IInvertedListCursor createInvertedListCursor() throws HyracksDataException;
- public void openInvertedListCursor(InvertedListCursor listCursor, ITupleReference searchKey)
+ public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey)
throws HyracksDataException;
public IIndexCursor createRangeSearchCursor() throws HyracksDataException;
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListBuilder.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListBuilder.java
index ce2af37..f157213 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListBuilder.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListBuilder.java
@@ -30,6 +30,8 @@
public void setTargetBuffer(byte[] targetBuf, int startPos);
+ public boolean isFixedSize();
+
public int getListSize();
public int getPos();
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListBuilderFactory.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListBuilderFactory.java
index 6aed487..defd8a1 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListBuilderFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListBuilderFactory.java
@@ -19,6 +19,8 @@
package org.apache.hyracks.storage.am.lsm.invertedindex.api;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
public interface IInvertedListBuilderFactory {
- public IInvertedListBuilder create();
+ public IInvertedListBuilder create() throws HyracksDataException;
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java
new file mode 100644
index 0000000..77f68b4
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.lsm.invertedindex.api;
+
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+import org.apache.hyracks.storage.common.IIndexCursor;
+import org.apache.hyracks.storage.common.MultiComparator;
+
+/**
+ * A cursor that reads an inverted list.
+ */
+public interface IInvertedListCursor extends IIndexCursor, Comparable<IInvertedListCursor> {
+
+ /**
+ * Conducts any operation that is required before loading pages.
+ */
+ void prepareLoadPages() throws HyracksDataException;
+
+ /**
+ * Loads one or more pages into memory.
+ */
+ void loadPages() throws HyracksDataException;
+
+ /**
+ * Unloads the pages that are currently loaded in memory.
+ */
+ void unloadPages() throws HyracksDataException;
+
+ /**
+ * Gets the number of elements in the cursor.
+ */
+ int size() throws HyracksDataException;
+
+ /**
+ * Checks whether the given tuple is contained in the cursor.
+ *
+ * Note that this method is used when merging two sorted lists, which means the internal cursor of a list
+ * only moves in one direction: the cursor never goes back.
+ * A better name for this method might be moveCursorForwardToCheckContainsKey().
+ */
+ boolean containsKey(ITupleReference searchTuple, MultiComparator invListCmp) throws HyracksDataException;
+
+ /**
+ * Returns a printable string of all elements in the cursor (debug method).
+ */
+ @SuppressWarnings("rawtypes")
+ String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException;
+
+ /**
+ * Returns a printable string of the current element in the cursor (debug method).
+ */
+ @SuppressWarnings("rawtypes")
+ String printCurrentElement(ISerializerDeserializer[] serdes) throws HyracksDataException;
+}
\ No newline at end of file
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListSearchResultFrameTupleAppender.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListSearchResultFrameTupleAppender.java
new file mode 100644
index 0000000..562d4b2
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListSearchResultFrameTupleAppender.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.lsm.invertedindex.api;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Contract for appending inverted-list search result data to a frame {@link ByteBuffer}.
+ *
+ * NOTE(review): except for {@link #hasSpace(int)}, the method descriptions below are inferred from the
+ * signatures only; confirm them against the implementing classes.
+ */
+public interface IInvertedListSearchResultFrameTupleAppender {
+
+    /**
+     * Re-targets this appender to the given buffer.
+     *
+     * @param buffer the frame buffer to append to
+     */
+    void reset(ByteBuffer buffer);
+
+    /**
+     * Re-targets this appender to the given buffer with an explicit starting state.
+     *
+     * @param buffer the frame buffer to append to
+     * @param clear presumably whether the existing content of the buffer is discarded (confirm)
+     * @param tupleCount presumably the number of tuples already stored in the buffer (confirm)
+     * @param tupleDataEndOffset presumably the offset at which the existing tuple data ends (confirm)
+     */
+    void reset(ByteBuffer buffer, boolean clear, int tupleCount, int tupleDataEndOffset);
+
+    /**
+     * Appends {@code length} bytes of {@code bytes}, starting at {@code offset}.
+     *
+     * @return presumably true if the data was appended, false if it did not fit (confirm)
+     */
+    boolean append(byte[] bytes, int offset, int length);
+
+    /**
+     * Appends an int value.
+     *
+     * @return presumably true if the value was appended, false if it did not fit (confirm)
+     */
+    boolean append(int fieldValue);
+
+    /**
+     * Appends a long value.
+     *
+     * @return presumably true if the value was appended, false if it did not fit (confirm)
+     */
+    boolean append(long fieldValue);
+
+    /**
+     * Appends a char value.
+     *
+     * @return presumably true if the value was appended, false if it did not fit (confirm)
+     */
+    boolean append(char fieldValue);
+
+    /**
+     * Appends a byte value.
+     *
+     * @return presumably true if the value was appended, false if it did not fit (confirm)
+     */
+    boolean append(byte fieldValue);
+
+    /**
+     * Checks whether a tuple of the given length fits.
+     *
+     * @param length the length of the tuple
+     * @return true if a tuple of the given length fits, false otherwise
+     */
+    boolean hasSpace(int length);
+
+    /**
+     * Increases the tuple count by the given amount.
+     *
+     * @param count the number of tuples to add to the count
+     */
+    void incrementTupleCount(int count);
+
+    /**
+     * @return the current tuple count
+     */
+    int getTupleCount();
+
+    /**
+     * @return the buffer this appender currently works on
+     */
+    ByteBuffer getBuffer();
+
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListTupleReference.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListTupleReference.java
new file mode 100644
index 0000000..2c109a5
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/IInvertedListTupleReference.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.lsm.invertedindex.api;
+
+import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+
+/**
+ * An {@link ITupleReference} that can be re-targeted through {@link #reset(byte[], int)}.
+ */
+public interface IInvertedListTupleReference extends ITupleReference {
+ /**
+ * Resets this reference using the given byte array and start offset.
+ * NOTE(review): presumably {@code startOff} is the byte offset within {@code data} at which the referenced
+ * inverted-list element begins; confirm against the implementations.
+ *
+ * @param data the byte array to read from
+ * @param startOff the start offset within {@code data}
+ */
+ void reset(byte[] data, int startOff);
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/InvertedListCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/InvertedListCursor.java
deleted file mode 100644
index b6d5902..0000000
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/api/InvertedListCursor.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hyracks.storage.am.lsm.invertedindex.api;
-
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
-import org.apache.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexSearchCursorInitialState;
-import org.apache.hyracks.storage.common.EnforcedIndexCursor;
-import org.apache.hyracks.storage.common.ICursorInitialState;
-import org.apache.hyracks.storage.common.ISearchPredicate;
-import org.apache.hyracks.storage.common.MultiComparator;
-
-/**
- * A cursor that reads an inverted list.
- */
-public abstract class InvertedListCursor extends EnforcedIndexCursor implements Comparable<InvertedListCursor> {
-
- /**
- * Opens an inverted list cursor.
- */
- protected void doOpen(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException {
- // If the given cursor state has page ids and the number of elements for the given inverted list,
- // this should be set. Otherwise (for in-memory cursor), doesn't need to do anything.
- int invListStartPageId = LSMInvertedIndexSearchCursorInitialState.INVALID_VALUE;
- int invListEndPageId = LSMInvertedIndexSearchCursorInitialState.INVALID_VALUE;
- int invListStartOffset = LSMInvertedIndexSearchCursorInitialState.INVALID_VALUE;
- int invListNumElements = LSMInvertedIndexSearchCursorInitialState.INVALID_VALUE;
- if (initialState instanceof LSMInvertedIndexSearchCursorInitialState) {
- LSMInvertedIndexSearchCursorInitialState invIndexInitialState =
- (LSMInvertedIndexSearchCursorInitialState) initialState;
- invListStartPageId = invIndexInitialState.getInvListStartPageId();
- invListEndPageId = invIndexInitialState.getInvListEndPageId();
- invListStartOffset = invIndexInitialState.getInvListStartOffset();
- invListNumElements = invIndexInitialState.getInvListNumElements();
- }
- if (invListNumElements != LSMInvertedIndexSearchCursorInitialState.INVALID_VALUE) {
- setInvListInfo(invListStartPageId, invListEndPageId, invListStartOffset, invListNumElements);
- }
- }
-
- /**
- * Sets the disk-based inverted list information such as page ids and the number of elements
- * for the given inverted list.
- */
- protected abstract void setInvListInfo(int startPageId, int endPageId, int startOff, int numElements)
- throws HyracksDataException;
-
- /**
- * Conducts any operation that is required before loading pages.
- */
- public abstract void prepareLoadPages() throws HyracksDataException;
-
- /**
- * Loads one or more pages to memory.
- */
- public abstract void loadPages() throws HyracksDataException;
-
- /**
- * Unloads currently loaded pages in the memory.
- */
- public abstract void unloadPages() throws HyracksDataException;
-
- /**
- * Gets the cardinality of elements in the cursor.
- */
- public abstract int size() throws HyracksDataException;
-
- /**
- * Checks whether the given tuple is contained in the cursor.
- */
- public abstract boolean containsKey(ITupleReference searchTuple, MultiComparator invListCmp)
- throws HyracksDataException;
-
- /**
- * Prints all elements in the cursor (debug method).
- */
- @SuppressWarnings("rawtypes")
- public abstract String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException;
-
- /**
- * Prints the current element in the cursor (debug method).
- */
- @SuppressWarnings("rawtypes")
- public abstract String printCurrentElement(ISerializerDeserializer[] serdes) throws HyracksDataException;
-}
\ No newline at end of file
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/AbstractInvertedListCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/AbstractInvertedListCursor.java
new file mode 100644
index 0000000..295d942
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/AbstractInvertedListCursor.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.lsm.invertedindex.impls;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+import org.apache.hyracks.storage.common.EnforcedIndexCursor;
+import org.apache.hyracks.storage.common.ICursorInitialState;
+import org.apache.hyracks.storage.common.ISearchPredicate;
+
+/**
+ * Base class of the cursors that read an inverted list. When the cursor is opened with an initial state that
+ * describes a disk-based inverted list, that description is handed to
+ * {@link #setInvListInfo(int, int, int, int)}.
+ */
+public abstract class AbstractInvertedListCursor extends EnforcedIndexCursor implements IInvertedListCursor {
+
+    /**
+     * Opens an inverted list cursor.
+     *
+     * @param initialState the initial state; only an {@link LSMInvertedIndexSearchCursorInitialState} is inspected
+     * @param searchPred the search predicate (not used here)
+     * @throws HyracksDataException if setting the inverted list information fails
+     */
+    protected void doOpen(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException {
+        // Only this kind of state can carry the page ids and the number of elements of an inverted list.
+        // For any other state (e.g., for an in-memory cursor), there is nothing to do.
+        if (initialState instanceof LSMInvertedIndexSearchCursorInitialState) {
+            final LSMInvertedIndexSearchCursorInitialState listState =
+                    (LSMInvertedIndexSearchCursorInitialState) initialState;
+            final int firstPageId = listState.getInvListStartPageId();
+            final int lastPageId = listState.getInvListEndPageId();
+            final int firstOffset = listState.getInvListStartOffset();
+            final int elementCount = listState.getInvListNumElements();
+            // The list information is applied only if the state actually holds an element count.
+            if (elementCount != LSMInvertedIndexSearchCursorInitialState.INVALID_VALUE) {
+                setInvListInfo(firstPageId, lastPageId, firstOffset, elementCount);
+            }
+        }
+    }
+
+    /**
+     * Receives the inverted list information (page ids, start offset and the number of elements) that was
+     * found in the initial state during {@link #doOpen(ICursorInitialState, ISearchPredicate)}.
+     *
+     * @param startPageId the first page id of the inverted list
+     * @param endPageId the last page id of the inverted list
+     * @param startOff the start offset of the inverted list
+     * @param numElements the number of elements in the inverted list
+     * @throws HyracksDataException if the information cannot be applied
+     */
+    protected abstract void setInvListInfo(int startPageId, int endPageId, int startOff, int numElements)
+            throws HyracksDataException;
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/AbstractOnDiskInvertedListCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/AbstractOnDiskInvertedListCursor.java
new file mode 100644
index 0000000..7ec6ce2
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/AbstractOnDiskInvertedListCursor.java
@@ -0,0 +1,337 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.lsm.invertedindex.impls;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.exceptions.ErrorCode;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.util.HyracksConstants;
+import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+import org.apache.hyracks.dataflow.common.utils.TaskUtil;
+import org.apache.hyracks.dataflow.std.buffermanager.ISimpleFrameBufferManager;
+import org.apache.hyracks.dataflow.std.buffermanager.SingleFrameBufferManager;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListTupleReference;
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
+import org.apache.hyracks.storage.common.IIndexCursorStats;
+import org.apache.hyracks.storage.common.buffercache.IBufferCache;
+import org.apache.hyracks.storage.common.buffercache.ICachedPage;
+import org.apache.hyracks.storage.common.file.BufferedFileHandle;
+
+/**
+ * Base class of the cursors that read an on-disk inverted list.
+ *
+ * The pages of the list are pinned through the buffer cache, copied into frames acquired from a buffer manager
+ * (the working memory) and unpinned right away. The frames are acquired in {@link #prepareLoadPages()}, filled in
+ * {@link #loadPages()} and released in {@link #unloadPages()}.
+ */
+public abstract class AbstractOnDiskInvertedListCursor extends AbstractInvertedListCursor {
+
+    protected final IBufferCache bufferCache;
+    protected final int fileId;
+    // for sequential scan
+    public int currentElementIxForScan;
+    protected int currentOffsetForScan;
+    protected int currentPageIxForScan;
+    // the whole range of the given inverted list
+    protected int startPageId;
+    protected int endPageId;
+    protected int startOff;
+    protected int numElements;
+    protected int numPages;
+    // the current range of the loaded pages in memory
+    protected int bufferStartPageId;
+    protected int bufferEndPageId;
+    protected int bufferStartElementIx;
+    protected int bufferNumLoadedPages;
+
+    protected final IInvertedListTupleReference tuple;
+    protected final ITypeTraits[] invListFields;
+    // the page that was pinned most recently by loadPages(); it is already unpinned when loadPages() returns
+    protected ICachedPage page;
+    // buffer manager to conform to the memory budget
+    protected final ISimpleFrameBufferManager bufferManagerForSearch;
+    // frames (working memory) that hold the copies of the loaded pages
+    protected ArrayList<ByteBuffer> buffers;
+    protected boolean moreBlocksToRead = true;
+    // The last searched element index (used for random traversal for containsKey())
+    protected int lastRandomSearchedElementIx;
+    protected final IIndexCursorStats stats;
+
+    /**
+     * Creates a cursor for a search. The buffer manager is taken from the given task context.
+     */
+    protected AbstractOnDiskInvertedListCursor(IBufferCache bufferCache, int fileId, ITypeTraits[] invListFields,
+            IHyracksTaskContext ctx, IIndexCursorStats stats) throws HyracksDataException {
+        this(bufferCache, fileId, invListFields, ctx, stats, false);
+    }
+
+    /**
+     * Creates a cursor for a scan. No task context is required.
+     */
+    protected AbstractOnDiskInvertedListCursor(IBufferCache bufferCache, int fileId, ITypeTraits[] invListFields,
+            IIndexCursorStats stats) throws HyracksDataException {
+        this(bufferCache, fileId, invListFields, null, stats, true);
+    }
+
+    /**
+     * If isScan, uses the SingleFrameBufferManager to minimize memory cost by allocating only one memory frame;
+     * otherwise uses the buffer manager that is registered in the task context.
+     *
+     * @throws HyracksDataException if this is not a scan and either the task context is null or no buffer manager
+     *             is registered in it
+     */
+    protected AbstractOnDiskInvertedListCursor(IBufferCache bufferCache, int fileId, ITypeTraits[] invListFields,
+            IHyracksTaskContext ctx, IIndexCursorStats stats, boolean isScan) throws HyracksDataException {
+        this.bufferCache = bufferCache;
+        this.fileId = fileId;
+
+        this.currentElementIxForScan = 0;
+        this.currentPageIxForScan = 0;
+        this.bufferStartPageId = 0;
+        this.bufferEndPageId = 0;
+        this.bufferStartElementIx = 0;
+        this.bufferNumLoadedPages = 0;
+        this.lastRandomSearchedElementIx = 0;
+        this.moreBlocksToRead = true;
+        this.invListFields = invListFields;
+        this.tuple = InvertedIndexUtils.createInvertedListTupleReference(invListFields);
+        this.buffers = new ArrayList<>();
+        if (isScan) {
+            this.bufferManagerForSearch = new SingleFrameBufferManager();
+        } else {
+            if (ctx == null) {
+                throw HyracksDataException.create(ErrorCode.CANNOT_CONTINUE_TEXT_SEARCH_HYRACKS_TASK_IS_NULL);
+            }
+            this.bufferManagerForSearch = TaskUtil.get(HyracksConstants.INVERTED_INDEX_SEARCH_FRAME_MANAGER, ctx);
+            if (bufferManagerForSearch == null) {
+                throw HyracksDataException.create(ErrorCode.CANNOT_CONTINUE_TEXT_SEARCH_BUFFER_MANAGER_IS_NULL);
+            }
+        }
+        this.stats = stats;
+    }
+
+    /**
+     * Tries to allocate enough buffers to read the inverted list at once. If the memory budget is not enough,
+     * this method stops allocating buffers.
+     *
+     * @throws HyracksDataException if not even a single buffer is available
+     */
+    protected void allocateBuffers() throws HyracksDataException {
+        do {
+            ByteBuffer tmpBuffer = bufferManagerForSearch.acquireFrame(bufferCache.getPageSize());
+            if (tmpBuffer == null) {
+                // Budget exhausted
+                break;
+            }
+            Arrays.fill(tmpBuffer.array(), (byte) 0);
+            buffers.add(tmpBuffer);
+        } while (buffers.size() < numPages);
+        // At least there should be one frame to load a page from disk.
+        if (buffers.isEmpty()) {
+            throw HyracksDataException.create(ErrorCode.NOT_ENOUGH_BUDGET_FOR_TEXTSEARCH,
+                    AbstractOnDiskInvertedListCursor.class.getName());
+        }
+    }
+
+    /**
+     * Deallocates all buffers. i.e. releases all buffers to the buffer manager.
+     */
+    protected void deallocateBuffers() throws HyracksDataException {
+        for (int i = 0; i < buffers.size(); i++) {
+            bufferManagerForSearch.releaseFrame(buffers.get(i));
+            buffers.set(i, null);
+        }
+        buffers.clear();
+    }
+
+    /**
+     * Clears the contents of the buffers: every buffer is filled with zeros and reset.
+     */
+    protected void clearBuffers() throws HyracksDataException {
+        for (ByteBuffer buffer : buffers) {
+            Arrays.fill(buffer.array(), (byte) 0);
+            buffer.clear();
+        }
+    }
+
+    /**
+     * Checks whether there are more elements to return. This is usually used for a sequential scan.
+     */
+    @Override
+    public boolean doHasNext() {
+        return currentElementIxForScan < numElements;
+    }
+
+    /**
+     * Prepares buffers to load pages. This method should not be called during the open()
+     * since it tries to allocate all available frames. If there are multiple concurrently opened
+     * cursors (e.g., a partitioned inverted index), this will cause an issue. An assumption of this cursor is
+     * that no two cursors are accessed at the same time even though they can be opened together.
+     */
+    @Override
+    public void prepareLoadPages() throws HyracksDataException {
+        // Resets the buffers if there is any.
+        clearBuffers();
+        if (numPages > buffers.size()) {
+            allocateBuffers();
+        }
+    }
+
+    /**
+     * Reads a part of the inverted list into the working memory via the buffer cache.
+     * This method reads the inverted list until it fills the current buffers.
+     */
+    @Override
+    public void loadPages() throws HyracksDataException {
+        // Conducts a load. Based on the size of the buffers, it may be possible to read the entire list.
+        // Resets the start page ID to load. At this moment, the variable bufferEndPageId holds
+        // the last page ID where the previous loadPages() stopped.
+        bufferStartPageId = bufferEndPageId + 1;
+        int currentBufferIdx = 0;
+        for (int i = bufferStartPageId; i <= endPageId; i++) {
+            page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, i), false);
+            try {
+                stats.getPageCounter().update(1);
+                // Copies the entire content of the page to the current buffer in the working memory.
+                // Assumption: processing inverted list takes time; so, we don't want to keep them on the
+                // buffer cache. Rather, we utilize the assigned working memory (buffers).
+                ByteBuffer target = buffers.get(currentBufferIdx);
+                System.arraycopy(page.getBuffer().array(), 0, target.array(), 0, target.capacity());
+                target.position(target.capacity());
+            } finally {
+                // The page must be unpinned even if the copy fails; otherwise the pin would be leaked.
+                bufferCache.unpin(page);
+            }
+            currentBufferIdx++;
+            bufferEndPageId = i;
+
+            // Buffer full?
+            if (currentBufferIdx >= buffers.size()) {
+                break;
+            }
+        }
+
+        setBlockInfo();
+    }
+
+    /**
+     * Unloads the pages from the buffers (working memory). This will release all buffers.
+     */
+    @Override
+    public void unloadPages() throws HyracksDataException {
+        // Deallocates the buffer pages
+        deallocateBuffers();
+    }
+
+    /**
+     * Sets the disk-based inverted list information such as page ids and the number of elements
+     * for the given inverted list.
+     */
+    @Override
+    protected void setInvListInfo(int startPageId, int endPageId, int startOff, int numElements)
+            throws HyracksDataException {
+        this.startPageId = startPageId;
+        this.endPageId = endPageId;
+        this.startOff = startOff;
+        this.numElements = numElements;
+        this.currentElementIxForScan = 0;
+        this.currentPageIxForScan = 0;
+        this.bufferStartPageId = startPageId;
+        // Deducts 1 since the startPage would be set to bufferEndPageId + 1 in loadPages().
+        this.bufferEndPageId = startPageId - 1;
+        this.moreBlocksToRead = true;
+        this.numPages = endPageId - startPageId + 1;
+
+        for (ByteBuffer buffer : buffers) {
+            buffer.clear();
+        }
+    }
+
+    /**
+     * Updates the information about this block, i.e. the range of pages that was loaded by loadPages().
+     */
+    protected void setBlockInfo() {
+        bufferNumLoadedPages = bufferEndPageId - bufferStartPageId + 1;
+        lastRandomSearchedElementIx = bufferStartElementIx;
+        currentPageIxForScan = 0;
+        if (bufferEndPageId == endPageId) {
+            moreBlocksToRead = false;
+        }
+    }
+
+    /**
+     * Prints the current element (a debugging method). The fields are separated by a comma, and the field i is
+     * deserialized with serdes[i].
+     */
+    @Override
+    @SuppressWarnings("rawtypes")
+    public String printCurrentElement(ISerializerDeserializer[] serdes) throws HyracksDataException {
+        StringBuilder strBuilder = new StringBuilder();
+        int fieldCount = tuple.getFieldCount();
+        for (int i = 0; i < fieldCount; i++) {
+            ByteArrayInputStream inStream =
+                    new ByteArrayInputStream(tuple.getFieldData(i), tuple.getFieldStart(i), tuple.getFieldLength(i));
+            DataInput dataIn = new DataInputStream(inStream);
+            // append(Object) is null-safe, unlike an explicit toString() call.
+            strBuilder.append(serdes[i].deserialize(dataIn));
+            if (i + 1 < fieldCount) {
+                strBuilder.append(",");
+            }
+        }
+        return strBuilder.toString();
+    }
+
+    /**
+     * A compare function that is used to sort inverted list cursors by the number of their elements.
+     *
+     * @throws IllegalStateException if the size of the given cursor cannot be obtained
+     */
+    @Override
+    public int compareTo(IInvertedListCursor invListCursor) {
+        try {
+            return Integer.compare(numElements, invListCursor.size());
+        } catch (HyracksDataException hde) {
+            throw new IllegalStateException(hde);
+        }
+    }
+
+    /**
+     * Gets the cardinality of the current inverted list.
+     */
+    @Override
+    public int size() throws HyracksDataException {
+        return numElements;
+    }
+
+    /**
+     * Gets the current tuple.
+     */
+    @Override
+    public ITupleReference doGetTuple() {
+        return tuple;
+    }
+
+    /**
+     * Closes the cursor. The buffers are released if there is any.
+     */
+    @Override
+    public void doClose() throws HyracksDataException {
+        if (!buffers.isEmpty()) {
+            unloadPages();
+        }
+    }
+
+    /**
+     * Destroys the cursor. The buffers are released if there is any.
+     */
+    @Override
+    public void doDestroy() throws HyracksDataException {
+        if (!buffers.isEmpty()) {
+            unloadPages();
+        }
+    }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
index 8e39b62..67312ce 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
@@ -34,7 +34,7 @@
import org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
import org.apache.hyracks.storage.am.lsm.common.api.LSMOperationType;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import org.apache.hyracks.storage.common.IIndexCursor;
import org.apache.hyracks.storage.common.ISearchPredicate;
@@ -176,12 +176,12 @@
}
@Override
- public InvertedListCursor createInvertedListCursor() {
+ public IInvertedListCursor createInvertedListCursor() {
throw new UnsupportedOperationException("Cannot create inverted list cursor on lsm inverted index.");
}
@Override
- public void openInvertedListCursor(InvertedListCursor listCursor, ITupleReference searchKey)
+ public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey)
throws HyracksDataException {
throw new UnsupportedOperationException("Cannot open inverted list cursor on lsm inverted index.");
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
index bfbb141..914a469 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
@@ -34,7 +34,7 @@
import org.apache.hyracks.storage.am.common.api.IPageManager;
import org.apache.hyracks.storage.am.common.ophelpers.IndexOperation;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInPlaceInvertedIndex;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
import org.apache.hyracks.storage.common.IIndexAccessParameters;
import org.apache.hyracks.storage.common.IIndexBulkLoader;
@@ -148,19 +148,19 @@
}
@Override
- public InvertedListCursor createInvertedListCursor(IHyracksTaskContext ctx) {
+ public IInvertedListCursor createInvertedListCursor(IHyracksTaskContext ctx) {
return new InMemoryInvertedListCursor(invListTypeTraits.length, tokenTypeTraits.length);
}
@Override
- public InvertedListCursor createInvertedListRangeSearchCursor(IIndexCursorStats stats) {
+ public IInvertedListCursor createInvertedListRangeSearchCursor(IIndexCursorStats stats) {
// An in-memory index does not have a separate inverted list.
// Therefore, a different range-search cursor for an inverted list is not required.
return createInvertedListCursor(null);
}
@Override
- public void openInvertedListCursor(InvertedListCursor listCursor, ITupleReference searchKey,
+ public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey,
IIndexOperationContext ictx) throws HyracksDataException {
InMemoryInvertedIndexOpContext ctx = (InMemoryInvertedIndexOpContext) ictx;
ctx.setOperation(IndexOperation.SEARCH);
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
index 0dd6b2c..5f0e1364 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
@@ -30,7 +30,7 @@
import org.apache.hyracks.storage.am.common.ophelpers.IndexOperation;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearcher;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndexSearchCursor;
import org.apache.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate;
import org.apache.hyracks.storage.am.lsm.invertedindex.search.TOccurrenceSearcher;
@@ -84,12 +84,12 @@
}
@Override
- public InvertedListCursor createInvertedListCursor() {
+ public IInvertedListCursor createInvertedListCursor() {
return index.createInvertedListCursor(ctx);
}
@Override
- public void openInvertedListCursor(InvertedListCursor listCursor, ITupleReference searchKey)
+ public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey)
throws HyracksDataException {
index.openInvertedListCursor(listCursor, searchKey, opCtx);
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
index c31a1e0..e1f301d 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
@@ -33,11 +33,12 @@
import org.apache.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
import org.apache.hyracks.storage.am.btree.impls.RangePredicate;
import org.apache.hyracks.storage.am.common.tuples.ConcatenatingTupleReference;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.impls.AbstractInvertedListCursor;
import org.apache.hyracks.storage.common.IIndexCursor;
import org.apache.hyracks.storage.common.MultiComparator;
-public class InMemoryInvertedListCursor extends InvertedListCursor {
+public class InMemoryInvertedListCursor extends AbstractInvertedListCursor {
private RangePredicate btreePred;
private BTreeAccessor btreeAccessor;
private IIndexCursor btreeCursor;
@@ -80,7 +81,7 @@
}
@Override
- public int compareTo(InvertedListCursor cursor) {
+ public int compareTo(IInvertedListCursor cursor) {
try {
return size() - cursor.size();
} catch (HyracksDataException hde) {
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListBuilderFactory.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/AbstractInvertedListBuilder.java
similarity index 66%
rename from hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListBuilderFactory.java
rename to hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/AbstractInvertedListBuilder.java
index 4e8b4f9..4c4c3af 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListBuilderFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/AbstractInvertedListBuilder.java
@@ -16,23 +16,34 @@
* specific language governing permissions and limitations
* under the License.
*/
-
package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilder;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilderFactory;
-public class FixedSizeElementInvertedListBuilderFactory implements IInvertedListBuilderFactory {
+public abstract class AbstractInvertedListBuilder implements IInvertedListBuilder {
+ protected byte[] targetBuf;
+ protected int pos;
+ protected ITypeTraits[] invListFields;
+ protected int listSize = 0;
- private final ITypeTraits[] invListFields;
-
- public FixedSizeElementInvertedListBuilderFactory(ITypeTraits[] invListFields) {
+ public AbstractInvertedListBuilder(ITypeTraits[] invListFields) {
this.invListFields = invListFields;
}
@Override
- public IInvertedListBuilder create() {
- return new FixedSizeElementInvertedListBuilder(invListFields);
+ public void setTargetBuffer(byte[] targetBuf, int startPos) {
+ this.targetBuf = targetBuf;
+ this.pos = startPos;
+ }
+
+ @Override
+ public int getListSize() {
+ return listSize;
+ }
+
+ @Override
+ public int getPos() {
+ return pos;
}
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleAccessor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/AbstractInvertedListSearchResultFrameTupleAccessor.java
similarity index 60%
copy from hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleAccessor.java
copy to hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/AbstractInvertedListSearchResultFrameTupleAccessor.java
index 23854f9..3b16423 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleAccessor.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/AbstractInvertedListSearchResultFrameTupleAccessor.java
@@ -24,35 +24,31 @@
import org.apache.hyracks.api.comm.FrameHelper;
import org.apache.hyracks.api.comm.IFrameTupleAccessor;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
/**
- * This is a fixed-size tuple accessor class.
- * The frame structure: [4 bytes for minimum Hyracks frame count] [fixed-size tuple 1] ... [fixed-size tuple n] ...
+ * This is a frame tuple accessor class for inverted list.
+ * The frame structure: [4 bytes for minimum Hyracks frame count] [tuple 1] ... [tuple n] ...
* [4 bytes for the tuple count in a frame]
+ *
+ * The tuples can be fixed-size or variable-size.
+ * This class is mainly used to merge two inverted lists, e.g. searching the conjunction of two keywords "abc" AND "xyz"
*/
-public class FixedSizeFrameTupleAccessor implements IFrameTupleAccessor {
+public abstract class AbstractInvertedListSearchResultFrameTupleAccessor implements IFrameTupleAccessor {
- private final int frameSize;
- private ByteBuffer buffer;
+ protected final int frameSize;
+ protected ByteBuffer buffer;
- private final ITypeTraits[] fields;
- private final int[] fieldStartOffsets;
- private final int tupleSize;
+ protected final ITypeTraits[] fields;
- public FixedSizeFrameTupleAccessor(int frameSize, ITypeTraits[] fields) {
+ protected abstract void verifyTypeTraits() throws HyracksDataException;
+
+ public AbstractInvertedListSearchResultFrameTupleAccessor(int frameSize, ITypeTraits[] fields)
+ throws HyracksDataException {
this.frameSize = frameSize;
this.fields = fields;
- this.fieldStartOffsets = new int[fields.length];
- this.fieldStartOffsets[0] = 0;
- for (int i = 1; i < fields.length; i++) {
- fieldStartOffsets[i] = fieldStartOffsets[i - 1] + fields[i - 1].getFixedLength();
- }
- int tmp = 0;
- for (int i = 0; i < fields.length; i++) {
- tmp += fields[i].getFixedLength();
- }
- tupleSize = tmp;
+ verifyTypeTraits();
}
@Override
@@ -66,16 +62,6 @@
}
@Override
- public int getFieldEndOffset(int tupleIndex, int fIdx) {
- return getTupleStartOffset(tupleIndex) + fieldStartOffsets[fIdx] + fields[fIdx].getFixedLength();
- }
-
- @Override
- public int getFieldLength(int tupleIndex, int fIdx) {
- return fields[fIdx].getFixedLength();
- }
-
- @Override
public int getTupleLength(int tupleIndex) {
return getTupleEndOffset(tupleIndex) - getTupleStartOffset(tupleIndex);
}
@@ -86,11 +72,6 @@
}
@Override
- public int getFieldStartOffset(int tupleIndex, int fIdx) {
- return getTupleStartOffset(tupleIndex) + fieldStartOffsets[fIdx];
- }
-
- @Override
public int getTupleCount() {
return buffer != null ? buffer.getInt(FrameHelper.getTupleCountOffset(frameSize)) : 0;
}
@@ -101,11 +82,6 @@
}
@Override
- public int getTupleStartOffset(int tupleIndex) {
- return FixedSizeFrameTupleAppender.MINFRAME_COUNT_SIZE + tupleIndex * tupleSize;
- }
-
- @Override
public int getAbsoluteFieldStartOffset(int tupleIndex, int fIdx) {
return getTupleStartOffset(tupleIndex) + getFieldSlotsLength() + getFieldStartOffset(tupleIndex, fIdx);
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeTupleReference.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/AbstractInvertedListTupleReference.java
similarity index 64%
rename from hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeTupleReference.java
rename to hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/AbstractInvertedListTupleReference.java
index 5908b5c..f5d8fa6 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeTupleReference.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/AbstractInvertedListTupleReference.java
@@ -20,27 +20,35 @@
package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
-import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListTupleReference;
-public class FixedSizeTupleReference implements ITupleReference {
+public abstract class AbstractInvertedListTupleReference implements IInvertedListTupleReference {
- private final ITypeTraits[] typeTraits;
- private final int[] fieldStartOffsets;
- private byte[] data;
- private int startOff;
+ protected final ITypeTraits[] typeTraits;
+ protected final int[] fieldStartOffsets;
+ protected byte[] data;
+ protected int startOff;
- public FixedSizeTupleReference(ITypeTraits[] typeTraits) {
+ // checks whether the type traits are fixed-size or variable-size
+ // throws an IllegalArgument exception if it gets unexpected traits
+ protected abstract void verifyTypeTrait() throws HyracksDataException;
+
+ public AbstractInvertedListTupleReference(ITypeTraits[] typeTraits) throws HyracksDataException {
this.typeTraits = typeTraits;
this.fieldStartOffsets = new int[typeTraits.length];
this.fieldStartOffsets[0] = 0;
- for (int i = 1; i < typeTraits.length; i++) {
- fieldStartOffsets[i] = fieldStartOffsets[i - 1] + typeTraits[i - 1].getFixedLength();
- }
+
+ verifyTypeTrait();
}
+ protected abstract void calculateFieldStartOffsets();
+
+ @Override
public void reset(byte[] data, int startOff) {
this.data = data;
this.startOff = startOff;
+ calculateFieldStartOffsets();
}
@Override
@@ -54,11 +62,6 @@
}
@Override
- public int getFieldLength(int fIdx) {
- return typeTraits[fIdx].getFixedLength();
- }
-
- @Override
public int getFieldStart(int fIdx) {
return startOff + fieldStartOffsets[fIdx];
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListCursor.java
deleted file mode 100644
index 6002dc8..0000000
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListCursor.java
+++ /dev/null
@@ -1,525 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk;
-
-import java.io.ByteArrayInputStream;
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.dataflow.value.ITypeTraits;
-import org.apache.hyracks.api.exceptions.ErrorCode;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.api.util.HyracksConstants;
-import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
-import org.apache.hyracks.dataflow.common.utils.TaskUtil;
-import org.apache.hyracks.dataflow.std.buffermanager.ISimpleFrameBufferManager;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
-import org.apache.hyracks.storage.common.IIndexCursorStats;
-import org.apache.hyracks.storage.common.MultiComparator;
-import org.apache.hyracks.storage.common.buffercache.IBufferCache;
-import org.apache.hyracks.storage.common.buffercache.ICachedPage;
-import org.apache.hyracks.storage.common.file.BufferedFileHandle;
-
-/**
- * A cursor class that traverse an inverted list that consists of fixed-size elements on disk
- *
- */
-public class FixedSizeElementInvertedListCursor extends InvertedListCursor {
-
- private final IBufferCache bufferCache;
- private final int fileId;
- private final int elementSize;
- // for sequential scan
- private int currentElementIxForScan;
- private int currentOffsetForScan;
- private int currentPageIxForScan;
- // the whole range of the given inverted list
- private int startPageId;
- private int endPageId;
- private int startOff;
- private int numElements;
- private int numPages;
- // the current range of the loaded pages in memory
- private int bufferStartPageId;
- private int bufferEndPageId;
- private int bufferStartElementIx;
- private int bufferEndElementIx;
- private int bufferNumLoadedPages;
-
- private final FixedSizeTupleReference tuple;
- // The last element in the current range in memory
- private final FixedSizeTupleReference bufferEndElementTuple;
- private ICachedPage page;
- // The last element index per page
- private int[] elementIndexes = new int[10];
- // buffer manager to conform to the memory budget
- private final ISimpleFrameBufferManager bufferManagerForSearch;
- private ArrayList<ByteBuffer> buffers;
- private boolean moreBlocksToRead = true;
- // The last searched element index (used for random traversal)
- private int lastRandomSearchedElementIx;
- private final IIndexCursorStats stats;
-
- public FixedSizeElementInvertedListCursor(IBufferCache bufferCache, int fileId, ITypeTraits[] invListFields,
- IHyracksTaskContext ctx, IIndexCursorStats stats) throws HyracksDataException {
- this.bufferCache = bufferCache;
- this.fileId = fileId;
- int tmpSize = 0;
- for (int i = 0; i < invListFields.length; i++) {
- tmpSize += invListFields[i].getFixedLength();
- }
- elementSize = tmpSize;
- this.currentOffsetForScan = -elementSize;
- this.currentElementIxForScan = 0;
- this.currentPageIxForScan = 0;
- this.bufferStartPageId = 0;
- this.bufferEndPageId = 0;
- this.bufferStartElementIx = 0;
- this.bufferEndElementIx = 0;
- this.bufferNumLoadedPages = 0;
- this.lastRandomSearchedElementIx = 0;
- this.moreBlocksToRead = true;
- this.tuple = new FixedSizeTupleReference(invListFields);
- this.bufferEndElementTuple = new FixedSizeTupleReference(invListFields);
- this.buffers = new ArrayList<ByteBuffer>();
- if (ctx == null) {
- throw HyracksDataException.create(ErrorCode.CANNOT_CONTINUE_TEXT_SEARCH_HYRACKS_TASK_IS_NULL);
- }
- this.bufferManagerForSearch = TaskUtil.get(HyracksConstants.INVERTED_INDEX_SEARCH_FRAME_MANAGER, ctx);
- if (bufferManagerForSearch == null) {
- throw HyracksDataException.create(ErrorCode.CANNOT_CONTINUE_TEXT_SEARCH_BUFFER_MANAGER_IS_NULL);
- }
- this.stats = stats;
- }
-
- /**
- * Tries to allocate enough buffers to read the inverted list at once. If memory budget is not enough, this method
- * stops allocating buffers.
- */
- private void allocateBuffers() throws HyracksDataException {
- do {
- ByteBuffer tmpBuffer = bufferManagerForSearch.acquireFrame(bufferCache.getPageSize());
- if (tmpBuffer == null) {
- // Budget exhausted
- break;
- }
- Arrays.fill(tmpBuffer.array(), (byte) 0);
- buffers.add(tmpBuffer);
- } while (buffers.size() < numPages);
- // At least there should be one frame to load a page from disk.
- if (buffers.isEmpty()) {
- throw HyracksDataException.create(ErrorCode.NOT_ENOUGH_BUDGET_FOR_TEXTSEARCH,
- FixedSizeElementInvertedListCursor.class.getName());
- }
- }
-
- /**
- * Deallocates all buffers. i.e. releases all buffers to the buffer manager.
- */
- private void deallocateBuffers() throws HyracksDataException {
- for (int i = 0; i < buffers.size(); i++) {
- bufferManagerForSearch.releaseFrame(buffers.get(i));
- buffers.set(i, null);
- }
- buffers.clear();
- }
-
- /**
- * Clears the contents of the buffers.
- */
- private void clearBuffers() throws HyracksDataException {
- for (int i = 0; i < buffers.size(); i++) {
- Arrays.fill(buffers.get(i).array(), (byte) 0);
- buffers.get(i).clear();
- }
- }
-
- /**
- * Checks whether there are more elements to return. This is usually used for a sequential scan.
- */
- @Override
- public boolean doHasNext() {
- return currentElementIxForScan < numElements;
- }
-
- /**
- * Returns the next element.
- */
- @Override
- public void doNext() throws HyracksDataException {
- if (currentOffsetForScan + 2 * elementSize > bufferCache.getPageSize()) {
- currentPageIxForScan++;
- currentOffsetForScan = 0;
- } else {
- currentOffsetForScan += elementSize;
- }
-
- // Needs to read the next block?
- if (currentElementIxForScan > bufferEndElementIx && endPageId > bufferEndPageId) {
- loadPages();
- currentOffsetForScan = 0;
- }
-
- currentElementIxForScan++;
-
- tuple.reset(buffers.get(currentPageIxForScan).array(), currentOffsetForScan);
- }
-
- /**
- * Prepares buffers to load pages. This method should not be called during the open()
- * since it tries to allocate all available frames. If there are multiple concurrently opened
- * cursors (e.g., a partitioned inverted index), this will cause an issue. An assumption of this cursor is
- * that no two cursors are accessed at the same time even though they can be opened together.
- */
- @Override
- public void prepareLoadPages() throws HyracksDataException {
- // Resets the buffers if there is any.
- clearBuffers();
- if (numPages > buffers.size()) {
- allocateBuffers();
- }
- }
-
- /**
- * Reads a part of the inverted list into the working memory via the buffer cache.
- * This method reads the inverted list until it fills the current buffers.
- */
- @Override
- public void loadPages() throws HyracksDataException {
- // Conducts a load. Based on the size of the buffers, it may be possible to read the entire list.
- // Resets the start page ID to load. At this moment, the variable bufferEndPageId holds
- // the last page ID where the previous loadPages() stopped.
- bufferStartPageId = bufferEndPageId + 1;
- if (bufferStartPageId > endPageId) {
- return;
- }
- int currentBufferIdx = 0;
- ByteBuffer tmpBuffer;
- for (int i = bufferStartPageId; i <= endPageId; i++) {
- page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, i), false);
- stats.getPageCounter().update(1);
- // Copies the content to the buffer (working memory).
- // Assumption: processing inverted list takes time; so, we don't want to keep them on the buffer cache.
- // Rather, we utilize the assigned working memory (buffers).
- tmpBuffer = page.getBuffer();
-
- // Copies the entire content of the page to the current buffer in the working memory.
- System.arraycopy(tmpBuffer.array(), 0, buffers.get(currentBufferIdx).array(), 0,
- buffers.get(currentBufferIdx).capacity());
- buffers.get(currentBufferIdx).position(buffers.get(currentBufferIdx).capacity());
-
- currentBufferIdx++;
- bufferCache.unpin(page);
- bufferEndPageId = i;
-
- // Buffer full?
- if (currentBufferIdx >= buffers.size()) {
- break;
- }
- }
- setBlockInfo();
- }
-
- /**
- * Updates the information about this block.
- */
- private void setBlockInfo() {
- bufferNumLoadedPages = bufferEndPageId - bufferStartPageId + 1;
- bufferStartElementIx =
- bufferStartPageId == startPageId ? 0 : elementIndexes[bufferStartPageId - startPageId - 1] + 1;
- lastRandomSearchedElementIx = bufferStartElementIx;
- bufferEndElementIx = elementIndexes[bufferEndPageId - startPageId];
- // Gets the final element tuple in this block.
- getElementAtIndex(bufferEndElementIx, bufferEndElementTuple);
- currentPageIxForScan = 0;
- currentOffsetForScan = bufferStartElementIx == 0 ? startOff - elementSize : -elementSize;
- if (bufferEndPageId == endPageId) {
- moreBlocksToRead = false;
- }
- }
-
- /**
- * Unloads the pages from the buffers (working memory). This will release all buffers.
- */
- @Override
- public void unloadPages() throws HyracksDataException {
- // Deallocates the buffer pages
- deallocateBuffers();
- }
-
- /**
- * Checks whether the search tuple is greater than the last element in the current block of the cursor.
- * If so, the cursor needs to load next block of the inverted list.
- *
- * @param searchTuple
- * @param invListCmp
- * @return true if the search tuple is greater than the last element in the current block of the cursor
- * false if the search tuple is equal to or less than the last element in the current block of the cursor
- * @throws HyracksDataException
- */
- private boolean needToReadNextBlock(ITupleReference searchTuple, MultiComparator invListCmp)
- throws HyracksDataException {
- if (moreBlocksToRead && invListCmp.compare(searchTuple, bufferEndElementTuple) > 0) {
- return true;
- }
- return false;
- }
-
- /**
- * Gets the tuple for the given element index.
- */
- private void getElementAtIndex(int elementIx, FixedSizeTupleReference tuple) {
- int currentPageIx =
- binarySearch(elementIndexes, bufferStartPageId - startPageId, bufferNumLoadedPages, elementIx);
- if (currentPageIx < 0) {
- throw new IndexOutOfBoundsException(
- "Requested index: " + elementIx + " from array with numElements: " + numElements);
- }
-
- int currentOff;
- if (currentPageIx == 0) {
- currentOff = startOff + elementIx * elementSize;
- } else {
- int relativeElementIx = elementIx - elementIndexes[currentPageIx - 1] - 1;
- currentOff = relativeElementIx * elementSize;
- }
- // Gets the actual index in the buffers since buffers.size() can be smaller than the total number of pages.
- int bufferIdx = currentPageIx % buffers.size();
- tuple.reset(buffers.get(bufferIdx).array(), currentOff);
- }
-
- /**
- * Checks whether the given tuple exists on this inverted list. This method is used when doing a random traversal.
- */
- @Override
- public boolean containsKey(ITupleReference searchTuple, MultiComparator invListCmp) throws HyracksDataException {
- // If the given element is greater than the last element in the current buffer, reads the next block.
- if (needToReadNextBlock(searchTuple, invListCmp)) {
- loadPages();
- }
- int mid = -1;
- int begin = lastRandomSearchedElementIx;
- int end = bufferEndElementIx;
-
- while (begin <= end) {
- mid = (begin + end) / 2;
- getElementAtIndex(mid, tuple);
- int cmp = invListCmp.compare(searchTuple, tuple);
- if (cmp < 0) {
- end = mid - 1;
- } else if (cmp > 0) {
- begin = mid + 1;
- } else {
- lastRandomSearchedElementIx = mid;
- return true;
- }
- }
-
- lastRandomSearchedElementIx = mid;
- return false;
- }
-
- /**
- * Opens the cursor for the given inverted list. After this open() call, prepreLoadPages() should be called
- * before loadPages() are called. For more details, check prepapreLoadPages().
- */
- @Override
- protected void setInvListInfo(int startPageId, int endPageId, int startOff, int numElements)
- throws HyracksDataException {
- this.startPageId = startPageId;
- this.endPageId = endPageId;
- this.startOff = startOff;
- this.numElements = numElements;
- this.currentElementIxForScan = 0;
- this.currentPageIxForScan = 0;
- this.currentOffsetForScan = startOff - elementSize;
- this.bufferStartPageId = startPageId;
- // Deducts 1 since the startPage would be set to bufferEndPageId + 1 in loadPages().
- this.bufferEndPageId = startPageId - 1;
- this.moreBlocksToRead = true;
- this.numPages = endPageId - startPageId + 1;
-
- if (numPages > elementIndexes.length) {
- elementIndexes = new int[numPages];
- }
-
- for (ByteBuffer buffer : buffers) {
- buffer.clear();
- }
-
- // Fills the last element index per page.
- // first page
- int cumulElements = (bufferCache.getPageSize() - startOff) / elementSize;
- // Deducts 1 because this is the index, not the number of elements.
- elementIndexes[0] = cumulElements - 1;
-
- // middle, full pages
- for (int i = 1; i < numPages - 1; i++) {
- elementIndexes[i] = elementIndexes[i - 1] + (bufferCache.getPageSize() / elementSize);
- }
-
- // last page
- // Deducts 1 because this is the index, not the number of elements.
- elementIndexes[numPages - 1] = numElements - 1;
- }
-
- /**
- * Prints the contents of the current inverted list (a debugging method).
- */
- @SuppressWarnings("rawtypes")
- @Override
- public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException {
- int oldCurrentOff = currentOffsetForScan;
- int oldCurrentPageId = currentPageIxForScan;
- int oldCurrentElementIx = currentElementIxForScan;
-
- currentOffsetForScan = startOff - elementSize;
- currentPageIxForScan = 0;
- currentElementIxForScan = 0;
-
- StringBuilder strBuilder = new StringBuilder();
-
- while (hasNext()) {
- next();
- for (int i = 0; i < tuple.getFieldCount(); i++) {
- ByteArrayInputStream inStream = new ByteArrayInputStream(tuple.getFieldData(i), tuple.getFieldStart(i),
- tuple.getFieldLength(i));
- DataInput dataIn = new DataInputStream(inStream);
- Object o = serdes[i].deserialize(dataIn);
- strBuilder.append(o.toString());
- if (i + 1 < tuple.getFieldCount()) {
- strBuilder.append(",");
- }
- }
- strBuilder.append(" ");
- }
-
- // reset previous state
- currentOffsetForScan = oldCurrentOff;
- currentPageIxForScan = oldCurrentPageId;
- currentElementIxForScan = oldCurrentElementIx;
-
- return strBuilder.toString();
- }
-
- /**
- * Prints the current element (a debugging method).
- */
- @Override
- @SuppressWarnings("rawtypes")
- public String printCurrentElement(ISerializerDeserializer[] serdes) throws HyracksDataException {
- StringBuilder strBuilder = new StringBuilder();
- for (int i = 0; i < tuple.getFieldCount(); i++) {
- ByteArrayInputStream inStream =
- new ByteArrayInputStream(tuple.getFieldData(i), tuple.getFieldStart(i), tuple.getFieldLength(i));
- DataInput dataIn = new DataInputStream(inStream);
- Object o = serdes[i].deserialize(dataIn);
- strBuilder.append(o.toString());
- if (i + 1 < tuple.getFieldCount()) {
- strBuilder.append(",");
- }
- }
- return strBuilder.toString();
- }
-
- /**
- * Conducts a binary search to get the index of the given key.
- */
- private int binarySearch(int[] arr, int arrStart, int arrLength, int key) {
- int mid;
- int begin = arrStart;
- int end = arrStart + arrLength - 1;
-
- while (begin <= end) {
- mid = (begin + end) / 2;
- int cmp = (key - arr[mid]);
- if (cmp < 0) {
- end = mid - 1;
- } else if (cmp > 0) {
- begin = mid + 1;
- } else {
- return mid;
- }
- }
-
- if (begin > arr.length - 1) {
- return -1;
- }
- if (key < arr[begin]) {
- return begin;
- } else {
- return -1;
- }
- }
-
- /**
- * A compare function that is used to sort inverted list cursors
- */
- @Override
- public int compareTo(InvertedListCursor invListCursor) {
- try {
- return numElements - invListCursor.size();
- } catch (HyracksDataException hde) {
- throw new IllegalStateException(hde);
- }
- }
-
- /**
- * Gets the cardinality of the current inverted list.
- */
- @Override
- public int size() {
- return numElements;
- }
-
- /**
- * Gets the current tuple.
- */
- @Override
- public ITupleReference doGetTuple() {
- return tuple;
- }
-
- /**
- * Closes the cursor.
- */
- @Override
- public void doClose() throws HyracksDataException {
- if (!buffers.isEmpty()) {
- unloadPages();
- }
- }
-
- /**
- * Destroys the cursor.
- */
- @Override
- public void doDestroy() throws HyracksDataException {
- if (!buffers.isEmpty()) {
- unloadPages();
- }
- }
-
-}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/InvertedListBuilderFactory.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/InvertedListBuilderFactory.java
new file mode 100644
index 0000000..0d00ed3
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/InvertedListBuilderFactory.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk;
+
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilder;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilderFactory;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.fixedsize.FixedSizeElementInvertedListBuilder;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.variablesize.VariableSizeElementInvertedListBuilder;
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
+
+public class InvertedListBuilderFactory implements IInvertedListBuilderFactory {
+
+ protected final ITypeTraits[] invListFields;
+ protected final ITypeTraits[] tokenTypeTraits;
+ private final boolean isFixedSize;
+
+ public InvertedListBuilderFactory(ITypeTraits[] tokenTypeTraits, ITypeTraits[] invListFields) {
+ this.tokenTypeTraits = tokenTypeTraits;
+ this.invListFields = invListFields;
+
+ isFixedSize = InvertedIndexUtils.checkTypeTraitsAllFixed(invListFields);
+ }
+
+ @Override
+ public IInvertedListBuilder create() throws HyracksDataException {
+ if (isFixedSize) {
+ return new FixedSizeElementInvertedListBuilder(invListFields);
+ } else {
+ return new VariableSizeElementInvertedListBuilder(tokenTypeTraits, invListFields);
+ }
+ }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleAppender.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/InvertedListSearchResultFrameTupleAppender.java
similarity index 78%
rename from hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleAppender.java
rename to hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/InvertedListSearchResultFrameTupleAppender.java
index 85d8576..26c3f9c 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleAppender.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/InvertedListSearchResultFrameTupleAppender.java
@@ -23,15 +23,21 @@
import java.util.Arrays;
import org.apache.hyracks.api.comm.FrameHelper;
-import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListSearchResultFrameTupleAppender;
/**
+ * This class is mainly used to write the intermediate results in the inverted-index **search** operation.
+ * For the inverted list building phase, please refer to IInvertedListBuilder.
+ *
* An appender class for an inverted list. Each frame has two integer values at the beginning and at the end.
* The first represents the number of minimum Hyracks frames in a frame. Currently, we use 1 for this value.
* The latter represents the number of tuples in a frame. This design is required since we may need to use
* RunFileWriter and RunFileReader class during the inverted-index-search operation.
+ *
+ * Note that this appender is not aware of the tuple element type, and the length of the tuple is given by the caller
+ * at run time.
*/
-public class FixedSizeFrameTupleAppender {
+public class InvertedListSearchResultFrameTupleAppender implements IInvertedListSearchResultFrameTupleAppender {
// At the end of a frame, an integer value is written to keep the tuple count in this frame.
public static final int TUPLE_COUNT_SIZE = 4;
@@ -40,18 +46,12 @@
public static final int MINFRAME_COUNT_SIZE = 4;
private final int frameSize;
- private final int tupleSize;
private ByteBuffer buffer;
private int tupleCount;
private int tupleDataEndOffset;
- public FixedSizeFrameTupleAppender(int frameSize, ITypeTraits[] fields) {
+ public InvertedListSearchResultFrameTupleAppender(int frameSize) {
this.frameSize = frameSize;
- int tmp = 0;
- for (int i = 0; i < fields.length; i++) {
- tmp += fields[i].getFixedLength();
- }
- tupleSize = tmp;
}
public void reset(ByteBuffer buffer) {
@@ -72,16 +72,6 @@
this.tupleDataEndOffset = tupleDataEndOffset;
}
- public boolean append(byte[] bytes, int offset) {
- if (tupleDataEndOffset + tupleSize + TUPLE_COUNT_SIZE <= frameSize) {
- System.arraycopy(bytes, offset, buffer.array(), tupleDataEndOffset, tupleSize);
- tupleDataEndOffset += tupleSize;
- tupleCount++;
- return true;
- }
- return false;
- }
-
public boolean append(byte[] bytes, int offset, int length) {
if (tupleDataEndOffset + length + TUPLE_COUNT_SIZE <= frameSize) {
System.arraycopy(bytes, offset, buffer.array(), tupleDataEndOffset, length);
@@ -95,7 +85,6 @@
if (tupleDataEndOffset + 4 + TUPLE_COUNT_SIZE <= frameSize) {
buffer.putInt(tupleDataEndOffset, fieldValue);
tupleDataEndOffset += 4;
- tupleCount++;
return true;
}
return false;
@@ -105,7 +94,6 @@
if (tupleDataEndOffset + 8 + TUPLE_COUNT_SIZE <= frameSize) {
buffer.putLong(tupleDataEndOffset, fieldValue);
tupleDataEndOffset += 8;
- tupleCount++;
return true;
}
return false;
@@ -115,7 +103,6 @@
if (tupleDataEndOffset + 2 + TUPLE_COUNT_SIZE <= frameSize) {
buffer.putLong(tupleDataEndOffset, fieldValue);
tupleDataEndOffset += 2;
- tupleCount++;
return true;
}
return false;
@@ -125,21 +112,23 @@
if (tupleDataEndOffset + 1 + TUPLE_COUNT_SIZE <= frameSize) {
buffer.put(tupleDataEndOffset, fieldValue);
tupleDataEndOffset += 1;
- tupleCount++;
return true;
}
return false;
}
- // returns true if an entire tuple fits
- // returns false otherwise
- public boolean hasSpace() {
- return tupleDataEndOffset + tupleSize + TUPLE_COUNT_SIZE <= frameSize;
+ public boolean hasSpace(int length) {
+ return tupleDataEndOffset + length + TUPLE_COUNT_SIZE <= frameSize;
}
public void incrementTupleCount(int count) {
- buffer.putInt(FrameHelper.getTupleCountOffset(frameSize),
- buffer.getInt(FrameHelper.getTupleCountOffset(frameSize)) + count);
+ int tupleCountOffset = FrameHelper.getTupleCountOffset(frameSize);
+ int currentCount = buffer.getInt(tupleCountOffset);
+ int newCount = currentCount + count;
+ buffer.putInt(tupleCountOffset, newCount);
+
+ tupleCount += count;
+ assert tupleCount == newCount;
}
public int getTupleCount() {
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
index ec1f143..0f26435 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
@@ -47,11 +47,15 @@
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearcher;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilder;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import org.apache.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexSearchCursorInitialState;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.fixedsize.FixedSizeElementInvertedListScanCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.fixedsize.FixedSizeElementOnDiskInvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.variablesize.VariableSizeElementOnDiskInvertedListCursor;
import org.apache.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate;
import org.apache.hyracks.storage.am.lsm.invertedindex.search.TOccurrenceSearcher;
import org.apache.hyracks.storage.am.lsm.invertedindex.tuples.TokenKeyPairTuple;
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
import org.apache.hyracks.storage.common.IIndexAccessParameters;
import org.apache.hyracks.storage.common.IIndexAccessor;
import org.apache.hyracks.storage.common.IIndexBulkLoader;
@@ -189,18 +193,28 @@
}
@Override
- public InvertedListCursor createInvertedListCursor(IHyracksTaskContext ctx) throws HyracksDataException {
- return new FixedSizeElementInvertedListCursor(bufferCache, fileId, invListTypeTraits, ctx,
- NoOpIndexCursorStats.INSTANCE);
+ public IInvertedListCursor createInvertedListCursor(IHyracksTaskContext ctx) throws HyracksDataException {
+ if (InvertedIndexUtils.checkTypeTraitsAllFixed(invListTypeTraits)) {
+ return new FixedSizeElementOnDiskInvertedListCursor(bufferCache, fileId, invListTypeTraits, ctx,
+ NoOpIndexCursorStats.INSTANCE);
+ } else {
+ return new VariableSizeElementOnDiskInvertedListCursor(bufferCache, fileId, invListTypeTraits, ctx,
+ NoOpIndexCursorStats.INSTANCE);
+ }
}
@Override
- public InvertedListCursor createInvertedListRangeSearchCursor(IIndexCursorStats stats) throws HyracksDataException {
- return new FixedSizeElementInvertedListScanCursor(bufferCache, fileId, invListTypeTraits, stats);
+ public IInvertedListCursor createInvertedListRangeSearchCursor(IIndexCursorStats stats)
+ throws HyracksDataException {
+ if (InvertedIndexUtils.checkTypeTraitsAllFixed(invListTypeTraits)) {
+ return new FixedSizeElementInvertedListScanCursor(bufferCache, fileId, invListTypeTraits, stats);
+ } else {
+ return new VariableSizeElementOnDiskInvertedListCursor(bufferCache, fileId, invListTypeTraits, stats);
+ }
}
@Override
- public void openInvertedListCursor(InvertedListCursor listCursor, ITupleReference searchKey,
+ public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey,
IIndexOperationContext ictx) throws HyracksDataException {
OnDiskInvertedIndexOpContext ctx = (OnDiskInvertedIndexOpContext) ictx;
ctx.getBtreePred().setLowKeyComparator(ctx.getSearchCmp());
@@ -222,7 +236,7 @@
}
}
- public void openInvertedListCursor(ITupleReference btreeTuple, InvertedListCursor listCursor,
+ public void openInvertedListCursor(ITupleReference btreeTuple, IInvertedListCursor listCursor,
OnDiskInvertedIndexOpContext opCtx) throws HyracksDataException {
int startPageId = IntegerPointable.getInteger(btreeTuple.getFieldData(invListStartPageIdField),
btreeTuple.getFieldStart(invListStartPageIdField));
@@ -512,12 +526,12 @@
}
@Override
- public InvertedListCursor createInvertedListCursor() throws HyracksDataException {
+ public IInvertedListCursor createInvertedListCursor() throws HyracksDataException {
return index.createInvertedListCursor(ctx);
}
@Override
- public void openInvertedListCursor(InvertedListCursor listCursor, ITupleReference searchKey)
+ public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey)
throws HyracksDataException {
index.openInvertedListCursor(listCursor, searchKey, opCtx);
}
@@ -618,7 +632,7 @@
ArrayTupleReference prevTuple = new ArrayTupleReference();
IInvertedIndexAccessor invIndexAccessor = createAccessor(NoOpIndexAccessParameters.INSTANCE);
try {
- InvertedListCursor invListCursor = createInvertedListRangeSearchCursor(NoOpIndexCursorStats.INSTANCE);
+ IInvertedListCursor invListCursor = createInvertedListRangeSearchCursor(NoOpIndexCursorStats.INSTANCE);
MultiComparator invListCmp = MultiComparator.create(invListCmpFactories);
while (btreeCursor.hasNext()) {
btreeCursor.next();
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java
index 8ec9876..1d90180 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java
@@ -25,7 +25,7 @@
import org.apache.hyracks.storage.am.btree.impls.RangePredicate;
import org.apache.hyracks.storage.am.common.api.IIndexOperationContext;
import org.apache.hyracks.storage.am.common.impls.IndexAccessParameters;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import org.apache.hyracks.storage.am.lsm.invertedindex.tuples.TokenKeyPairTuple;
import org.apache.hyracks.storage.common.EnforcedIndexCursor;
import org.apache.hyracks.storage.common.ICursorInitialState;
@@ -44,7 +44,7 @@
private final IIndexAccessor btreeAccessor;
private final OnDiskInvertedIndex invIndex;
private final IIndexOperationContext opCtx;
- private final InvertedListCursor invListRangeSearchCursor;
+ private final IInvertedListCursor invListRangeSearchCursor;
private boolean isInvListCursorOpen;
private final IIndexCursor btreeCursor;
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java
index 1521887..fa5c365 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java
@@ -29,8 +29,8 @@
import org.apache.hyracks.storage.am.common.api.IPageManagerFactory;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearcher;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilder;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IPartitionedInvertedIndex;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
import org.apache.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate;
import org.apache.hyracks.storage.am.lsm.invertedindex.search.InvertedListPartitions;
import org.apache.hyracks.storage.am.lsm.invertedindex.search.PartitionedTOccurrenceSearcher;
@@ -113,7 +113,7 @@
ITupleReference btreeTuple = ctx.getBtreeCursor().getTuple();
short numTokens = ShortPointable.getShort(btreeTuple.getFieldData(PARTITIONING_NUM_TOKENS_FIELD),
btreeTuple.getFieldStart(PARTITIONING_NUM_TOKENS_FIELD));
- InvertedListCursor invListCursor = partSearcher.getCachedInvertedListCursor();
+ IInvertedListCursor invListCursor = partSearcher.getCachedInvertedListCursor();
openInvertedListCursor(btreeTuple, invListCursor, ctx);
invListPartitions.addInvertedListCursor(invListCursor, numTokens);
tokenExists = true;
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListBuilder.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeElementInvertedListBuilder.java
similarity index 74%
rename from hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListBuilder.java
rename to hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeElementInvertedListBuilder.java
index 3889edd..de6b4c9 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListBuilder.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeElementInvertedListBuilder.java
@@ -17,20 +17,21 @@
* under the License.
*/
-package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk;
+package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.fixedsize;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilder;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.AbstractInvertedListBuilder;
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
-public class FixedSizeElementInvertedListBuilder implements IInvertedListBuilder {
+public class FixedSizeElementInvertedListBuilder extends AbstractInvertedListBuilder {
private final int listElementSize;
- private int listSize = 0;
- private byte[] targetBuf;
- private int pos;
+ public FixedSizeElementInvertedListBuilder(ITypeTraits[] invListFields) throws HyracksDataException {
+ super(invListFields);
+ InvertedIndexUtils.verifyAllFixedSizeTypeTrait(invListFields);
- public FixedSizeElementInvertedListBuilder(ITypeTraits[] invListFields) {
int tmp = 0;
for (int i = 0; i < invListFields.length; i++) {
tmp += invListFields[i].getFixedLength();
@@ -39,7 +40,7 @@
}
@Override
- public boolean startNewList(ITupleReference tuple, int tokenField) {
+ public boolean startNewList(ITupleReference tuple, int numTokenFields) {
if (pos + listElementSize > targetBuf.length) {
return false;
} else {
@@ -67,18 +68,7 @@
}
@Override
- public void setTargetBuffer(byte[] targetBuf, int startPos) {
- this.targetBuf = targetBuf;
- this.pos = startPos;
- }
-
- @Override
- public int getListSize() {
- return listSize;
- }
-
- @Override
- public int getPos() {
- return pos;
+ public boolean isFixedSize() {
+ return true;
}
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListScanCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeElementInvertedListScanCursor.java
similarity index 90%
rename from hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListScanCursor.java
rename to hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeElementInvertedListScanCursor.java
index 583fb90..c084e45 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListScanCursor.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeElementInvertedListScanCursor.java
@@ -17,13 +17,15 @@
* under the License.
*/
-package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk;
+package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.fixedsize;
import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListTupleReference;
+import org.apache.hyracks.storage.am.lsm.invertedindex.impls.AbstractInvertedListCursor;
import org.apache.hyracks.storage.common.IIndexCursorStats;
import org.apache.hyracks.storage.common.MultiComparator;
import org.apache.hyracks.storage.common.buffercache.IBufferCache;
@@ -35,7 +37,7 @@
* conduct a binary search. It only supports the scan operation. The main purpose of this cursor is
* doing a full-scan of an inverted list during a storage-component-merge process.
*/
-public class FixedSizeElementInvertedListScanCursor extends InvertedListCursor {
+public class FixedSizeElementInvertedListScanCursor extends AbstractInvertedListCursor {
protected final IBufferCache bufferCache;
protected final int fileId;
@@ -50,7 +52,7 @@
protected int numElements;
protected int numPages;
- protected final FixedSizeTupleReference tuple;
+ protected final IInvertedListTupleReference tuple;
protected ICachedPage page;
protected boolean pinned;
@@ -74,7 +76,7 @@
this.startOff = 0;
this.numElements = 0;
this.numPages = 0;
- this.tuple = new FixedSizeTupleReference(invListFields);
+ this.tuple = new FixedSizeInvertedListTupleReference(invListFields);
this.pinned = false;
this.stats = stats;
}
@@ -147,7 +149,7 @@
}
@Override
- public int compareTo(InvertedListCursor invListCursor) {
+ public int compareTo(IInvertedListCursor invListCursor) {
try {
return numElements - invListCursor.size();
} catch (HyracksDataException hde) {
@@ -193,4 +195,4 @@
return null;
}
-}
+}
\ No newline at end of file
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeElementOnDiskInvertedListCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeElementOnDiskInvertedListCursor.java
new file mode 100644
index 0000000..11c0901
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeElementOnDiskInvertedListCursor.java
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.fixedsize;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListTupleReference;
+import org.apache.hyracks.storage.am.lsm.invertedindex.impls.AbstractOnDiskInvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
+import org.apache.hyracks.storage.common.IIndexCursorStats;
+import org.apache.hyracks.storage.common.MultiComparator;
+import org.apache.hyracks.storage.common.buffercache.IBufferCache;
+
+/**
+ * A cursor class that traverses an inverted list that consists of fixed-size elements on disk.
+ *
+ */
+public class FixedSizeElementOnDiskInvertedListCursor extends AbstractOnDiskInvertedListCursor {
+
+ private final int elementSize;
+ private int bufferEndElementIx;
+ // The last element in the current range in memory
+ protected final IInvertedListTupleReference bufferEndElementTuple;
+ // The last element index per page
+ private int[] elementIndexes = new int[10];
+
+ public FixedSizeElementOnDiskInvertedListCursor(IBufferCache bufferCache, int fileId, ITypeTraits[] invListFields,
+ IHyracksTaskContext ctx, IIndexCursorStats stats) throws HyracksDataException {
+ super(bufferCache, fileId, invListFields, ctx, stats);
+
+ this.bufferEndElementIx = 0;
+ this.bufferEndElementTuple = InvertedIndexUtils.createInvertedListTupleReference(invListFields);
+
+ int tmpSize = 0;
+ for (int i = 0; i < invListFields.length; i++) {
+ tmpSize += invListFields[i].getFixedLength();
+ }
+ elementSize = tmpSize;
+ this.currentOffsetForScan = -elementSize;
+ }
+
+ /**
+ * Returns the next element.
+ */
+ @Override
+ public void doNext() throws HyracksDataException {
+ if (currentOffsetForScan + 2 * elementSize > bufferCache.getPageSize()) {
+ currentPageIxForScan++;
+ currentOffsetForScan = 0;
+ } else {
+ currentOffsetForScan += elementSize;
+ }
+
+ // Needs to read the next block?
+ if (currentElementIxForScan > bufferEndElementIx && endPageId > bufferEndPageId) {
+ loadPages();
+ currentOffsetForScan = 0;
+ }
+
+ currentElementIxForScan++;
+
+ tuple.reset(buffers.get(currentPageIxForScan).array(), currentOffsetForScan);
+ }
+
+ /**
+ * Updates the information about this block.
+ */
+ @Override
+ protected void setBlockInfo() {
+ super.setBlockInfo();
+
+ bufferStartElementIx =
+ bufferStartPageId == startPageId ? 0 : elementIndexes[bufferStartPageId - startPageId - 1] + 1;
+ bufferEndElementIx = elementIndexes[bufferEndPageId - startPageId];
+ // Gets the final element tuple in this block.
+ getElementAtIndex(bufferEndElementIx, bufferEndElementTuple);
+ }
+
+ /**
+ * Gets the tuple for the given element index.
+ */
+ private void getElementAtIndex(int elementIx, IInvertedListTupleReference tuple) {
+ int currentPageIx =
+ binarySearch(elementIndexes, bufferStartPageId - startPageId, bufferNumLoadedPages, elementIx);
+ if (currentPageIx < 0) {
+ throw new IndexOutOfBoundsException(
+ "Requested index: " + elementIx + " from array with numElements: " + numElements);
+ }
+
+ int currentOff;
+ if (currentPageIx == 0) {
+ currentOff = startOff + elementIx * elementSize;
+ } else {
+ int relativeElementIx = elementIx - elementIndexes[currentPageIx - 1] - 1;
+ currentOff = relativeElementIx * elementSize;
+ }
+ // Gets the actual index in the buffers since buffers.size() can be smaller than the total number of pages.
+ int bufferIdx = currentPageIx % buffers.size();
+ tuple.reset(buffers.get(bufferIdx).array(), currentOff);
+ }
+
+ /**
+ * Checks whether the given tuple exists on this inverted list. This method is used when doing a random traversal.
+ */
+ @Override
+ public boolean containsKey(ITupleReference searchTuple, MultiComparator invListCmp) throws HyracksDataException {
+ // If the given element is greater than the last element in the current buffer, reads the next block.
+ if (needToReadNextBlock(searchTuple, invListCmp)) {
+ loadPages();
+ }
+ int mid = -1;
+ int begin = lastRandomSearchedElementIx;
+ int end = bufferEndElementIx;
+
+ while (begin <= end) {
+ mid = (begin + end) / 2;
+ getElementAtIndex(mid, tuple);
+ int cmp = invListCmp.compare(searchTuple, tuple);
+ if (cmp < 0) {
+ end = mid - 1;
+ } else if (cmp > 0) {
+ begin = mid + 1;
+ } else {
+ lastRandomSearchedElementIx = mid;
+ return true;
+ }
+ }
+
+ lastRandomSearchedElementIx = mid;
+ return false;
+ }
+
+ /**
+ * Checks whether the search tuple is greater than the last element in the current block of the cursor.
+ * If so, the cursor needs to load next block of the inverted list.
+ *
+ * @param searchTuple
+ * @param invListCmp
+ * @return true if the search tuple is greater than the last element in the current block of the cursor
+ * false if the search tuple is equal to or less than the last element in the current block of the cursor
+ * @throws HyracksDataException
+ */
+ private boolean needToReadNextBlock(ITupleReference searchTuple, MultiComparator invListCmp)
+ throws HyracksDataException {
+ if (moreBlocksToRead && invListCmp.compare(searchTuple, bufferEndElementTuple) > 0) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Opens the cursor for the given inverted list. After this open() call, prepareLoadPages() should be called
+ * before loadPages() is called. For more details, check prepareLoadPages().
+ */
+ @Override
+ protected void setInvListInfo(int startPageId, int endPageId, int startOff, int numElements)
+ throws HyracksDataException {
+ super.setInvListInfo(startPageId, endPageId, startOff, numElements);
+
+ if (numPages > elementIndexes.length) {
+ elementIndexes = new int[numPages];
+ }
+ this.currentOffsetForScan = startOff - elementSize;
+ // Fills the last element index per page.
+ // first page
+ int cumulElements = (bufferCache.getPageSize() - startOff) / elementSize;
+ // Deducts 1 because this is the index, not the number of elements.
+ elementIndexes[0] = cumulElements - 1;
+
+ // middle, full pages
+ for (int i = 1; i < numPages - 1; i++) {
+ elementIndexes[i] = elementIndexes[i - 1] + (bufferCache.getPageSize() / elementSize);
+ }
+
+ // last page
+ // Deducts 1 because this is the index, not the number of elements.
+ elementIndexes[numPages - 1] = numElements - 1;
+ }
+
+ /**
+ * Prints the contents of the current inverted list (a debugging method).
+ */
+ @SuppressWarnings("rawtypes")
+ @Override
+ public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException {
+ int oldCurrentOff = currentOffsetForScan;
+ int oldCurrentPageId = currentPageIxForScan;
+ int oldCurrentElementIx = currentElementIxForScan;
+
+ currentOffsetForScan = startOff - elementSize;
+ currentPageIxForScan = 0;
+ currentElementIxForScan = 0;
+
+ StringBuilder strBuilder = new StringBuilder();
+
+ while (hasNext()) {
+ next();
+ for (int i = 0; i < tuple.getFieldCount(); i++) {
+ ByteArrayInputStream inStream = new ByteArrayInputStream(tuple.getFieldData(i), tuple.getFieldStart(i),
+ tuple.getFieldLength(i));
+ DataInput dataIn = new DataInputStream(inStream);
+ Object o = serdes[i].deserialize(dataIn);
+ strBuilder.append(o.toString());
+ if (i + 1 < tuple.getFieldCount()) {
+ strBuilder.append(",");
+ }
+ }
+ strBuilder.append(" ");
+ }
+
+ // reset previous state
+ currentOffsetForScan = oldCurrentOff;
+ currentPageIxForScan = oldCurrentPageId;
+ currentElementIxForScan = oldCurrentElementIx;
+
+ return strBuilder.toString();
+ }
+
+ /**
+ * Conducts a binary search to get the index of the given key.
+ */
+ private int binarySearch(int[] arr, int arrStart, int arrLength, int key) {
+ int mid;
+ int begin = arrStart;
+ int end = arrStart + arrLength - 1;
+
+ while (begin <= end) {
+ mid = (begin + end) / 2;
+ int cmp = (key - arr[mid]);
+ if (cmp < 0) {
+ end = mid - 1;
+ } else if (cmp > 0) {
+ begin = mid + 1;
+ } else {
+ return mid;
+ }
+ }
+
+ if (begin > arr.length - 1) {
+ return -1;
+ }
+ if (key < arr[begin]) {
+ return begin;
+ } else {
+ return -1;
+ }
+ }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleAccessor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeInvertedListSearchResultFrameTupleAccessor.java
similarity index 66%
rename from hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleAccessor.java
rename to hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeInvertedListSearchResultFrameTupleAccessor.java
index 23854f9..e7a3f5d 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleAccessor.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeInvertedListSearchResultFrameTupleAccessor.java
@@ -17,31 +17,30 @@
* under the License.
*/
-package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk;
+package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.fixedsize;
-import java.nio.ByteBuffer;
-
-import org.apache.hyracks.api.comm.FrameHelper;
-import org.apache.hyracks.api.comm.IFrameTupleAccessor;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.exceptions.ErrorCode;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.AbstractInvertedListSearchResultFrameTupleAccessor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.InvertedListSearchResultFrameTupleAppender;
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
/**
- * This is a fixed-size tuple accessor class.
+ * This is a fixed-size tuple accessor class to get one inverted list.
* The frame structure: [4 bytes for minimum Hyracks frame count] [fixed-size tuple 1] ... [fixed-size tuple n] ...
* [4 bytes for the tuple count in a frame]
*/
-public class FixedSizeFrameTupleAccessor implements IFrameTupleAccessor {
+public class FixedSizeInvertedListSearchResultFrameTupleAccessor
+ extends AbstractInvertedListSearchResultFrameTupleAccessor {
- private final int frameSize;
- private ByteBuffer buffer;
-
- private final ITypeTraits[] fields;
- private final int[] fieldStartOffsets;
private final int tupleSize;
+ private final int[] fieldStartOffsets;
- public FixedSizeFrameTupleAccessor(int frameSize, ITypeTraits[] fields) {
- this.frameSize = frameSize;
- this.fields = fields;
+ public FixedSizeInvertedListSearchResultFrameTupleAccessor(int frameSize, ITypeTraits[] fields)
+ throws HyracksDataException {
+ super(frameSize, fields);
+
this.fieldStartOffsets = new int[fields.length];
this.fieldStartOffsets[0] = 0;
for (int i = 1; i < fields.length; i++) {
@@ -56,13 +55,13 @@
}
@Override
- public ByteBuffer getBuffer() {
- return buffer;
- }
+ protected void verifyTypeTraits() throws HyracksDataException {
+ InvertedIndexUtils.verifyAllFixedSizeTypeTrait(fields);
- @Override
- public int getFieldCount() {
- return fields.length;
+ if (InvertedIndexUtils.checkTypeTraitsAllFixed(fields) == false) {
+ throw HyracksDataException.create(ErrorCode.INVALID_INVERTED_LIST_TYPE_TRAITS,
+ InvertedIndexUtils.EXPECT_ALL_FIX_GET_VAR_SIZE);
+ }
}
@Override
@@ -91,27 +90,17 @@
}
@Override
- public int getTupleCount() {
- return buffer != null ? buffer.getInt(FrameHelper.getTupleCountOffset(frameSize)) : 0;
- }
-
- @Override
public int getTupleEndOffset(int tupleIndex) {
return getFieldEndOffset(tupleIndex, fields.length - 1);
}
@Override
public int getTupleStartOffset(int tupleIndex) {
- return FixedSizeFrameTupleAppender.MINFRAME_COUNT_SIZE + tupleIndex * tupleSize;
+ return InvertedListSearchResultFrameTupleAppender.MINFRAME_COUNT_SIZE + tupleIndex * tupleSize;
}
@Override
public int getAbsoluteFieldStartOffset(int tupleIndex, int fIdx) {
return getTupleStartOffset(tupleIndex) + getFieldSlotsLength() + getFieldStartOffset(tupleIndex, fIdx);
}
-
- @Override
- public void reset(ByteBuffer buffer) {
- this.buffer = buffer;
- }
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeInvertedListTupleReference.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeInvertedListTupleReference.java
new file mode 100644
index 0000000..9abedbb
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/fixedsize/FixedSizeInvertedListTupleReference.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.fixedsize;
+
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.AbstractInvertedListTupleReference;
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
+
+/**
+ * Tuple reference over an inverted-list element whose fields all have a fixed length.
+ * Field offsets and lengths are taken from the fixed lengths reported by the type traits.
+ */
+public class FixedSizeInvertedListTupleReference extends AbstractInvertedListTupleReference {
+
+    public FixedSizeInvertedListTupleReference(ITypeTraits[] typeTraits) throws HyracksDataException {
+        super(typeTraits);
+    }
+
+    /**
+     * Delegates to {@link InvertedIndexUtils#verifyAllFixedSizeTypeTrait} to validate the type traits.
+     */
+    @Override
+    protected void verifyTypeTrait() throws HyracksDataException {
+        InvertedIndexUtils.verifyAllFixedSizeTypeTrait(typeTraits);
+    }
+
+    /**
+     * Fills fieldStartOffsets[1..] as a running sum of the preceding fields' fixed lengths;
+     * entry 0 is not written here.
+     */
+    @Override
+    protected void calculateFieldStartOffsets() {
+        int fieldIdx = 1;
+        while (fieldIdx < typeTraits.length) {
+            final int previous = fieldIdx - 1;
+            fieldStartOffsets[fieldIdx] = fieldStartOffsets[previous] + typeTraits[previous].getFixedLength();
+            fieldIdx++;
+        }
+    }
+
+    /**
+     * @return the fixed length declared by the type trait of field {@code fIdx}
+     */
+    @Override
+    public int getFieldLength(int fIdx) {
+        final ITypeTraits fieldTrait = typeTraits[fIdx];
+        return fieldTrait.getFixedLength();
+    }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/variablesize/VariableSizeElementInvertedListBuilder.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/variablesize/VariableSizeElementInvertedListBuilder.java
new file mode 100644
index 0000000..cfbb779
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/variablesize/VariableSizeElementInvertedListBuilder.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.variablesize;
+
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
+import org.apache.hyracks.storage.am.common.tuples.TypeAwareTupleWriter;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.AbstractInvertedListBuilder;
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
+
+/**
+ * Builds on-disk inverted lists whose elements may be variable-size.
+ * <p>
+ * The last 4 bytes of each frame are reserved for the end offset (exclusive) of the last record in that frame,
+ * i.e. the trailing space after the last record and before those 4 bytes is treated as empty.
+ */
+public class VariableSizeElementInvertedListBuilder extends AbstractInvertedListBuilder {
+    // Number of bytes kept free at the end of a frame for the end offset of its last record.
+    private static final int FRAME_END_OFFSET_SIZE = 4;
+
+    private final ITreeIndexTupleWriter writer;
+    protected final ITypeTraits[] allFields;
+
+    /**
+     * @param tokenTypeTraits type traits of the token fields; needed because the underlying
+     *            TypeAwareTupleWriter requires the type traits of every field in the tuple,
+     *            even though the leading token fields are never accessed by the writer
+     * @param invListFields type traits of the inverted-list element fields
+     * @throws HyracksDataException propagated from the superclass constructor or from
+     *             {@link InvertedIndexUtils#verifyHasVarSizeTypeTrait}
+     */
+    public VariableSizeElementInvertedListBuilder(ITypeTraits[] tokenTypeTraits, ITypeTraits[] invListFields)
+            throws HyracksDataException {
+        super(invListFields);
+
+        // allFields = token type traits followed by the inverted-list element type traits
+        this.allFields = new ITypeTraits[invListFields.length + tokenTypeTraits.length];
+        System.arraycopy(tokenTypeTraits, 0, allFields, 0, tokenTypeTraits.length);
+        System.arraycopy(invListFields, 0, allFields, tokenTypeTraits.length, invListFields.length);
+        this.writer = new TypeAwareTupleWriter(allFields);
+
+        InvertedIndexUtils.verifyHasVarSizeTypeTrait(invListFields);
+    }
+
+    /**
+     * Starts a new list if the element fields of {@code tuple} fit into the remaining space of the target buffer.
+     *
+     * @return {@code false} without changing any state if the space is insufficient; otherwise resets
+     *         {@code listSize} to 0 and returns {@code true}
+     */
+    @Override
+    public boolean startNewList(ITupleReference tuple, int numTokenFields) {
+        if (!checkEnoughSpace(tuple, numTokenFields, tuple.getFieldCount() - numTokenFields)) {
+            return false;
+        }
+        listSize = 0;
+        return true;
+    }
+
+    private boolean checkEnoughSpace(ITupleReference tuple, int numTokenFields, int numElementFields) {
+        return checkEnoughSpace(writer.bytesRequired(tuple, numTokenFields, numElementFields));
+    }
+
+    private boolean checkEnoughSpace(int numBytesRequired) {
+        // The trailing bytes are reserved for the end offset of the last record in the current page
+        return pos + numBytesRequired + FRAME_END_OFFSET_SIZE <= targetBuf.length;
+    }
+
+    /**
+     * Appends the element fields of {@code tuple} at the current position and records the new end offset
+     * in the frame.
+     *
+     * @return {@code false} if the element does not fit into the remaining space; {@code true} once written
+     */
+    @Override
+    public boolean appendElement(ITupleReference tuple, int numTokenFields, int numElementFields) {
+        int numBytesRequired = writer.bytesRequired(tuple, numTokenFields, numElementFields);
+        if (!checkEnoughSpace(numBytesRequired)) {
+            return false;
+        }
+
+        pos += writer.writeTupleFields(tuple, numTokenFields, numElementFields, targetBuf, pos);
+        listSize++;
+
+        InvertedIndexUtils.setInvertedListFrameEndOffset(targetBuf, pos);
+        return true;
+    }
+
+    @Override
+    public boolean isFixedSize() {
+        return false;
+    }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/variablesize/VariableSizeElementOnDiskInvertedListCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/variablesize/VariableSizeElementOnDiskInvertedListCursor.java
new file mode 100644
index 0000000..ee2bc58
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/variablesize/VariableSizeElementOnDiskInvertedListCursor.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.variablesize;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
+import org.apache.hyracks.storage.am.common.tuples.TypeAwareTupleWriter;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListTupleReference;
+import org.apache.hyracks.storage.am.lsm.invertedindex.impls.AbstractOnDiskInvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
+import org.apache.hyracks.storage.common.ICursorInitialState;
+import org.apache.hyracks.storage.common.IIndexCursorStats;
+import org.apache.hyracks.storage.common.ISearchPredicate;
+import org.apache.hyracks.storage.common.MultiComparator;
+import org.apache.hyracks.storage.common.buffercache.IBufferCache;
+
+/**
+ * A cursor class that traverses an inverted list that consists of variable-size elements on disk.
+ */
+public class VariableSizeElementOnDiskInvertedListCursor extends AbstractOnDiskInvertedListCursor {
+
+    // The scan offset is set to 0 when initialized, and we need such an isInit flag
+    // to avoid increasing the offset for the first element in the list when calling next()
+    private boolean isInit;
+    // Scratch reference and writer, used only to measure the byte length of the element at the scan offset
+    private final IInvertedListTupleReference tupleReference;
+    private final ITreeIndexTupleWriter tupleWriter;
+
+    public VariableSizeElementOnDiskInvertedListCursor(IBufferCache bufferCache, int fileId,
+            ITypeTraits[] invListFields, IIndexCursorStats stats) throws HyracksDataException {
+        super(bufferCache, fileId, invListFields, stats);
+        this.isInit = true;
+        this.tupleReference = new VariableSizeInvertedListTupleReference(invListFields);
+        this.tupleWriter = new TypeAwareTupleWriter(invListFields);
+    }
+
+    public VariableSizeElementOnDiskInvertedListCursor(IBufferCache bufferCache, int fileId,
+            ITypeTraits[] invListFields, IHyracksTaskContext ctx, IIndexCursorStats stats) throws HyracksDataException {
+        super(bufferCache, fileId, invListFields, ctx, stats);
+        this.isInit = true;
+        // Same helpers as the other constructor, so both leave the cursor in the same state
+        this.tupleReference = new VariableSizeInvertedListTupleReference(invListFields);
+        this.tupleWriter = new TypeAwareTupleWriter(invListFields);
+    }
+
+    @Override
+    protected void doOpen(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException {
+        super.doOpen(initialState, searchPred);
+
+        // Note that the cursors can be re-used in the upper-layer callers so we need to reset the state variables when open()
+        // tupleReference is re-pointed by reset() before each use and tupleWriter is only asked for byte counts,
+        // so both are created once in the constructors rather than on every open()
+        currentElementIxForScan = 0;
+        isInit = true;
+    }
+
+    /**
+     * Advances the scan to the next element and points the cursor's tuple at it.
+     */
+    @Override
+    public void doNext() throws HyracksDataException {
+        // init state for the first element: leave currentOffsetForScan where it already is
+        if (isInit) {
+            isInit = false;
+        } else {
+            // Skip over the current element: its byte length is obtained from the tuple writer
+            tupleReference.reset(buffers.get(currentPageIxForScan).array(), currentOffsetForScan);
+            currentOffsetForScan += tupleWriter.bytesRequired(tupleReference);
+        }
+
+        int currentPageEndOffset =
+                InvertedIndexUtils.getInvertedListFrameEndOffset(buffers.get(currentPageIxForScan).array());
+        assert currentOffsetForScan <= currentPageEndOffset;
+        // We reach the end of the current frame, turn to the next frame
+        if (currentOffsetForScan >= currentPageEndOffset) {
+            currentPageIxForScan++;
+            currentOffsetForScan = 0;
+        }
+
+        // Needs to read the next block?
+        if (currentPageIxForScan >= buffers.size() && endPageId > bufferEndPageId) {
+            loadPages();
+            currentOffsetForScan = 0;
+        }
+
+        currentElementIxForScan++;
+        tuple.reset(buffers.get(currentPageIxForScan).array(), currentOffsetForScan);
+    }
+
+    /**
+     * Updates the information about this block.
+     */
+    @Override
+    protected void setBlockInfo() {
+        super.setBlockInfo();
+        currentOffsetForScan = bufferStartElementIx == 0 ? startOff : 0;
+    }
+
+    /**
+     * Checks whether the given tuple exists on this inverted list. This method is used when doing a random traversal.
+     */
+    @Override
+    public boolean containsKey(ITupleReference searchTuple, MultiComparator invListCmp) throws HyracksDataException {
+        if (isInit) {
+            // when isInit, the tuple is null, call next to fetch one tuple
+            next();
+        }
+        while (hasNext()) {
+            int cmp = invListCmp.compare(searchTuple, tuple);
+            if (cmp < 0) {
+                return false;
+            } else if (cmp == 0) {
+                return true;
+            }
+            // ToDo: here we get the tuple first and then call next() later because the upper-layer caller in InvertedListMerger already called next()
+            // However, this is not consistent with other use cases of next() in AsterixDB
+            // Maybe we need to fix the upper layer InvertedListMerger part to call next() first then getTuple()
+            // to follow the convention to use cursor
+            next();
+        }
+
+        if (tuple != null) {
+            int cmp = invListCmp.compare(searchTuple, tuple);
+            if (cmp < 0) {
+                return false;
+            } else if (cmp == 0) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Sets the inverted list info via the superclass and positions the scan offset at {@code startOff}.
+     * After the cursor is opened, prepareLoadPages() should be called
+     * before loadPages() is called. For more details, check prepareLoadPages().
+     */
+    @Override
+    protected void setInvListInfo(int startPageId, int endPageId, int startOff, int numElements)
+            throws HyracksDataException {
+        super.setInvListInfo(startPageId, endPageId, startOff, numElements);
+
+        this.currentOffsetForScan = startOff;
+    }
+
+    /**
+     * Prints the contents of the current inverted list (a debugging method).
+     */
+    @SuppressWarnings("rawtypes")
+    @Override
+    public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException {
+        // Will implement later if necessary
+        return "";
+    }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/variablesize/VariableSizeInvertedListSearchResultFrameTupleAccessor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/variablesize/VariableSizeInvertedListSearchResultFrameTupleAccessor.java
new file mode 100644
index 0000000..ab71c0a
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/variablesize/VariableSizeInvertedListSearchResultFrameTupleAccessor.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.variablesize;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
+import org.apache.hyracks.storage.am.common.tuples.TypeAwareTupleWriter;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListTupleReference;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.AbstractInvertedListSearchResultFrameTupleAccessor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.InvertedListSearchResultFrameTupleAppender;
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
+
+/**
+ * This is a variable-size tuple accessor class.
+ * The frame structure: [4 bytes for minimum Hyracks frame count] [variable-size tuple 1] ... [variable-size tuple n] ...
+ * [4 bytes for the tuple count in a frame]
+ *
+ * This frame accessor is mainly used to merge two inverted lists, e.g. when searching the conjunction of two keywords ("abc" AND "xyz")
+ *
+ * For now, such a variable-size tuple accessor is only meant for iterating over the tuples in order,
+ * not for random access, because the in-page tuple offsets are not available (not stored on disk)
+ * until we scan the tuples one by one
+ */
+public class VariableSizeInvertedListSearchResultFrameTupleAccessor
+        extends AbstractInvertedListSearchResultFrameTupleAccessor {
+    // ToDo: use a scanner model to read tuples one by one.
+    // It is not necessary to support random access because it is used only when merging lists
+    // In fact, now we need to scan the frame twice: once to get the tupleStartOffsets
+    // and then once more using these offsets for a scan purpose (no random access needed in the upper layer) only
+
+    // Start offset of each tuple, relative to the first tuple (i.e. excluding MINFRAME_COUNT_SIZE)
+    private int[] tupleStartOffsets;
+    private int tupleCount;
+    // Byte length of the last tuple in the frame; it has no successor to derive its end offset from
+    private int lastTupleLen;
+    private final IInvertedListTupleReference tupleReference;
+    private final ITreeIndexTupleWriter tupleWriter;
+
+    public VariableSizeInvertedListSearchResultFrameTupleAccessor(int frameSize, ITypeTraits[] fields)
+            throws HyracksDataException {
+        super(frameSize, fields);
+
+        this.tupleWriter = new TypeAwareTupleWriter(fields);
+        this.tupleReference = new VariableSizeInvertedListTupleReference(fields);
+    }
+
+    @Override
+    protected void verifyTypeTraits() throws HyracksDataException {
+        InvertedIndexUtils.verifyHasVarSizeTypeTrait(fields);
+    }
+
+    // Points the scratch reference at startPos and returns the byte count the tuple writer reports for it
+    private int getTupleLengthAtPos(int startPos) {
+        tupleReference.reset(buffer.array(), startPos);
+        return tupleWriter.bytesRequired(tupleReference);
+    }
+
+    /**
+     * Binds the accessor to {@code buffer} and scans its tuples once to record where each tuple starts.
+     */
+    @Override
+    public void reset(ByteBuffer buffer) {
+        super.reset(buffer);
+
+        tupleCount = getTupleCount();
+        tupleStartOffsets = new int[tupleCount];
+        lastTupleLen = 0;
+
+        int pos = InvertedListSearchResultFrameTupleAppender.MINFRAME_COUNT_SIZE;
+        for (int i = 0; i < tupleCount; i++) {
+            tupleStartOffsets[i] = pos - InvertedListSearchResultFrameTupleAppender.MINFRAME_COUNT_SIZE;
+            // Measure the tuple that starts at pos; after the final iteration this is the last tuple's length
+            lastTupleLen = getTupleLengthAtPos(pos);
+            pos += lastTupleLen;
+        }
+    }
+
+    @Override
+    public int getTupleStartOffset(int tupleIndex) {
+        return InvertedListSearchResultFrameTupleAppender.MINFRAME_COUNT_SIZE + tupleStartOffsets[tupleIndex];
+    }
+
+    /**
+     * Returns the end offset (exclusive) of the given tuple within the frame; a negative index yields
+     * MINFRAME_COUNT_SIZE, i.e. the offset at which the first tuple starts.
+     */
+    @Override
+    public int getTupleEndOffset(int tupleIndex) {
+        if (tupleIndex < 0) {
+            return InvertedListSearchResultFrameTupleAppender.MINFRAME_COUNT_SIZE;
+        }
+        if (tupleIndex == tupleCount - 1) {
+            // The last tuple has no successor, so its end is derived from its own length
+            return InvertedListSearchResultFrameTupleAppender.MINFRAME_COUNT_SIZE + tupleStartOffsets[tupleIndex]
+                    + lastTupleLen;
+        }
+        return InvertedListSearchResultFrameTupleAppender.MINFRAME_COUNT_SIZE + tupleStartOffsets[tupleIndex + 1];
+    }
+
+    @Override
+    public int getTupleLength(int tupleIndex) {
+        return getTupleEndOffset(tupleIndex) - getTupleStartOffset(tupleIndex);
+    }
+
+    @Override
+    public int getFieldSlotsLength() {
+        return 0;
+    }
+
+    @Override
+    public int getFieldStartOffset(int tupleIndex, int fIdx) {
+        tupleReference.reset(buffer.array(), getTupleStartOffset(tupleIndex));
+        return tupleReference.getFieldStart(fIdx);
+    }
+
+    @Override
+    public int getFieldEndOffset(int tupleIndex, int fIdx) {
+        tupleReference.reset(buffer.array(), getTupleStartOffset(tupleIndex));
+        return tupleReference.getFieldStart(fIdx) + tupleReference.getFieldLength(fIdx);
+    }
+
+    @Override
+    public int getFieldLength(int tupleIndex, int fIdx) {
+        return getFieldEndOffset(tupleIndex, fIdx) - getFieldStartOffset(tupleIndex, fIdx);
+    }
+
+    // NOTE(review): getFieldStartOffset() returns tupleReference.getFieldStart() for a reference reset at the
+    // tuple's start offset. If that value is already an offset into the whole buffer, adding
+    // getTupleStartOffset() below counts the tuple start twice -- confirm against TypeAwareTupleReference.
+    @Override
+    public int getAbsoluteFieldStartOffset(int tupleIndex, int fIdx) {
+        return getTupleStartOffset(tupleIndex) + getFieldSlotsLength() + getFieldStartOffset(tupleIndex, fIdx);
+    }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/variablesize/VariableSizeInvertedListTupleReference.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/variablesize/VariableSizeInvertedListTupleReference.java
new file mode 100644
index 0000000..8e1db66
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/variablesize/VariableSizeInvertedListTupleReference.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.variablesize;
+
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
+import org.apache.hyracks.storage.am.common.tuples.TypeAwareTupleReference;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.AbstractInvertedListTupleReference;
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
+
+/**
+ * Tuple reference for inverted-list elements whose type traits pass
+ * {@link InvertedIndexUtils#verifyHasVarSizeTypeTrait}; field positions and lengths are resolved
+ * by an internal {@link TypeAwareTupleReference}.
+ */
+public class VariableSizeInvertedListTupleReference extends AbstractInvertedListTupleReference {
+
+    // Answers the per-field start and length queries for the tuple currently referenced
+    private ITreeIndexTupleReference delegate;
+
+    public VariableSizeInvertedListTupleReference(ITypeTraits[] typeTraits) throws HyracksDataException {
+        super(typeTraits);
+
+        this.delegate = new TypeAwareTupleReference(typeTraits);
+    }
+
+    @Override
+    protected void verifyTypeTrait() throws HyracksDataException {
+        InvertedIndexUtils.verifyHasVarSizeTypeTrait(typeTraits);
+    }
+
+    /**
+     * Re-points the internal tuple reference at the current {@code data} / {@code startOff}.
+     */
+    @Override
+    protected void calculateFieldStartOffsets() {
+        delegate.resetByTupleOffset(data, startOff);
+    }
+
+    @Override
+    public int getFieldCount() {
+        return typeTraits.length;
+    }
+
+    @Override
+    public byte[] getFieldData(int fIdx) {
+        return data;
+    }
+
+    @Override
+    public int getFieldLength(int fIdx) {
+        return delegate.getFieldLength(fIdx);
+    }
+
+    @Override
+    public int getFieldStart(int fIdx) {
+        return delegate.getFieldStart(fIdx);
+    }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
index 06ea4eb..1a08fd0 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
@@ -42,14 +42,14 @@
import org.apache.hyracks.dataflow.std.buffermanager.ISimpleFrameBufferManager;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInPlaceInvertedIndex;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearcher;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListTupleReference;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IObjectFactory;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
-import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeFrameTupleAccessor;
-import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeTupleReference;
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizer;
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IToken;
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.TokenizerInfo.TokenizerType;
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
import org.apache.hyracks.storage.am.lsm.invertedindex.util.ObjectCache;
import org.apache.hyracks.storage.common.MultiComparator;
@@ -76,21 +76,21 @@
protected int occurrenceThreshold;
- protected final IObjectFactory<InvertedListCursor> invListCursorFactory;
- protected final ObjectCache<InvertedListCursor> invListCursorCache;
+ protected final IObjectFactory<IInvertedListCursor> invListCursorFactory;
+ protected final ObjectCache<IInvertedListCursor> invListCursorCache;
protected final ISimpleFrameBufferManager bufferManager;
protected boolean isFinishedSearch;
// For a single inverted list case
- protected InvertedListCursor singleInvListCursor;
+ protected IInvertedListCursor singleInvListCursor;
protected boolean isSingleInvertedList;
// To read the final search result
protected ByteBuffer searchResultBuffer;
protected int searchResultTupleIndex = 0;
protected final IFrameTupleAccessor searchResultFta;
- protected FixedSizeTupleReference searchResultTuple;
+ protected IInvertedListTupleReference searchResultTuple;
public AbstractTOccurrenceSearcher(IInPlaceInvertedIndex invIndex, IHyracksTaskContext ctx)
throws HyracksDataException {
@@ -118,9 +118,9 @@
this.queryTokenAppender = new FrameTupleAppenderAccessor(QUERY_TOKEN_REC_DESC);
this.queryTokenAppender.reset(queryTokenFrame, true);
this.isSingleInvertedList = false;
- this.searchResultTuple = new FixedSizeTupleReference(invIndex.getInvListTypeTraits());
- this.searchResultFta =
- new FixedSizeFrameTupleAccessor(ctx.getInitialFrameSize(), invIndex.getInvListTypeTraits());
+ this.searchResultTuple = InvertedIndexUtils.createInvertedListTupleReference(invIndex.getInvListTypeTraits());
+ this.searchResultFta = InvertedIndexUtils.createInvertedListFrameTupleAccessor(ctx.getInitialFrameSize(),
+ invIndex.getInvListTypeTraits());
}
protected void tokenizeQuery(InvertedIndexSearchPredicate searchPred) throws HyracksDataException {
@@ -178,7 +178,7 @@
public void printNewResults(int maxResultBufIdx, List<ByteBuffer> buffer) {
StringBuffer strBuffer = new StringBuffer();
- FixedSizeFrameTupleAccessor resultFrameTupleAcc = finalSearchResult.getAccessor();
+ IFrameTupleAccessor resultFrameTupleAcc = finalSearchResult.getAccessor();
for (int i = 0; i <= maxResultBufIdx; i++) {
ByteBuffer testBuf = buffer.get(i);
resultFrameTupleAcc.reset(testBuf);
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedIndexFinalSearchResult.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedIndexFinalSearchResult.java
index 55acb33..e5f4622 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedIndexFinalSearchResult.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedIndexFinalSearchResult.java
@@ -23,7 +23,6 @@
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
-import org.apache.hyracks.api.exceptions.ErrorCode;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
import org.apache.hyracks.dataflow.std.buffermanager.ISimpleFrameBufferManager;
@@ -45,12 +44,9 @@
@Override
protected void initTypeTraits(ITypeTraits[] invListFields) {
typeTraits = new ITypeTraits[invListFields.length];
- int tmp = 0;
for (int i = 0; i < invListFields.length; i++) {
typeTraits[i] = invListFields[i];
- tmp += invListFields[i].getFixedLength();
}
- invListElementSize = tmp;
}
/**
@@ -76,13 +72,13 @@
*/
@Override
public boolean append(ITupleReference invListElement, int count) throws HyracksDataException {
- // Pauses the addition of this tuple if the current page is full.
- if (!appender.hasSpace()) {
+ int numBytesRequired = getNumBytesRequired(invListElement);
+ if (!appender.hasSpace(numBytesRequired)) {
return false;
}
- // Appends the given inverted-list element.
- if (!appender.append(invListElement.getFieldData(0), invListElement.getFieldStart(0), invListElementSize)) {
- throw HyracksDataException.create(ErrorCode.CANNOT_ADD_ELEMENT_TO_INVERTED_INDEX_SEARCH_RESULT);
+
+ if (!appendInvertedListElement(invListElement)) {
+ return false;
}
appender.incrementTupleCount(1);
numResults++;
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedIndexSearchResult.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedIndexSearchResult.java
index 527d624..707d85d 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedIndexSearchResult.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedIndexSearchResult.java
@@ -25,6 +25,7 @@
import java.util.ListIterator;
import org.apache.hyracks.api.comm.IFrame;
+import org.apache.hyracks.api.comm.IFrameTupleAccessor;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
import org.apache.hyracks.api.exceptions.ErrorCode;
@@ -36,9 +37,12 @@
import org.apache.hyracks.dataflow.common.io.RunFileWriter;
import org.apache.hyracks.dataflow.std.buffermanager.BufferManagerBackedVSizeFrame;
import org.apache.hyracks.dataflow.std.buffermanager.ISimpleFrameBufferManager;
-import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeFrameTupleAccessor;
-import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeFrameTupleAppender;
-import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeTupleReference;
+import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
+import org.apache.hyracks.storage.am.common.tuples.TypeAwareTupleWriter;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListSearchResultFrameTupleAppender;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListTupleReference;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.InvertedListSearchResultFrameTupleAppender;
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
/**
* Disk-based or in-memory based storage for intermediate and final results of inverted-index
@@ -50,18 +54,18 @@
// I/O buffer's index in the buffers
protected static final int IO_BUFFER_IDX = 0;
protected static final String FILE_PREFIX = "InvertedIndexSearchResult";
+
protected final IHyracksTaskContext ctx;
- protected final FixedSizeFrameTupleAppender appender;
- protected final FixedSizeFrameTupleAccessor accessor;
- protected final FixedSizeTupleReference tuple;
+ protected final IInvertedListSearchResultFrameTupleAppender appender;
+ protected final IFrameTupleAccessor accessor;
+ protected final IInvertedListTupleReference tuple;
protected final ISimpleFrameBufferManager bufferManager;
protected ITypeTraits[] typeTraits;
- protected int invListElementSize;
+ protected ITypeTraits[] invListFields;
protected int currentWriterBufIdx;
protected int currentReaderBufIdx;
protected int numResults;
- protected int numPossibleElementPerPage;
// Read and Write I/O buffer
protected IFrame ioBufferFrame = null;
protected ByteBuffer ioBuffer = null;
@@ -75,14 +79,19 @@
protected boolean isInReadMode;
protected boolean isWriteFinished;
protected boolean isFileOpened;
+ // Used for variable-size element in the inverted list
+ protected ITreeIndexTupleWriter tupleWriter;
+ protected byte[] tempBytes;
public InvertedIndexSearchResult(ITypeTraits[] invListFields, IHyracksTaskContext ctx,
ISimpleFrameBufferManager bufferManager) throws HyracksDataException {
+ this.invListFields = invListFields;
+ this.tupleWriter = new TypeAwareTupleWriter(invListFields);
initTypeTraits(invListFields);
this.ctx = ctx;
- appender = new FixedSizeFrameTupleAppender(ctx.getInitialFrameSize(), typeTraits);
- accessor = new FixedSizeFrameTupleAccessor(ctx.getInitialFrameSize(), typeTraits);
- tuple = new FixedSizeTupleReference(typeTraits);
+ appender = new InvertedListSearchResultFrameTupleAppender(ctx.getInitialFrameSize());
+ accessor = InvertedIndexUtils.createInvertedListFrameTupleAccessor(ctx.getInitialFrameSize(), typeTraits);
+ tuple = InvertedIndexUtils.createInvertedListTupleReference(typeTraits);
this.bufferManager = bufferManager;
this.isInReadMode = false;
this.isWriteFinished = false;
@@ -94,7 +103,7 @@
this.currentWriterBufIdx = 0;
this.currentReaderBufIdx = 0;
this.numResults = 0;
- calculateNumElementPerPage();
+ this.tempBytes = new byte[ctx.getInitialFrameSize()];
// Allocates one frame for read/write operation.
prepareIOBuffer();
}
@@ -105,16 +114,33 @@
*/
protected void initTypeTraits(ITypeTraits[] invListFields) {
typeTraits = new ITypeTraits[invListFields.length + 1];
- int tmp = 0;
for (int i = 0; i < invListFields.length; i++) {
typeTraits[i] = invListFields[i];
- tmp += invListFields[i].getFixedLength();
}
- invListElementSize = tmp;
// Integer for counting occurrences.
typeTraits[invListFields.length] = IntegerPointable.TYPE_TRAITS;
}
+    /**
+     * Computes how many frames are expected to be needed for the given number of result elements.
+     *
+     * @param numExpectedElements the number of elements the result is expected to hold
+     * @return the expected number of pages when all inverted-list fields are fixed-size; -1 otherwise
+     */
+    public int getExpectedNumPages(int numExpectedElements) {
+        if (!InvertedIndexUtils.checkTypeTraitsAllFixed(invListFields)) {
+            return -1;
+        }
+
+        int elementBytes = 0;
+        for (ITypeTraits fieldTraits : invListFields) {
+            elementBytes += fieldTraits.getFixedLength();
+        }
+
+        // The count of Minframe and the count of tuples in a frame are deducted from the usable space.
+        int usableBytes = ctx.getInitialFrameSize() - InvertedListSearchResultFrameTupleAppender.MINFRAME_COUNT_SIZE
+                - InvertedListSearchResultFrameTupleAppender.TUPLE_COUNT_SIZE;
+        // Each stored element is followed by its occurrence count.
+        int elementsPerPage = (int) Math.floor((double) usableBytes / (elementBytes + ELEMENT_COUNT_SIZE));
+        return (int) Math.ceil((double) numExpectedElements / elementsPerPage);
+    }
+
/**
* Prepares the write operation. Tries to allocate buffers for the expected number of pages.
* If that is possible, all operations will be executed in memory.
@@ -127,7 +153,15 @@
}
// Intermediate results? disk or in-memory based
// Allocates more buffers.
- isInMemoryOpMode = tryAllocateBuffers(numExpectedPages);
+ if (InvertedIndexUtils.checkTypeTraitsAllFixed(typeTraits)) {
+ isInMemoryOpMode = tryAllocateBuffers(numExpectedPages);
+ } else {
+ // When one of the type traits is variable length, disable the in memory mode
+ // because the length of the inverted list is unknown, and thus may exceed the memory budget
+ // A better way to do so might be to flush to disk when out-of-memory on the fly
+ // instead of deciding the in memory mode or not before we merge the results
+ isInMemoryOpMode = false;
+ }
if (!isInMemoryOpMode) {
// Not enough number of buffers. Switch to the file I/O mode.
createAndOpenFile();
@@ -136,14 +170,47 @@
isWriteFinished = false;
}
+    /**
+     * Returns the number of bytes needed to store field 0 of the given inverted-list element:
+     * the field length itself when the first inverted-list field is fixed-length, otherwise
+     * the size reported by the tuple writer for that single field.
+     */
+    protected int getNumBytesRequired(ITupleReference invListElement) {
+        return invListFields[0].isFixedLength() ? invListElement.getFieldLength(0)
+                : tupleWriter.bytesRequired(invListElement, 0, 1);
+    }
+
+    /**
+     * Appends field 0 of the given inverted-list element to the current frame through the appender.
+     * A fixed-length field is copied directly from the element. A variable-length field is first
+     * written by the tuple writer into the scratch buffer and then appended from there.
+     *
+     * @param invListElement the element whose field 0 is appended
+     * @return always true; every failure is reported through an exception
+     * @throws HyracksDataException if the element cannot be added to the search result
+     */
+    protected boolean appendInvertedListElement(ITupleReference invListElement) throws HyracksDataException {
+        if (invListFields[0].isFixedLength()) {
+            if (!appender.append(invListElement.getFieldData(0), invListElement.getFieldStart(0),
+                    invListElement.getFieldLength(0))) {
+                throw HyracksDataException.create(ErrorCode.CANNOT_ADD_ELEMENT_TO_INVERTED_INDEX_SEARCH_RESULT);
+            }
+            return true;
+        }
+
+        int numBytesRequired = getNumBytesRequired(invListElement);
+        // The scratch buffer has a fixed size, so an element that does not fit is rejected with the
+        // regular error and the tuple writer is never asked to write more bytes than the buffer holds.
+        if (numBytesRequired > tempBytes.length) {
+            throw HyracksDataException.create(ErrorCode.CANNOT_ADD_ELEMENT_TO_INVERTED_INDEX_SEARCH_RESULT);
+        }
+        tupleWriter.writeTupleFields(invListElement, 0, 1, tempBytes, 0);
+        if (!appender.append(tempBytes, 0, numBytesRequired)) {
+            throw HyracksDataException.create(ErrorCode.CANNOT_ADD_ELEMENT_TO_INVERTED_INDEX_SEARCH_RESULT);
+        }
+        return true;
+    }
+
/**
* Appends an element and its count to the current frame of this result. The boolean value is necessary for
- * the final search result case since the append() of that class is overriding this method.
+ * the final search result case since the append() of that class is **overriding** this method.
+ *
+ * Note that if the current buffer runs out of space, this method automatically continues writing to the next buffer.
+ * This is different from the append() method in the final search result which will simply return false.
*/
public boolean append(ITupleReference invListElement, int count) throws HyracksDataException {
+
+ int numBytesRequired = getNumBytesRequired(invListElement);
ByteBuffer currentBuffer;
// Moves to the next page if the current page is full.
- if (!appender.hasSpace()) {
+ // + 4 for the count
+ if (!appender.hasSpace(numBytesRequired + 4)) {
currentWriterBufIdx++;
if (isInMemoryOpMode) {
currentBuffer = buffers.get(currentWriterBufIdx);
@@ -153,10 +220,9 @@
}
appender.reset(currentBuffer);
}
- // Appends inverted-list element.
- if (!appender.append(invListElement.getFieldData(0), invListElement.getFieldStart(0), invListElementSize)) {
- throw HyracksDataException.create(ErrorCode.CANNOT_ADD_ELEMENT_TO_INVERTED_INDEX_SEARCH_RESULT);
- }
+
+ appendInvertedListElement(invListElement);
+
// Appends count.
if (!appender.append(count)) {
throw HyracksDataException.create(ErrorCode.CANNOT_ADD_ELEMENT_TO_INVERTED_INDEX_SEARCH_RESULT);
@@ -292,23 +358,6 @@
}
/**
- * Gets the expected number of pages if all elements are created as a result.
- * An assumption is that there are no common elements between the previous result and the cursor.
- */
- public int getExpectedNumPages(int numExpectedElements) {
- return (int) Math.ceil((double) numExpectedElements / numPossibleElementPerPage);
- }
-
- // Gets the number of possible elements per page based on the inverted list element size.
- protected void calculateNumElementPerPage() {
- int frameSize = ctx.getInitialFrameSize();
- // The count of Minframe, and the count of tuples in a frame should be deducted.
- frameSize = frameSize - FixedSizeFrameTupleAppender.MINFRAME_COUNT_SIZE
- - FixedSizeFrameTupleAppender.TUPLE_COUNT_SIZE;
- numPossibleElementPerPage = (int) Math.floor((double) frameSize / (invListElementSize + ELEMENT_COUNT_SIZE));
- }
-
- /**
* Allocates the buffer for read/write operation and initializes the buffers array that will be used keep a result.
*/
protected void prepareIOBuffer() throws HyracksDataException {
@@ -334,12 +383,28 @@
* Tries to allocate buffers to accommodate the results in memory.
*/
protected boolean tryAllocateBuffers(int numExpectedPages) throws HyracksDataException {
+ assert numExpectedPages >= 0;
+
boolean allBufferAllocated = true;
while (buffers.size() < numExpectedPages) {
+ // Currently, the buffers (optional, needs multiple pages, for in-memory mode)
+ // and the ioBuffer (must-have, needs one page only, for disk IO, related code is in the above prepareIOBuffer())
+ // are both acquired from the same bufferManager.
+ // It may be possible that one search result acquires all the frames for its in-memory usage,
+ // and then the next search result cannot even get one frame for its disk IO usage.
+ // In this case, the second search result will exit with an out-of-memory error.
+ //
+ // To avoid the above issue, maybe we need to create **two** buffer managers, one to manage in-memory frame usage,
+ // and the other to manage on-disk frame usage to guarantee that every search result has at least one disk IO frame.
+ // Or, to make things simpler, we can let the search result manage its own disk IO frame,
+ // i.e. create the frame on its own rather than acquire from the buffer manager.
+ // In this case, we cannot reuse frames, but each search result needs only one IO frame,
+ // and the number of search results is pretty limited (e.g. one result per query keyword).
ByteBuffer tmpBuffer = bufferManager.acquireFrame(ctx.getInitialFrameSize());
if (tmpBuffer == null) {
// Budget exhausted
allBufferAllocated = false;
+ deallocateBuffers();
break;
} else {
clearBuffer(tmpBuffer);
@@ -390,15 +455,15 @@
}
}
- public FixedSizeFrameTupleAccessor getAccessor() {
+ public IFrameTupleAccessor getAccessor() {
return accessor;
}
- public FixedSizeFrameTupleAppender getAppender() {
+ public IInvertedListSearchResultFrameTupleAppender getAppender() {
return appender;
}
- public FixedSizeTupleReference getTuple() {
+ public IInvertedListTupleReference getTuple() {
return tuple;
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListCursorFactory.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListCursorFactory.java
index 6d61f32..4f53d41 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListCursorFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListCursorFactory.java
@@ -22,10 +22,10 @@
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInPlaceInvertedIndex;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IObjectFactory;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
-public class InvertedListCursorFactory implements IObjectFactory<InvertedListCursor> {
+public class InvertedListCursorFactory implements IObjectFactory<IInvertedListCursor> {
private final IInPlaceInvertedIndex invIndex;
private final IHyracksTaskContext ctx;
@@ -36,7 +36,7 @@
}
@Override
- public InvertedListCursor create() throws HyracksDataException {
+ public IInvertedListCursor create() throws HyracksDataException {
return invIndex.createInvertedListCursor(ctx);
}
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListMerger.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListMerger.java
index f3ca8b0..deea12a 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListMerger.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListMerger.java
@@ -23,6 +23,7 @@
import java.util.Collections;
import java.util.List;
+import org.apache.hyracks.api.comm.IFrameTupleAccessor;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.ErrorCode;
import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -30,9 +31,8 @@
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
import org.apache.hyracks.dataflow.std.buffermanager.ISimpleFrameBufferManager;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
-import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeFrameTupleAccessor;
-import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeTupleReference;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListTupleReference;
import org.apache.hyracks.storage.common.MultiComparator;
/**
@@ -54,7 +54,7 @@
protected InvertedIndexFinalSearchResult finalSearchResult;
// To Keep the status of this merge process since we only calculate one frame at a time in case of the final result
- protected InvertedListCursor finalInvListCursor;
+ protected IInvertedListCursor finalInvListCursor;
protected int occurrenceThreshold;
protected int numInvertedLists;
protected int invListIdx;
@@ -63,12 +63,18 @@
protected int maxPrevBufIdx;
protected int numExpectedPages;
protected ByteBuffer prevCurrentBuffer;
- protected FixedSizeFrameTupleAccessor resultFrameTupleAcc;
- protected FixedSizeTupleReference resultTuple;
+ protected IFrameTupleAccessor resultFrameTupleAcc;
+ protected IInvertedListTupleReference resultTuple;
protected boolean advanceCursor;
protected boolean advancePrevResult;
- protected int resultTidx;
- protected int invListTidx;
+ // ToDo: we need those tuple indexes because when generating the intermediate result,
+ // the resultFrameTupleAcc API needs tuple index.
+ // In fact, the elements in resultFrameTupleAcc are retrieved sequentially one by one instead of accessed randomly.
+ // Maybe we can wrap the tuple index and the resultFrameTupleAcc into a new class so that we can iterate the elements
+ // in resultFrameTupleAcc and avoid having the tuple indexes here.
+ // We need to be very careful when handling those tuple indexes here.
+ protected int resultTupleIdx;
+ protected int invListTupleIdx;
protected int invListTupleCount;
protected ITupleReference invListTuple;
protected int prevResultFrameTupleCount;
@@ -98,8 +104,9 @@
* false otherwise.
* @throws HyracksDataException
*/
- public boolean merge(List<InvertedListCursor> invListCursors, int occurrenceThreshold, int numPrefixLists,
+ public boolean merge(List<IInvertedListCursor> invListCursors, int occurrenceThreshold, int numPrefixLists,
InvertedIndexFinalSearchResult finalSearchResult) throws HyracksDataException {
+
Collections.sort(invListCursors);
int numInvLists = invListCursors.size();
InvertedIndexSearchResult result = null;
@@ -125,7 +132,7 @@
result = finalSearchResult;
isFinalList = true;
}
- InvertedListCursor invListCursor = invListCursors.get(i);
+ IInvertedListCursor invListCursor = invListCursors.get(i);
// Track whether an exception is occurred.
boolean finishedTryBlock = false;
try {
@@ -219,9 +226,9 @@
* @return true only if all processing for the final list for a partition is done.
* false otherwise.
*/
- protected boolean mergeSuffixListProbe(InvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult,
- InvertedIndexSearchResult newSearchResult, int invListIx, int numInvLists, int occurrenceThreshold,
- boolean isFinalList) throws HyracksDataException {
+ protected boolean mergeSuffixListProbe(IInvertedListCursor invListCursor,
+ InvertedIndexSearchResult prevSearchResult, InvertedIndexSearchResult newSearchResult, int invListIx,
+ int numInvLists, int occurrenceThreshold, boolean isFinalList) throws HyracksDataException {
if (isProcessingFinished) {
return true;
}
@@ -229,8 +236,8 @@
initMergingOneList(invListCursor, prevSearchResult, newSearchResult, isFinalList, invListIx, numInvLists,
occurrenceThreshold, processType.SUFFIX_LIST_PROBE);
- while (resultTidx < prevResultFrameTupleCount) {
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ while (resultTupleIdx < prevResultFrameTupleCount) {
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTupleIdx));
int count = getCount(resultTuple);
if (invListCursor.containsKey(resultTuple, invListCmp)) {
// Found the same tuple again on the current list. Increases the count by one.
@@ -249,7 +256,7 @@
return false;
}
}
- resultTidx++;
+ resultTupleIdx++;
checkPrevResultAndFetchNextFrame(prevSearchResult);
}
@@ -263,7 +270,7 @@
* @return true only if all processing for the final list for a partition is done.
* false otherwise.
*/
- protected boolean mergeSuffixListScan(InvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult,
+ protected boolean mergeSuffixListScan(IInvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult,
InvertedIndexSearchResult newSearchResult, int invListIx, int numInvLists, int occurrenceThreshold,
boolean isFinalList) throws HyracksDataException {
if (isProcessingFinished) {
@@ -276,9 +283,9 @@
int cmp;
int count;
- while (invListTidx < invListTupleCount && resultTidx < prevResultFrameTupleCount) {
+ while (invListTupleIdx < invListTupleCount && resultTupleIdx < prevResultFrameTupleCount) {
invListTuple = invListCursor.getTuple();
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTupleIdx));
cmp = invListCmp.compare(invListTuple, resultTuple);
if (cmp == 0) {
// Found the same tuple again on the current list. Increases the count by one.
@@ -316,8 +323,8 @@
// append remaining elements from previous result set
// These remaining elements can be a part of the answer if they will be found again in the remaining lists.
- while (resultTidx < prevResultFrameTupleCount) {
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ while (resultTupleIdx < prevResultFrameTupleCount) {
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTupleIdx));
count = getCount(resultTuple);
if (count + numInvLists - invListIx > occurrenceThreshold) {
if (!newSearchResult.append(resultTuple, count)) {
@@ -326,7 +333,7 @@
return false;
}
}
- resultTidx++;
+ resultTupleIdx++;
checkPrevResultAndFetchNextFrame(prevSearchResult);
}
@@ -338,7 +345,7 @@
* then generates a new result by applying UNIONALL operation on these two. This method returns true
* only if all processing for the given final list is done. Otherwise, it returns false.
*/
- protected boolean mergePrefixList(InvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult,
+ protected boolean mergePrefixList(IInvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult,
InvertedIndexSearchResult newSearchResult, boolean isFinalList) throws HyracksDataException {
if (isProcessingFinished) {
return true;
@@ -350,9 +357,9 @@
int cmp;
int count;
// Traverses the inverted list and the previous result at the same time.
- while (invListTidx < invListTupleCount && resultTidx < prevResultFrameTupleCount) {
+ while (invListTupleIdx < invListTupleCount && resultTupleIdx < prevResultFrameTupleCount) {
invListTuple = invListCursor.getTuple();
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTupleIdx));
cmp = invListCmp.compare(invListTuple, resultTuple);
// Found the same tuple again on the current list: count + 1. Both the result and the cursor advances.
if (cmp == 0) {
@@ -393,29 +400,29 @@
// append remaining new elements from inverted list
//
- while (invListTidx < invListTupleCount) {
+ while (invListTupleIdx < invListTupleCount) {
invListTuple = invListCursor.getTuple();
if (!newSearchResult.append(invListTuple, 1)) {
// For a final result, needs to pause when a frame becomes full to let the caller
// consume the frame. SearchResult.append() should only return false for this case.
return false;
}
- invListTidx++;
+ invListTupleIdx++;
if (invListCursor.hasNext()) {
invListCursor.next();
}
}
// append remaining elements from previous result set
- while (resultTidx < prevResultFrameTupleCount) {
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ while (resultTupleIdx < prevResultFrameTupleCount) {
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTupleIdx));
count = getCount(resultTuple);
if (!newSearchResult.append(resultTuple, count)) {
// For a final result, needs to pause when a frame becomes full to let the caller
// consume the frame. SearchResult.append() should only return false for this case.
return false;
}
- resultTidx++;
+ resultTupleIdx++;
checkPrevResultAndFetchNextFrame(prevSearchResult);
}
@@ -425,7 +432,7 @@
/**
* Initializes necessary information for each merging operation (prefix_list) for a list.
*/
- protected void initMergingOneList(InvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult,
+ protected void initMergingOneList(IInvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult,
InvertedIndexSearchResult newSearchResult, boolean isFinalList, processType mergeOpType)
throws HyracksDataException {
initMergingOneList(invListCursor, prevSearchResult, newSearchResult, isFinalList, 0, 0, 0, mergeOpType);
@@ -434,7 +441,7 @@
/**
* Initializes necessary information for each merging operation (suffix_list_probe or suffix_list_scan) for a list.
*/
- protected void initMergingOneList(InvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult,
+ protected void initMergingOneList(IInvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult,
InvertedIndexSearchResult newSearchResult, boolean isFinalList, int invListIx, int numInvLists,
int occurrenceThreshold, processType mergeOpType) throws HyracksDataException {
// Each inverted list will be visited only once except the final inverted list.
@@ -455,9 +462,9 @@
resultTuple = prevSearchResult.getTuple();
advanceCursor = true;
advancePrevResult = false;
- resultTidx = 0;
+ resultTupleIdx = 0;
resultFrameTupleAcc.reset(prevCurrentBuffer);
- invListTidx = 0;
+ invListTupleIdx = 0;
numInvertedLists = numInvLists;
invListIdx = invListIx;
prevResultFrameTupleCount = prevCurrentBuffer == null ? 0 : resultFrameTupleAcc.getTupleCount();
@@ -485,7 +492,7 @@
* false otherwise
*/
protected boolean finishMergingOneList(boolean isFinalList, InvertedIndexSearchResult prevSearchResult,
- InvertedIndexSearchResult newSearchResult, InvertedListCursor invListCursor) throws HyracksDataException {
+ InvertedIndexSearchResult newSearchResult, IInvertedListCursor invListCursor) throws HyracksDataException {
prevSearchResult.closeResultRead(false);
invListCursor.close();
// Final search result can be called multiple times for partitioned occurrence searcher case
@@ -506,14 +513,14 @@
* Also fetches next element from the inverted list cursor.
*/
protected void advancePrevResultAndList(boolean advancePrevResult, boolean advanceCursor,
- InvertedIndexSearchResult prevSearchResult, InvertedListCursor invListCursor) throws HyracksDataException {
+ InvertedIndexSearchResult prevSearchResult, IInvertedListCursor invListCursor) throws HyracksDataException {
if (advancePrevResult) {
- resultTidx++;
+ resultTupleIdx++;
checkPrevResultAndFetchNextFrame(prevSearchResult);
}
if (advanceCursor) {
- invListTidx++;
+ invListTupleIdx++;
if (invListCursor.hasNext()) {
invListCursor.next();
}
@@ -525,13 +532,13 @@
*/
protected void checkPrevResultAndFetchNextFrame(InvertedIndexSearchResult prevSearchResult)
throws HyracksDataException {
- if (resultTidx >= prevResultFrameTupleCount) {
+ if (resultTupleIdx >= prevResultFrameTupleCount) {
prevBufIdx++;
if (prevBufIdx <= maxPrevBufIdx) {
prevCurrentBuffer = prevSearchResult.getNextFrame();
resultFrameTupleAcc.reset(prevCurrentBuffer);
prevResultFrameTupleCount = resultFrameTupleAcc.getTupleCount();
- resultTidx = 0;
+ resultTupleIdx = 0;
}
}
}
@@ -539,7 +546,7 @@
/**
* Gets the count of the given tuple in the previous search result.
*/
- protected int getCount(FixedSizeTupleReference resultTuple) {
+ protected int getCount(IInvertedListTupleReference resultTuple) {
return IntegerPointable.getInteger(resultTuple.getFieldData(0),
resultTuple.getFieldStart(resultTuple.getFieldCount() - 1));
}
@@ -585,8 +592,8 @@
resultTuple = null;
advanceCursor = false;
advancePrevResult = false;
- resultTidx = 0;
- invListTidx = 0;
+ resultTupleIdx = 0;
+ invListTupleIdx = 0;
prevResultFrameTupleCount = 0;
finalInvListCursor = null;
finalSearchResult = null;
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListPartitions.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListPartitions.java
index fef4baf..aef18c3 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListPartitions.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListPartitions.java
@@ -23,8 +23,8 @@
import java.util.Arrays;
import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IObjectFactory;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
import org.apache.hyracks.storage.am.lsm.invertedindex.util.ObjectCache;
/**
@@ -36,15 +36,15 @@
private final int PARTITIONS_SLACK_SIZE = 10;
private final int OBJECT_CACHE_INIT_SIZE = 10;
private final int OBJECT_CACHE_EXPAND_SIZE = 10;
- private final IObjectFactory<ArrayList<InvertedListCursor>> arrayListFactory;
- private final ObjectCache<ArrayList<InvertedListCursor>> arrayListCache;
- private ArrayList<InvertedListCursor>[] partitions;
+ private final IObjectFactory<ArrayList<IInvertedListCursor>> arrayListFactory;
+ private final ObjectCache<ArrayList<IInvertedListCursor>> arrayListCache;
+ private ArrayList<IInvertedListCursor>[] partitions;
private short minValidPartitionIndex;
private short maxValidPartitionIndex;
public InvertedListPartitions() throws HyracksDataException {
- this.arrayListFactory = new ArrayListFactory<InvertedListCursor>();
- this.arrayListCache = new ObjectCache<ArrayList<InvertedListCursor>>(arrayListFactory, OBJECT_CACHE_INIT_SIZE,
+ this.arrayListFactory = new ArrayListFactory<IInvertedListCursor>();
+ this.arrayListCache = new ObjectCache<ArrayList<IInvertedListCursor>>(arrayListFactory, OBJECT_CACHE_INIT_SIZE,
OBJECT_CACHE_EXPAND_SIZE);
}
@@ -57,7 +57,7 @@
} else {
initialSize = numTokensUpperBound + 1;
}
- partitions = (ArrayList<InvertedListCursor>[]) new ArrayList[initialSize];
+ partitions = (ArrayList<IInvertedListCursor>[]) new ArrayList[initialSize];
} else {
if (numTokensUpperBound + 1 >= partitions.length) {
partitions = Arrays.copyOf(partitions, numTokensUpperBound + 1);
@@ -69,11 +69,11 @@
maxValidPartitionIndex = Short.MIN_VALUE;
}
- public void addInvertedListCursor(InvertedListCursor listCursor, short numTokens) throws HyracksDataException {
+ public void addInvertedListCursor(IInvertedListCursor listCursor, short numTokens) throws HyracksDataException {
if (numTokens + 1 >= partitions.length) {
partitions = Arrays.copyOf(partitions, numTokens + PARTITIONS_SLACK_SIZE);
}
- ArrayList<InvertedListCursor> partitionCursors = partitions[numTokens];
+ ArrayList<IInvertedListCursor> partitionCursors = partitions[numTokens];
if (partitionCursors == null) {
partitionCursors = arrayListCache.getNext();
partitionCursors.clear();
@@ -89,7 +89,7 @@
partitionCursors.add(listCursor);
}
- public ArrayList<InvertedListCursor>[] getPartitions() {
+ public ArrayList<IInvertedListCursor>[] getPartitions() {
return partitions;
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java
index 3d8c35a..5ab63a2 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java
@@ -33,8 +33,8 @@
import org.apache.hyracks.storage.am.common.tuples.ConcatenatingTupleReference;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInPlaceInvertedIndex;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IPartitionedInvertedIndex;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
import org.apache.hyracks.storage.common.IIndexCursor;
/**
@@ -56,7 +56,7 @@
protected int numPrefixLists;
protected boolean isFinalPartIdx;
protected boolean needToReadNewPart;
- List<InvertedListCursor>[] partitionCursors;
+ List<IInvertedListCursor>[] partitionCursors;
IInvertedIndexSearchModifier searchModifier;
public PartitionedTOccurrenceSearcher(IInPlaceInvertedIndex invIndex, IHyracksTaskContext ctx)
@@ -153,7 +153,7 @@
// Prune partition because no element in it can satisfy the occurrence threshold.
// An opened cursor should be closed.
if (partitionCursors[i].size() < occurrenceThreshold) {
- for (InvertedListCursor cursor : partitionCursors[i]) {
+ for (IInvertedListCursor cursor : partitionCursors[i]) {
cursor.close();
}
continue;
@@ -244,7 +244,7 @@
// Prune partition because no element in it can satisfy the occurrence threshold.
// An opened cursor should be closed.
if (partitionCursors[i].size() < occurrenceThreshold) {
- for (InvertedListCursor cursor : partitionCursors[i]) {
+ for (IInvertedListCursor cursor : partitionCursors[i]) {
cursor.close();
}
continue;
@@ -301,14 +301,14 @@
}
private void closeCursorsInPartitions(InvertedListPartitions parts) throws HyracksDataException {
- List<InvertedListCursor>[] partCursors = parts.getPartitions();
+ List<IInvertedListCursor>[] partCursors = parts.getPartitions();
short start = parts.getMinValidPartitionIndex();
short end = parts.getMaxValidPartitionIndex();
for (int i = start; i <= end; i++) {
if (partCursors[i] == null) {
continue;
}
- for (InvertedListCursor cursor : partCursors[i]) {
+ for (IInvertedListCursor cursor : partCursors[i]) {
cursor.close();
}
}
@@ -331,7 +331,7 @@
return fullHighSearchKey;
}
- public InvertedListCursor getCachedInvertedListCursor() throws HyracksDataException {
+ public IInvertedListCursor getCachedInvertedListCursor() throws HyracksDataException {
return invListCursorCache.getNext();
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java
index 9808ae1..289b8d2 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java
@@ -27,7 +27,7 @@
import org.apache.hyracks.storage.am.common.api.IIndexOperationContext;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInPlaceInvertedIndex;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import org.apache.hyracks.storage.common.IIndexCursor;
/**
@@ -35,10 +35,12 @@
*/
public class TOccurrenceSearcher extends AbstractTOccurrenceSearcher {
- protected final ArrayList<InvertedListCursor> invListCursors = new ArrayList<>();
+ protected final ArrayList<IInvertedListCursor> invListCursors = new ArrayList<>();
+ protected InvertedListCursorFactory invertedListCursorFactory;
public TOccurrenceSearcher(IInPlaceInvertedIndex invIndex, IHyracksTaskContext ctx) throws HyracksDataException {
super(invIndex, ctx);
+ invertedListCursorFactory = new InvertedListCursorFactory(invIndex, ctx);
}
@Override
@@ -52,7 +54,7 @@
invListCursorCache.reset();
for (int i = 0; i < numQueryTokens; i++) {
searchKey.reset(queryTokenAppender, i);
- InvertedListCursor invListCursor = invListCursorCache.getNext();
+ IInvertedListCursor invListCursor = invertedListCursorFactory.create();
invIndex.openInvertedListCursor(invListCursor, searchKey, ictx);
invListCursors.add(invListCursor);
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
index c9cd959..a9a01e9 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
@@ -21,8 +21,10 @@
import java.util.List;
+import org.apache.hyracks.api.comm.IFrameTupleAccessor;
import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.exceptions.ErrorCode;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.io.FileReference;
import org.apache.hyracks.api.io.IIOManager;
@@ -49,24 +51,34 @@
import org.apache.hyracks.storage.am.lsm.common.impls.LSMComponentFilterManager;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilder;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilderFactory;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListTupleReference;
import org.apache.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndex;
import org.apache.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexDiskComponentFactory;
import org.apache.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexFileManager;
import org.apache.hyracks.storage.am.lsm.invertedindex.impls.PartitionedLSMInvertedIndex;
import org.apache.hyracks.storage.am.lsm.invertedindex.inmemory.InMemoryInvertedIndex;
import org.apache.hyracks.storage.am.lsm.invertedindex.inmemory.PartitionedInMemoryInvertedIndex;
-import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeElementInvertedListBuilder;
-import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeElementInvertedListBuilderFactory;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.InvertedListBuilderFactory;
import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex;
import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndexFactory;
import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.PartitionedOnDiskInvertedIndex;
import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.PartitionedOnDiskInvertedIndexFactory;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.fixedsize.FixedSizeElementInvertedListBuilder;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.fixedsize.FixedSizeInvertedListSearchResultFrameTupleAccessor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.fixedsize.FixedSizeInvertedListTupleReference;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.variablesize.VariableSizeInvertedListSearchResultFrameTupleAccessor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.variablesize.VariableSizeInvertedListTupleReference;
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
import org.apache.hyracks.storage.common.buffercache.IBufferCache;
import org.apache.hyracks.util.trace.ITracer;
public class InvertedIndexUtils {
+ /** Message handed to the exception created by verifyAllFixedSizeTypeTrait when not all traits are fixed-length. */
+ public static final String EXPECT_ALL_FIX_GET_VAR_SIZE =
+ "expecting all type traits to be fixed-size while getting at least one variable-length one";
+ /** Message handed to the exception created by verifyHasVarSizeTypeTrait when every trait is fixed-length. */
+ public static final String EXPECT_VAR_GET_ALL_FIX_SIZE =
+ "expecting at least one variable-size type trait while all are fixed-size";
+
public static InMemoryInvertedIndex createInMemoryBTreeInvertedindex(IBufferCache memBufferCache,
IPageManager virtualFreePageManager, ITypeTraits[] invListTypeTraits,
IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
@@ -148,7 +160,7 @@
new LSMInvertedIndexFileManager(ioManager, onDiskDirFileRef, deletedKeysBTreeFactory);
IInvertedListBuilderFactory invListBuilderFactory =
- new FixedSizeElementInvertedListBuilderFactory(invListTypeTraits);
+ new InvertedListBuilderFactory(tokenTypeTraits, invListTypeTraits);
OnDiskInvertedIndexFactory invIndexFactory =
new OnDiskInvertedIndexFactory(ioManager, diskBufferCache, invListBuilderFactory, invListTypeTraits,
invListCmpFactories, tokenTypeTraits, tokenCmpFactories, fileManager, pageManagerFactory);
@@ -197,7 +209,7 @@
new LSMInvertedIndexFileManager(ioManager, onDiskDirFileRef, deletedKeysBTreeFactory);
IInvertedListBuilderFactory invListBuilderFactory =
- new FixedSizeElementInvertedListBuilderFactory(invListTypeTraits);
+ new InvertedListBuilderFactory(tokenTypeTraits, invListTypeTraits);
PartitionedOnDiskInvertedIndexFactory invIndexFactory = new PartitionedOnDiskInvertedIndexFactory(ioManager,
diskBufferCache, invListBuilderFactory, invListTypeTraits, invListCmpFactories, tokenTypeTraits,
tokenCmpFactories, fileManager, pageManagerFactory);
@@ -220,4 +232,63 @@
mergePolicy, opTracker, ioScheduler, ioOpCallbackFactory, pageWriteCallbackFactory, invertedIndexFields,
filterFields, filterFieldsForNonBulkLoadOps, invertedIndexFieldsForNonBulkLoadOps, durable, tracer);
}
+
+    /**
+     * Tells whether every entry of {@code typeTraits} reports a fixed length.
+     *
+     * @param typeTraits the type traits to inspect
+     * @return {@code false} if at least one trait is not fixed-length; {@code true} otherwise,
+     *         which includes the case of an empty array
+     */
+    public static boolean checkTypeTraitsAllFixed(ITypeTraits[] typeTraits) {
+        boolean allFixed = true;
+        for (ITypeTraits trait : typeTraits) {
+            if (!trait.isFixedLength()) {
+                // One variable-length trait settles the answer; no need to look further.
+                allFixed = false;
+                break;
+            }
+        }
+        return allFixed;
+    }
+
+    /**
+     * Ensures that all given type traits are fixed-length.
+     *
+     * @param typeTraits the type traits to validate
+     * @throws HyracksDataException created with {@code ErrorCode.INVALID_INVERTED_LIST_TYPE_TRAITS} and
+     *         {@link #EXPECT_ALL_FIX_GET_VAR_SIZE} when at least one trait is not fixed-length
+     */
+    public static void verifyAllFixedSizeTypeTrait(ITypeTraits[] typeTraits) throws HyracksDataException {
+        if (checkTypeTraitsAllFixed(typeTraits)) {
+            return;
+        }
+        throw HyracksDataException.create(ErrorCode.INVALID_INVERTED_LIST_TYPE_TRAITS,
+                EXPECT_ALL_FIX_GET_VAR_SIZE);
+    }
+
+    /**
+     * Ensures that at least one of the given type traits is not fixed-length.
+     *
+     * @param typeTraits the type traits to validate
+     * @throws HyracksDataException created with {@code ErrorCode.INVALID_INVERTED_LIST_TYPE_TRAITS} and
+     *         {@link #EXPECT_VAR_GET_ALL_FIX_SIZE} when every trait is fixed-length
+     */
+    public static void verifyHasVarSizeTypeTrait(ITypeTraits[] typeTraits) throws HyracksDataException {
+        final boolean onlyFixedSize = checkTypeTraitsAllFixed(typeTraits);
+        if (onlyFixedSize) {
+            throw HyracksDataException.create(ErrorCode.INVALID_INVERTED_LIST_TYPE_TRAITS,
+                    EXPECT_VAR_GET_ALL_FIX_SIZE);
+        }
+    }
+
+    /**
+     * Creates the inverted-list tuple reference that matches the given type traits.
+     *
+     * @param typeTraits the type traits of the inverted-list elements
+     * @return a {@code FixedSizeInvertedListTupleReference} when all traits are fixed-length,
+     *         a {@code VariableSizeInvertedListTupleReference} otherwise
+     * @throws HyracksDataException declared by the signature; presumably propagated from the
+     *         constructors (not visible here)
+     */
+    public static IInvertedListTupleReference createInvertedListTupleReference(ITypeTraits[] typeTraits)
+            throws HyracksDataException {
+        if (!checkTypeTraitsAllFixed(typeTraits)) {
+            return new VariableSizeInvertedListTupleReference(typeTraits);
+        }
+        return new FixedSizeInvertedListTupleReference(typeTraits);
+    }
+
+    /**
+     * Creates the search-result frame tuple accessor that matches the given type traits.
+     *
+     * @param frameSize the frame size handed to the accessor's constructor
+     * @param typeTraits the type traits of the inverted-list elements
+     * @return a {@code FixedSizeInvertedListSearchResultFrameTupleAccessor} when all traits are
+     *         fixed-length, a {@code VariableSizeInvertedListSearchResultFrameTupleAccessor} otherwise
+     * @throws HyracksDataException declared by the signature; presumably propagated from the
+     *         constructors (not visible here)
+     */
+    public static IFrameTupleAccessor createInvertedListFrameTupleAccessor(int frameSize, ITypeTraits[] typeTraits)
+            throws HyracksDataException {
+        final boolean fixedOnly = checkTypeTraitsAllFixed(typeTraits);
+        return fixedOnly ? new FixedSizeInvertedListSearchResultFrameTupleAccessor(frameSize, typeTraits)
+                : new VariableSizeInvertedListSearchResultFrameTupleAccessor(frameSize, typeTraits);
+    }
+
+    /**
+     * Stores {@code pos} in the last four bytes of {@code bytes}, most significant byte first.
+     *
+     * @param bytes the buffer whose trailing four bytes are overwritten
+     * @param pos the value to store
+     */
+    public static void setInvertedListFrameEndOffset(byte[] bytes, int pos) {
+        final int base = bytes.length - 4;
+        for (int k = 0; k < 4; k++) {
+            // Shift amounts run 24, 16, 8, 0 so the highest-order byte lands first.
+            bytes[base + k] = (byte) (pos >> (24 - 8 * k));
+        }
+    }
+
+    /**
+     * Reads the last four bytes of {@code bytes} as an int, most significant byte first.
+     *
+     * @param bytes the buffer whose trailing four bytes are decoded
+     * @return the decoded value
+     */
+    public static int getInvertedListFrameEndOffset(byte[] bytes) {
+        final int base = bytes.length - 4;
+        // Mask each byte to avoid sign extension before placing it at its position.
+        return ((bytes[base] & 0xFF) << 24) | ((bytes[base + 1] & 0xFF) << 16) | ((bytes[base + 2] & 0xFF) << 8)
+                | (bytes[base + 3] & 0xFF);
+    }
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/IIndexCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/IIndexCursor.java
index 00c5dce..8414ead 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/IIndexCursor.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/IIndexCursor.java
@@ -32,6 +32,8 @@
* --try{
* ---while (cursor.hasNext()){
* ----cursor.next()
+ * ----ITupleReference tuple = cursor.getTuple()
+ * ----work with tuple...
* ---}
* --} finally{
* ---cursor.close();
diff --git a/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/InvertedIndexUtilsTest.java b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/InvertedIndexUtilsTest.java
new file mode 100644
index 0000000..b004a43
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/InvertedIndexUtilsTest.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.lsm.invertedindex.ondisk;
+
+import org.apache.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class InvertedIndexUtilsTest {
+
+    /**
+     * Checks that a frame end offset written with setInvertedListFrameEndOffset is read back unchanged
+     * by getInvertedListFrameEndOffset, also after overwriting an earlier value in the same buffer.
+     */
+    @Test
+    public void testSetFrameEndOffset() {
+        byte[] bytes = new byte[1024];
+
+        int frameEnd = 4321;
+        InvertedIndexUtils.setInvertedListFrameEndOffset(bytes, frameEnd);
+        // JUnit assertions are used rather than the `assert` keyword, which is skipped when
+        // JVM assertions are disabled and would let the test pass without checking anything.
+        Assert.assertEquals(frameEnd, InvertedIndexUtils.getInvertedListFrameEndOffset(bytes));
+
+        frameEnd = 56789;
+        InvertedIndexUtils.setInvertedListFrameEndOffset(bytes, frameEnd);
+        Assert.assertEquals(frameEnd, InvertedIndexUtils.getInvertedListFrameEndOffset(bytes));
+    }
+
+}
\ No newline at end of file
diff --git a/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleTest.java b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/InvertedListTupleFrameTupleTest.java
similarity index 84%
rename from hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleTest.java
rename to hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/InvertedListTupleFrameTupleTest.java
index c0a4e75..0015b7a 100644
--- a/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleTest.java
+++ b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/InvertedListTupleFrameTupleTest.java
@@ -25,10 +25,11 @@
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
import org.apache.hyracks.data.std.primitive.IntegerPointable;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.fixedsize.FixedSizeInvertedListSearchResultFrameTupleAccessor;
import org.junit.Assert;
import org.junit.Test;
-public class FixedSizeFrameTupleTest {
+public class InvertedListTupleFrameTupleTest {
private static int FRAME_SIZE = 4096;
@@ -48,8 +49,9 @@
ITypeTraits[] fields = new ITypeTraits[1];
fields[0] = IntegerPointable.TYPE_TRAITS;
- FixedSizeFrameTupleAppender ftapp = new FixedSizeFrameTupleAppender(FRAME_SIZE, fields);
- FixedSizeFrameTupleAccessor ftacc = new FixedSizeFrameTupleAccessor(FRAME_SIZE, fields);
+ InvertedListSearchResultFrameTupleAppender ftapp = new InvertedListSearchResultFrameTupleAppender(FRAME_SIZE);
+ FixedSizeInvertedListSearchResultFrameTupleAccessor ftacc =
+ new FixedSizeInvertedListSearchResultFrameTupleAccessor(FRAME_SIZE, fields);
boolean frameHasSpace = true;
diff --git a/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexLifecycleTest.java b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexLifecycleTest.java
index f5d12eb..0c626df 100644
--- a/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexLifecycleTest.java
+++ b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexLifecycleTest.java
@@ -30,6 +30,7 @@
import org.apache.hyracks.storage.am.common.api.ITreeIndexFrame;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilder;
import org.apache.hyracks.storage.am.lsm.invertedindex.common.LSMInvertedIndexTestHarness;
+import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.fixedsize.FixedSizeElementInvertedListBuilder;
public class OnDiskInvertedIndexLifecycleTest extends AbstractIndexLifecycleTest {
diff --git a/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
index 8dc9b07..a5e47b5 100644
--- a/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
+++ b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
@@ -86,7 +86,7 @@
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
-import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor;
+import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import org.apache.hyracks.storage.am.lsm.invertedindex.common.LSMInvertedIndexTestHarness;
import org.apache.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexAccessor;
import org.apache.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexMergeCursor;
@@ -376,7 +376,7 @@
ArrayTupleBuilder searchKeyBuilder = new ArrayTupleBuilder(tokenFieldCount);
ArrayTupleReference searchKey = new ArrayTupleReference();
// Cursor over inverted list from actual index.
- InvertedListCursor actualInvListCursor = invIndexAccessor.createInvertedListCursor();
+ IInvertedListCursor actualInvListCursor = invIndexAccessor.createInvertedListCursor();
// Helpers for generating a serialized inverted-list element from a CheckTuple from the expected index.
ArrayTupleBuilder expectedBuilder = new ArrayTupleBuilder(fieldSerdes.length);