Fixed ASTERIXDB-1249 and ASTERIXDB-1250: self index-nested-loop join correctly identifies the outer and the inner branch.
The first dataset becomes the outer branch.
The second dataset becomes the inner branch.
The optimizer for index-nested-loop join now only tries to use an index from the inner branch.
Change-Id: I0d4291197c2bcfbcdcde998c5952af41960c4ad7
Reviewed-on: https://asterix-gerrit.ics.uci.edu/576
Reviewed-by: Yingyi Bu <buyingyi@gmail.com>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-edit-distance.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-edit-distance.aql
index 991dd0b..6640508 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-edit-distance.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-edit-distance.aql
@@ -19,7 +19,7 @@
/*
* Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance function of their authors.
* DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
- * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary.
+ * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary.
* Success : Yes
*/
@@ -28,7 +28,7 @@
use dataverse test;
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -36,7 +36,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -51,7 +51,7 @@
write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_ngram-edit-distance.adm";
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where edit-distance($a.authors, $b.authors) < 3 and $a.id < $b.id
return {"aauthors": $a.authors, "bauthors": $b.authors}
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-fuzzyeq-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-fuzzyeq-jaccard.aql
index 635b331..72e4b69 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-fuzzyeq-jaccard.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-fuzzyeq-jaccard.aql
@@ -29,7 +29,7 @@
set import-private-functions 'true';
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -37,7 +37,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -55,7 +55,7 @@
set simfunction 'jaccard';
set simthreshold '0.5f';
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where gram-tokens($a.title, 3, false) ~= gram-tokens($b.title, 3, false) and $a.id < $b.id
return {"atitle": $a.title, "btitle": $b.title}
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-jaccard.aql
index 5449ea4..5f0c612 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-jaccard.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-jaccard.aql
@@ -29,7 +29,7 @@
set import-private-functions 'true';
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -37,7 +37,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -52,8 +52,8 @@
write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_ngram-jaccard.adm";
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f
and $a.id < $b.id
return {"atitle": $a.title, "btitle": $b.title}
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-edit-distance.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-edit-distance.aql
index 8d7bb42..0e7ee97 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-edit-distance.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-edit-distance.aql
@@ -28,13 +28,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -43,14 +43,14 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_olist-edit-distance.adm";
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where edit-distance($a.interests, $b.interests) <= 2 and $a.cid < $b.cid
return {"ainterests": $a.interests, "binterests": $b.interests}
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-fuzzyeq-edit-distance.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-fuzzyeq-edit-distance.aql
index 416e89f..0021360 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-fuzzyeq-edit-distance.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-fuzzyeq-edit-distance.aql
@@ -28,13 +28,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -43,7 +43,7 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
@@ -53,7 +53,7 @@
set simfunction 'edit-distance';
set simthreshold '3';
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where $a.interests ~= $b.interests and $a.cid < $b.cid
return {"ainterests": $a.interests, "binterests": $b.interests}
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-fuzzyeq-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-fuzzyeq-jaccard.aql
index 48f30a5..ba31c49 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-fuzzyeq-jaccard.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-fuzzyeq-jaccard.aql
@@ -28,13 +28,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -43,7 +43,7 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
@@ -53,7 +53,7 @@
set simfunction 'jaccard';
set simthreshold '0.7f';
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where $a.interests /*+ indexnl */ ~= $b.interests and $a.cid < $b.cid
return {"ainterests": $a.interests, "binterests": $b.interests}
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-jaccard.aql
index a6767bd..eec411d 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-jaccard.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-jaccard.aql
@@ -28,13 +28,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -43,14 +43,14 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_olist-jaccard.adm";
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
return {"ainterests": $a.interests, "binterests": $b.interests}
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-fuzzyeq-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-fuzzyeq-jaccard.aql
index 907afce..f2983ce 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-fuzzyeq-jaccard.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-fuzzyeq-jaccard.aql
@@ -28,13 +28,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -43,7 +43,7 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
@@ -53,7 +53,7 @@
set simfunction 'jaccard';
set simthreshold '0.7f';
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where $a.interests /*+ indexnl */ ~= $b.interests and $a.cid < $b.cid
return {"ainterests": $a.interests, "binterests": $b.interests}
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-jaccard.aql
index 2bc78e0..557990c 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-jaccard.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-jaccard.aql
@@ -28,13 +28,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -43,14 +43,14 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_ulist-jaccard.adm";
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
return {"ainterests": $a.interests, "binterests": $b.interests}
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-fuzzyeq-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-fuzzyeq-jaccard.aql
index 0640ee0..1b90252 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-fuzzyeq-jaccard.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-fuzzyeq-jaccard.aql
@@ -28,7 +28,7 @@
use dataverse test;
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -36,7 +36,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -54,7 +54,7 @@
set simfunction 'jaccard';
set simthreshold '0.5f';
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where word-tokens($a.title) ~= word-tokens($b.title) and $a.id < $b.id
return {"atitle": $a.title, "btitle": $b.title}
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-jaccard.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-jaccard.aql
index 0902832..4558d49 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-jaccard.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-jaccard.aql
@@ -28,7 +28,7 @@
use dataverse test;
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -36,7 +36,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -51,8 +51,8 @@
write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_word-jaccard.adm";
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where similarity-jaccard(word-tokens($a.title), word-tokens($b.title)) >= 0.5f
and $a.id < $b.id
return {"atitle": $a.title, "btitle": $b.title}