Fixed ASTERIXDB-1249 and ASTERIXDB-1250: self index-nested-loop join correctly identifies the outer and the inner branch.
The first dataset becomes the outer branch.
The second dataset becomes the inner branch.
The optimizer for index-nested-loop join now only tries to use an index from the inner branch.
Change-Id: I0d4291197c2bcfbcdcde998c5952af41960c4ad7
Reviewed-on: https://asterix-gerrit.ics.uci.edu/576
Reviewed-by: Yingyi Bu <buyingyi@gmail.com>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_01.aql
index e3d2c61..ed212cc 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_01.aql
@@ -27,7 +27,7 @@
use dataverse test;
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -35,7 +35,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -50,7 +50,7 @@
write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_01.adm";
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where edit-distance-check($a.authors, $b.authors, 3)[0] and $a.id < $b.id
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.aql
index 62964d9..95b2912 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.aql
@@ -27,7 +27,7 @@
use dataverse test;
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-contains.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-contains.aql
index 4178083..7d9c5d2 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-contains.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-contains.aql
@@ -50,7 +50,7 @@
write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-contains.adm";
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where edit-distance-contains($a.authors, $b.authors, 3)[0] and $a.id < $b.id
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_01.aql
index 4a1ace5..391f165 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_01.aql
@@ -27,7 +27,7 @@
use dataverse test;
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -35,7 +35,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -50,7 +50,7 @@
write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_01.adm";
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where edit-distance($a.authors, $b.authors) < 3 and $a.id < $b.id
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_02.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_02.aql
index cfed751..50ed8d5 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_02.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_02.aql
@@ -27,7 +27,7 @@
use dataverse test;
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -35,7 +35,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -53,7 +53,7 @@
set simfunction 'edit-distance';
set simthreshold '3';
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where $a.authors ~= $b.authors and $a.id < $b.id
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_01.aql
index 93e3a5f..a9aa579 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_01.aql
@@ -28,7 +28,7 @@
set import-private-functions 'true';
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -36,7 +36,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -54,7 +54,7 @@
set simfunction 'jaccard';
set simthreshold '0.5f';
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where gram-tokens($a.title, 3, false) ~= gram-tokens($b.title, 3, false) and $a.id < $b.id
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_01.aql
index 629b72a..07e6c18 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_01.aql
@@ -28,7 +28,7 @@
set import-private-functions 'true';
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -36,7 +36,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -51,8 +51,8 @@
write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_01.adm";
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where similarity-jaccard-check(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false), 0.5f)[0]
and $a.id < $b.id
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_01.aql
index 36ba05c..a68d600 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_01.aql
@@ -28,7 +28,7 @@
set import-private-functions 'true';
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -36,7 +36,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -51,8 +51,8 @@
write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_01.adm";
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f
and $a.id < $b.id
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_01.aql
index 10a7c91..d02cee7 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_01.aql
@@ -27,13 +27,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -42,14 +42,14 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
write output to asterix_nc1:"rttest/inverted-index-join_olist-edit-distance-check_01.adm";
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where edit-distance-check($a.interests, $b.interests, 3)[0] and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_01.aql
index 8628ed4..b46fbc1 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_01.aql
@@ -27,13 +27,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -42,14 +42,14 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
write output to asterix_nc1:"rttest/inverted-index-join_olist-edit-distance_01.adm";
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where edit-distance($a.interests, $b.interests) <= 2 and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-edit-distance_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-edit-distance_01.aql
index 05d275b..840e9c7 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-edit-distance_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-edit-distance_01.aql
@@ -27,13 +27,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -42,7 +42,7 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
@@ -52,7 +52,7 @@
set simfunction 'edit-distance';
set simthreshold '3';
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where $a.interests ~= $b.interests and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_01.aql
index 09cc6e4..fb39ddf 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-fuzzyeq-jaccard_01.aql
@@ -27,13 +27,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -42,7 +42,7 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
@@ -52,7 +52,7 @@
set simfunction 'jaccard';
set simthreshold '0.7f';
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where $a.interests /*+ indexnl */ ~= $b.interests and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_01.aql
index 0459a4b..14e3dc5 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard-check_01.aql
@@ -27,13 +27,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -42,14 +42,14 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
write output to asterix_nc1:"rttest/inverted-index-join_olist-jaccard-check_01.adm";
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where /*+ indexnl */ similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_01.aql
index c76880d..4c865f6 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-jaccard_01.aql
@@ -27,13 +27,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -42,14 +42,14 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
write output to asterix_nc1:"rttest/inverted-index-join_olist-jaccard_01.adm";
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql
index aacd110..edd1e94 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql
@@ -27,13 +27,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -42,7 +42,7 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
@@ -52,7 +52,7 @@
set simfunction 'jaccard';
set simthreshold '0.7f';
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where $a.interests /*+ indexnl */ ~= $b.interests and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_01.aql
index 9560995..53ff785 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard-check_01.aql
@@ -27,13 +27,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -42,14 +42,14 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
write output to asterix_nc1:"rttest/inverted-index-join_ulist-jaccard-check_01.adm";
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where /*+ indexnl */ similarity-jaccard-check($a.interests, $b.interests, 0.7f)[0] and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_01.aql
index 887ac20..e23334f 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_01.aql
@@ -27,13 +27,13 @@
use dataverse test;
create type AddressType as closed {
- number: int32,
+ number: int32,
street: string,
city: string
}
create type CustomerType as closed {
- cid: int32,
+ cid: int32,
name: string,
age: int32?,
address: AddressType?,
@@ -42,14 +42,14 @@
}
create dataset Customers(CustomerType) primary key cid;
-
+
create dataset Customers2(CustomerType) primary key cid;
create index interests_index on Customers(interests) type keyword;
write output to asterix_nc1:"rttest/inverted-index-join_ulist-jaccard_01.adm";
-for $a in dataset('Customers')
for $b in dataset('Customers2')
+for $a in dataset('Customers')
where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_01.aql
index d9a5afa..29bd3d0 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_01.aql
@@ -27,7 +27,7 @@
use dataverse test;
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -35,7 +35,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -53,7 +53,7 @@
set simfunction 'jaccard';
set simthreshold '0.5f';
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where word-tokens($a.title) ~= word-tokens($b.title) and $a.id < $b.id
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_01.aql
index 78be4fe..989f250 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_01.aql
@@ -27,7 +27,7 @@
use dataverse test;
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -35,7 +35,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -50,8 +50,8 @@
write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard-check_01.adm";
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where similarity-jaccard-check(word-tokens($a.title), word-tokens($b.title), 0.5f)[0]
and $a.id < $b.id
return {"arec": $a, "brec": $b }
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_01.aql
index 21d5c3e..e2c373a 100644
--- a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_01.aql
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_01.aql
@@ -27,7 +27,7 @@
use dataverse test;
create type DBLPType as closed {
- id: int32,
+ id: int32,
dblpid: string,
title: string,
authors: string,
@@ -35,7 +35,7 @@
}
create type CSXType as closed {
- id: int32,
+ id: int32,
csxid: string,
title: string,
authors: string,
@@ -50,8 +50,8 @@
write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard_01.adm";
-for $a in dataset('DBLP')
for $b in dataset('CSX')
+for $a in dataset('DBLP')
where similarity-jaccard(word-tokens($a.title), word-tokens($b.title)) >= 0.5f
and $a.id < $b.id
return {"arec": $a, "brec": $b }