ASTERIXDB-1485: add a regression test.

Change-Id: Id7505ae3842fc659adac3debfcf9c857aff5fd26
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1066
Reviewed-by: Jianfeng Jia <jianfeng.jia@gmail.com>
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-app/data/twitter/sample.adm b/asterixdb/asterix-app/data/twitter/sample.adm
new file mode 100644
index 0000000..a0ffeb7
--- /dev/null
+++ b/asterixdb/asterix-app/data/twitter/sample.adm
@@ -0,0 +1,7 @@
+{ "create_at": datetime("2012-05-01T09:15:07.000Z"), "id": 197358499422928896, "text": "I hate writing document", "in_reply_to_status": -1, "in_reply_to_user": -1, "favorite_count": -1, "coordinate": point("-81.6430449, 38.3092672"), "retweet_count": 0, "lang": "null", "is_retweet": false, "hashtags": {{ "hate", "document" }}, "user_mentions": null, "user": { "id": 331998689, "name": "ImAGlenardenNigga", "screen_name": "WhiteBoyTurntUp", "lang": "en", "location": "Glenarden D $M$ V ", "create_at": date("2011-07-08"), "description": "#TeamNAS #TeamTatted #TeamTakin #TeamLightSkin #TeamRollUp #TeamGDHU #TeamGlenarden #Follow My Folk's @EfffYou_PayMe & Go Follow My Short Stuff @_Chinkyy ", "followers_count": 1629, "friends_count": 1542, "statues_count": 40754 }, "place": { "country": "United States", "country_code": "United States", "full_name": "Charleston, WV", "id": "44439f1538ac3ca0", "name": "Charleston", "place_type": "city", "bounding_box": rectangle("-81.727777,38.281139 -81.559673,38.405759") }, "geo_tag": { "stateID": 54, "stateName": "West Virgnia", "countyID": 54039, "countyName": "Kanawha", "cityID": 5414600, "cityName": "Charleston" } }
+{ "create_at": datetime("2012-05-02T09:15:08.000Z"), "id": 197358503617241088, "text": "✍ UK [contract] Java/ATG Developer x 4 at http://t.co/TvUNu4UR ✔ #jobs", "in_reply_to_status": -1, "in_reply_to_user": -1, "favorite_count": -1, "coordinate": point("0.13017578, 51.26358251"), "retweet_count": 0, "lang": "null", "is_retweet": false, "hashtags": {{ "jobs" }}, "user_mentions": null, "user": { "id": 80654241, "name": "david morgan", "screen_name": "adsbringcust", "lang": "en", "location": "Northampton, UK", "create_at": date("2009-10-07"), "description": "see my latest site:", "followers_count": 1765, "friends_count": 1998, "statues_count": 77360 }, "place": { "country": "United Kingdom", "country_code": "United Kingdom", "full_name": "Sevenoaks, Kent", "id": "5747f33800b71f4b", "name": "Sevenoaks", "place_type": "city", "bounding_box": rectangle("0.033526,51.13179 0.344757,51.417971") }, "geo_tag": { "stateID": 2, "stateName": "Alaska", "countyID": 2016, "countyName": "Aleutians West", "cityID": null, "cityName": null } }
+{ "create_at": datetime("2012-05-03T09:15:16.000Z"), "id": 197358537167482881, "text": "I like writing code", "in_reply_to_status": -1, "in_reply_to_user": -1, "favorite_count": -1, "coordinate": point("-77.3015925, 38.6525867"), "retweet_count": 0, "lang": "null", "is_retweet": false, "hashtags": null, "user_mentions": null, "user": { "id": 227806764, "name": "DominiqueHalliburton", "screen_name": "NdeaaLovee_", "lang": "en", "location": "", "create_at": date("2010-12-17"), "description": "Better run, better run, faster then my bullet ︻┳═一\r\nBabyy ima BOSS, idk wht they do", "followers_count": 327, "friends_count": 316, "statues_count": 5692 }, "place": { "country": "United States", "country_code": "United States", "full_name": "Maryland, US", "id": "dea1eac2d7ef8878", "name": "Maryland", "place_type": "admin", "bounding_box": rectangle("-79.487651,37.886605 -74.986282,39.723037") }, "geo_tag": { "stateID": 51, "stateName": "Virginia", "countyID": 51153, "countyName": "Prince William", "cityID": 5121088, "cityName": "Dale City" } }
+{ "create_at": datetime("2012-05-04T09:15:25.000Z"), "id": 197358574933000192, "text": "I'm at Спортмастер (Москва, Россия)", "in_reply_to_status": -1, "in_reply_to_user": -1, "favorite_count": -1, "coordinate": point("37.74370193, 55.64500063"), "retweet_count": 0, "lang": "null", "is_retweet": false, "hashtags": null, "user_mentions": null, "user": { "id": 94982186, "name": "Liana", "screen_name": "Lianochka_", "lang": "en", "location": "Russia, Moscow", "create_at": date("2009-12-06"), "description": "", "followers_count": 74, "friends_count": 54, "statues_count": 3570 }, "place": { "country": "Russia", "country_code": "Russia", "full_name": "Спортмастер, Moscow", "id": "c2b9829738f1d427", "name": "Спортмастер", "place_type": "poi", "bounding_box": rectangle("37.7437789,55.6453589 37.743779,55.645359") }, "geo_tag": { "stateID": 2, "stateName": "Alaska", "countyID": 2016, "countyName": "Aleutians West", "cityID": null, "cityName": null } }
+{ "create_at": datetime("2012-05-05T09:15:36.000Z"), "id": 197358621032583168, "text": "#thankyougame I thank everyone who has stuck by my side <3", "in_reply_to_status": -1, "in_reply_to_user": -1, "favorite_count": -1, "coordinate": point("-70.8766775, 42.8107381"), "retweet_count": 0, "lang": "null", "is_retweet": false, "hashtags": {{ "thankyougame" }}, "user_mentions": null, "user": { "id": 567441471, "name": "kelsey w harrington", "screen_name": "KelsWHarrington", "lang": "en", "location": "Massachusetts", "create_at": date("2012-04-30"), "description": "", "followers_count": 6, "friends_count": 39, "statues_count": 19 }, "place": { "country": "United States", "country_code": "United States", "full_name": "Newburyport, MA", "id": "96bf65a35e9304b6", "name": "Newburyport", "place_type": "city", "bounding_box": rectangle("-70.940799,42.787019 -70.808423,42.841039") }, "geo_tag": { "stateID": 25, "stateName": "Massachusetts", "countyID": 25009, "countyName": "Essex", "cityID": 2545245, "cityName": "Newburyport" } }
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/flwor/query-ASTERIXDB-1485/query-ASTERIXDB-1485.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/flwor/query-ASTERIXDB-1485/query-ASTERIXDB-1485.1.ddl.aql
new file mode 100644
index 0000000..5153d4a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/flwor/query-ASTERIXDB-1485/query-ASTERIXDB-1485.1.ddl.aql
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists; /* discard any dataverse named "test" left over from an earlier run */
+create dataverse test;         /* recreate it empty */
+use dataverse test;            /* every statement below is resolved inside "test" */
+
+create type typeUser if not exists as open { /* tweet author; embedded in typeTweet as the "user" field */
+    id: int64,
+    name: string,
+    screen_name : string,
+    lang : string,
+    location: string,
+    create_at: date, /* a date here, whereas typeTweet.create_at is a datetime */
+    description: string,
+    followers_count: int32,
+    friends_count: int32,
+    statues_count: int64 /* spelled "statues_count" in data/twitter/sample.adm too; keep both in sync */
+}
+
+create type typePlace if not exists as open{ /* place attached to a tweet; optional in typeTweet (place : typePlace?) */
+    country : string,
+    country_code : string,
+    full_name : string,
+    id : string,
+    name : string,
+    place_type : string,
+    bounding_box : rectangle /* written with the rectangle("...") constructor in the sample data */
+}
+
+create type typeGeoTag if not exists as open { /* state/county/city tag; embedded in typeTweet as "geo_tag" */
+    stateID: int32, /* compared against the $set list in the regression query */
+    stateName: string,
+    countyID: int32,
+    countyName: string,
+    cityID: int32?,  /* optional: some sample records carry null here */
+    cityName: string? /* optional: some sample records carry null here */
+}
+
+create type typeTweet if not exists as open{ /* record type of the ds_tweet dataset */
+    create_at : datetime, /* range-filtered by the regression query */
+    id: int64, /* primary key of ds_tweet */
+    "text": string, /* field name is quoted here, in the text_idx definition and in the query */
+    in_reply_to_status : int64,
+    in_reply_to_user : int64,
+    favorite_count : int64,
+    coordinate: point?,
+    retweet_count : int64,
+    lang : string,
+    is_retweet: boolean,
+    hashtags : {{ string }} ?, /* optional bag of strings; null in some sample records */
+    user_mentions : {{ int64 }} ? , /* optional bag of int64 */
+    user : typeUser,
+    place : typePlace?,
+    geo_tag: typeGeoTag
+}
+
+create dataset ds_tweet(typeTweet) if not exists primary key id;
+/* Secondary indexes; the regression query filters on "text", create_at and geo_tag.stateID. */
+create index text_idx if not exists on ds_tweet("text") type keyword;
+create index location_idx if not exists on ds_tweet(coordinate) type rtree;
+create index time_idx if not exists on ds_tweet(create_at) type btree;
+create index state_idx if not exists on ds_tweet(geo_tag.stateID) type btree;
+create index county_idx if not exists on ds_tweet(geo_tag.countyID) type btree;
+create index city_idx if not exists on ds_tweet(geo_tag.cityID) type btree; /* NOTE(review): cityID is optional (int32?) - confirm the index handles nulls as intended */
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/flwor/query-ASTERIXDB-1485/query-ASTERIXDB-1485.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/flwor/query-ASTERIXDB-1485/query-ASTERIXDB-1485.2.update.aql
new file mode 100644
index 0000000..9d3c8a1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/flwor/query-ASTERIXDB-1485/query-ASTERIXDB-1485.2.update.aql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+use dataverse test;
+
+load dataset ds_tweet using localfs /* bulk-load the sample tweets through the localfs adapter */
+(("path"="asterix_nc1://data/twitter/sample.adm"),("format"="adm")); /* presumably "asterix_nc1" names the node that reads the file - confirm against the test cluster config */
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/flwor/query-ASTERIXDB-1485/query-ASTERIXDB-1485.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/flwor/query-ASTERIXDB-1485/query-ASTERIXDB-1485.3.query.aql
new file mode 100644
index 0000000..777a4f0
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/flwor/query-ASTERIXDB-1485/query-ASTERIXDB-1485.3.query.aql
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+/* Step 1: tweets passing the text, state and time filters. NOTE(review): regression query for ASTERIXDB-1485; presumably the exact clause shape matters, so do not restructure. */
+let $common := (
+   for $t in dataset ds_tweet
+   where similarity-jaccard(word-tokens($t."text"), word-tokens("document")) > 0.0
+      and contains($t."text", "hate")
+   let $set := [ 72,37,51,24,11,10,34,42,9,44,15,48,35,4,40,6,20,32,8,49,12,22,28,1,13,45,5,47,21,29,54,17,18,39,19,
+                 55,26,27,31,56,41,46,16,30,53,38,25,36,50,33,23,2 ]
+   for $sid in $set
+   where $t.geo_tag.stateID = $sid
+   where
+     $t."create_at">= datetime("2012-04-30T18:53:42.894Z")
+     and $t."create_at" <= datetime("2012-05-03T19:25:11.000Z")
+   return $t
+)
+/* Step 2: count hashtag occurrences over the selected tweets; most frequent first, ties ordered by tag, at most 50 rows. */
+let $hashtag := (
+  for $t in $common
+  where not(is-null($t.hashtags))
+  for $h in $t.hashtags
+  group by $tag := $h with $h
+  let $c := count($h)
+  order by $c desc, $tag
+  limit 50
+  return { "key": $tag, "count" : $c}
+)
+return $hashtag
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/flwor/query-ASTERIXDB-1485/query-ASTERIXDB-1485.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/flwor/query-ASTERIXDB-1485/query-ASTERIXDB-1485.1.adm
new file mode 100644
index 0000000..411a882
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/flwor/query-ASTERIXDB-1485/query-ASTERIXDB-1485.1.adm
@@ -0,0 +1 @@
+[ { "key": "document", "count": 1 }, { "key": "hate", "count": 1 } ]
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
index 9b441c3..4a2f78e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -348,6 +348,11 @@
         <output-dir compare="Text">query-ASTERIXDB-883</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="flwor">
+      <compilation-unit name="query-ASTERIXDB-1485">
+        <output-dir compare="Text">query-ASTERIXDB-1485</output-dir>
+      </compilation-unit>
+    </test-case>
   </test-group>
   <test-group name="union">
     <test-case FilePath="union">