[NO ISSUE] User-defined Function Documentation update
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
1. Updated the UDF documentation to be consistent with current master.
2. Cleaned default UDF package to remove useless UDFs.
3. Added the example in documentation as a test case for IT.
4. Reorganized the documentation to keep up with the new structure.
5. Minor changes to other documentation pages to keep style consistent.
Change-Id: I17b1b4d639ca38689298ce88145257e794eb90e1
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2804
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Taewoo Kim <wangsaeu@gmail.com>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp
index 9402e1f..a6a1cdc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp
@@ -18,5 +18,4 @@
*/
use externallibtest;
-let x=testlib#mysum(3,4)
-select VALUE x;
+testlib#mysum(3,4);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp
index 4fdc669..3bc33de 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp
@@ -22,34 +22,20 @@
* Date : 4th Oct 2017
*/
-drop dataverse externallibtest if exists;
-create dataverse externallibtest;
-use externallibtest;
+drop dataverse udfs if exists;
+create dataverse udfs;
+use udfs;
-create type TweetInputType as open {
- id: string,
- username : string,
- location : string,
- text : string,
- timestamp : string
-};
-
-create type TweetOutputType as open {
- id: string,
- username : string,
- location : string,
- text : string,
- timestamp : string,
- topics : {{string}}
+create type TweetType if not exists as open {
+ id: int64
};
create feed TweetFeed with
{
"adapter-name" : "localfs",
- "type-name" : "TweetInputType",
- "path" : "asterix_nc1://data/twitter/obamatweets.adm",
+ "type-name" : "TweetType",
+ "path" : "asterix_nc1://data/twitter/extrasmalltweets.txt",
"format" : "adm"
};
-create dataset TweetsFeedIngest(TweetOutputType)
-primary key id;
+create dataset ProcessedTweets(TweetType) primary key id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp
index d1e0e87..4f0c6d3 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp
@@ -16,4 +16,4 @@
* specific language governing permissions and limitations
* under the License.
*/
-install externallibtest testlib target/data/externallib/asterix-external-data-testlib.zip
\ No newline at end of file
+install udfs testlib target/data/externallib/asterix-external-data-testlib.zip
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp
index 0d46387..1407514b 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp
@@ -21,10 +21,8 @@
* Expected Res : Success
* Date : 4th Oct 2017
*/
-use externallibtest;
+use udfs;
-SET `compiler.parallelism` "5";
-
-connect feed TweetFeed to dataset TweetsFeedIngest apply function `testlib#parseTweet`;
+connect feed TweetFeed to dataset ProcessedTweets apply function testlib#addMentionedUsers;
start feed TweetFeed;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp
index 607e5bd..b95294a 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp
@@ -22,8 +22,8 @@
* Date : 4th Oct 2017
*/
// polltimeoutsecs=5
-use externallibtest;
+use udfs;
-select value t from TweetsFeedIngest t
+select value t from ProcessedTweets t
ORDER BY t.id;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp
index 86af80f..98c334d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp
@@ -16,4 +16,4 @@
* specific language governing permissions and limitations
* under the License.
*/
-uninstall externallibtest testlib
\ No newline at end of file
+uninstall udfs testlib
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp
index 2a7acef..128c793 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp
@@ -16,4 +16,4 @@
* specific language governing permissions and limitations
* under the License.
*/
-drop dataverse externallibtest if exists;
+drop dataverse udfs if exists;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm
index f0ad2b2..9b2714a 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm
@@ -1,5 +1,5 @@
-{ "Function": { "DataverseName": "externallibtest", "Name": "testlib#addHashTags", "Arity": "1", "Params": [ "Tweet" ], "ReturnType": "ProcessedTweet", "Definition": "org.apache.asterix.external.library.AddHashTagsFactory", "Language": "JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } }
{ "Function": { "DataverseName": "externallibtest", "Name": "testlib#addHashTagsInPlace", "Arity": "1", "Params": [ "Tweet" ], "ReturnType": "ProcessedTweet", "Definition": "org.apache.asterix.external.library.AddHashTagsInPlaceFactory", "Language": "JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } }
+{ "Function": { "DataverseName": "externallibtest", "Name": "testlib#addMentionedUsers", "Arity": "1", "Params": [ "TweetType" ], "ReturnType": "TweetType", "Definition": "org.apache.asterix.external.library.AddMentionedUsersFactory", "Language": "JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } }
{ "Function": { "DataverseName": "externallibtest", "Name": "testlib#allTypes", "Arity": "1", "Params": [ "AllType" ], "ReturnType": "AllType", "Definition": "org.apache.asterix.external.library.AllTypesFactory", "Language": "JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } }
{ "Function": { "DataverseName": "externallibtest", "Name": "testlib#echoDelay", "Arity": "1", "Params": [ "TweetMessageType" ], "ReturnType": "TweetMessageType", "Definition": "org.apache.asterix.external.library.EchoDelayFactory", "Language": "JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } }
{ "Function": { "DataverseName": "externallibtest", "Name": "testlib#fnameDetector", "Arity": "1", "Params": [ "InputRecordType" ], "ReturnType": "DetectResultType", "Definition": "org.apache.asterix.external.library.KeywordsDetectorFactory", "Language": "JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm
index 1291213..0f7eb82 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm
@@ -1,12 +1,21 @@
-{ "id": "nc1:1", "username": "BronsonMike", "location": "", "text": "@GottaLaff @reutersus Christie and obama just foul weather friends", "timestamp": "Thu Dec 06 16:53:06 PST 2012", "topics": {{ }} }
-{ "id": "nc1:100", "username": "KidrauhlProuds", "location": "", "text": "RT @01Direclieber: A filha do Michael Jackson uma Belieber,a filha do Eminem e uma Belieber,as filhas de Obama sao Beliebers, e a filha do meu pai e Belieber", "timestamp": "Thu Dec 06 16:53:16 PST 2012", "topics": {{ }} }
-{ "id": "nc1:102", "username": "jaysauce82", "location": "", "text": "Not voting for President Obama #BadDecision", "timestamp": "Thu Dec 06 16:53:16 PST 2012", "topics": {{ "#BadDecision" }} }
-{ "id": "nc1:104", "username": "princeofsupras", "location": "", "text": "RT @01Direclieber: A filha do Michael Jackson e uma Belieber,a filha do Eminem e uma Belieber,as filhas de Obama sao Beliebers, e a filha do meu pai e Belieber", "timestamp": "Thu Dec 06 16:53:15 PST 2012", "topics": {{ }} }
-{ "id": "nc1:106", "username": "GulfDogs", "location": "", "text": "Obama Admin Knew Libyan Terrorists Had US-Provided Weaponsteaparty #tcot #ccot #NewGuards #BreitbartArmy #patriotwttp://t.co/vJxzrQUE", "timestamp": "Thu Dec 06 16:53:14 PST 2012", "topics": {{ "#tcot", "#ccot", "#NewGuards", "#BreitbartArmy", "#patriotwttp://t.co/vJxzrQUE" }} }
-{ "id": "nc1:108", "username": "Laugzpz", "location": "", "text": "@AlfredoJalife Maestro Obama se hace de la vista gorda, es un acuerdo de siempre creo yo.", "timestamp": "Thu Dec 06 16:53:14 PST 2012", "topics": {{ }} }
-{ "id": "nc1:11", "username": "magarika", "location": "", "text": "RT @ken24xavier: Obama tells SOROS - our plan is ALMOST finished http://t.co/WvzK0GtU", "timestamp": "Thu Dec 06 16:53:05 PST 2012", "topics": {{ }} }
-{ "id": "nc1:111", "username": "ToucanMall", "location": "", "text": "RT @WorldWar3Watch: Michelle Obama Gets More Grammy Nominations Than Justin ... #Obama #WW3 http://t.co/0Wv2GKij", "timestamp": "Thu Dec 06 16:53:13 PST 2012", "topics": {{ "#Obama", "#WW3" }} }
-{ "id": "nc1:113", "username": "ToucanMall", "location": "", "text": "RT @ObamaPalooza: Tiffany Shared What $2,000 Meant to Her ... and the President Stopped by to Talk About It http://t.co/sgT7lsNV #Obama", "timestamp": "Thu Dec 06 16:53:12 PST 2012", "topics": {{ "#Obama" }} }
-{ "id": "nc1:115", "username": "thewildpitch", "location": "", "text": "RT @RevkahJC: Dennis Miller: Obama Should Just Say He Wants To Tax Successful People http://t.co/Ihlemy9Y", "timestamp": "Thu Dec 06 16:53:11 PST 2012", "topics": {{ }} }
-{ "id": "nc1:117", "username": "Rnugent24", "location": "", "text": "RT @ConservativeQuo: unemployment is above 8% again. I wonder how long it will take for Obama to start blaming Bush? 3-2-1 #tcot #antiobama", "timestamp": "Thu Dec 06 16:53:10 PST 2012", "topics": {{ "#tcot", "#antiobama" }} }
-{ "id": "nc1:119", "username": "ToucanMall", "location": "", "text": "RT @Newitrsdotcom: I hope #Obama will win re-election... Other four years without meaningless #wars", "timestamp": "Thu Dec 06 16:53:09 PST 2012", "topics": {{ "#Obama", "#wars" }} }
+{ "id": 21, "tweetid": 69902639026020352, "loc": point("34.5,-100.5"), "time": datetime("2011-05-15T16:11:02.000Z"), "text": "thats that smokers cough maam <<<<<--- @x_incredibleL :: Allergies. i got that "cough" lol", "mentionedUsers": [ "@x_incredibleL" ] }
+{ "id": 22, "tweetid": 69988755800465408, "loc": point("34.5,-97.5"), "time": datetime("2011-05-15T21:53:14.000Z"), "text": "Allergies fuckin over me..#damn", "mentionedUsers": [ ] }
+{ "id": 23, "tweetid": 69940039605432320, "loc": point("34.5,-97.5"), "time": datetime("2011-05-15T18:39:39.000Z"), "text": "Natural Asthma Remedy - Deal With Your Asthma in a Natural Way.. Allergies", "mentionedUsers": [ ] }
+{ "id": 24, "tweetid": 69834276929159169, "loc": point("25.5,-100.5"), "time": datetime("2011-05-15T11:39:23.000Z"), "text": "Damn Allergies... sneezing like crazy! >_<", "mentionedUsers": [ ] }
+{ "id": 25, "tweetid": 69950146787553281, "loc": point("25.5,-97.5"), "time": datetime("2011-05-15T19:19:49.000Z"), "text": "pass me an asthma pump", "mentionedUsers": [ ] }
+{ "id": 26, "tweetid": 69754524767756289, "loc": point("25.5,-97.5"), "time": datetime("2011-05-15T06:22:29.000Z"), "text": "Never knew allergies could actually keep me from sleeping", "mentionedUsers": [ ] }
+{ "id": 27, "tweetid": 69999864498487297, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T22:37:22.000Z"), "text": "@ItsCrystal320 gooodd mommy! Except my allergies have been acting up :( and Im having issues with you know who. Smh nothing new. Lol", "mentionedUsers": [ "@ItsCrystal320" ] }
+{ "id": 28, "tweetid": 69996796616777728, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T22:25:11.000Z"), "text": "My allergies act up so much while Im in this house!!! Idk why! Sneezing, now my eye is swollen!! Smh.", "mentionedUsers": [ ] }
+{ "id": 29, "tweetid": 69977295351316480, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T21:07:41.000Z"), "text": "@GOLDenNote6 lmmmaaaoooo!!!! nnnnooo! ur the one that needs the asthma pump!", "mentionedUsers": [ "@GOLDenNote6" ] }
+{ "id": 30, "tweetid": 69972022586912768, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T20:46:44.000Z"), "text": "@TinaLee90 hell yeah ! He snapped cause she got allergies and heavy she be snorting and coughing while he trying to study", "mentionedUsers": [ "@TinaLee90" ] }
+{ "id": 31, "tweetid": 69965044678524928, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T20:19:01.000Z"), "text": "Back home and my ears begin to itch!!! Omg allergies go away please! #thingsicanlivewithout", "mentionedUsers": [ ] }
+{ "id": 32, "tweetid": 69961997680246784, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T20:06:54.000Z"), "text": "@BravoAndy allergies acting up again or you just digging the glasses? Haha u rock it though!", "mentionedUsers": [ "@BravoAndy" ] }
+{ "id": 33, "tweetid": 69946356248215552, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T19:04:45.000Z"), "text": "My allergies act up at the worst times -_-", "mentionedUsers": [ ] }
+{ "id": 34, "tweetid": 69929466691993600, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T17:57:38.000Z"), "text": "Hate being sick!!! -_____- I hate you allergies! :/", "mentionedUsers": [ ] }
+{ "id": 35, "tweetid": 69928014615556096, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T17:51:52.000Z"), "text": "Allergies please go away :(", "mentionedUsers": [ ] }
+{ "id": 36, "tweetid": 69916338092654592, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T17:05:28.000Z"), "text": "I feel tired....i got asthma :( but it was still an awesome birthday", "mentionedUsers": [ ] }
+{ "id": 37, "tweetid": 69911241975529474, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T16:45:13.000Z"), "text": "Cant stand that asthma commercial with the gold fish -__-", "mentionedUsers": [ ] }
+{ "id": 38, "tweetid": 69910467233062912, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T16:42:08.000Z"), "text": "@PapisFavWave whats wrong? Got a cold? Asthma ?", "mentionedUsers": [ "@PapisFavWave" ] }
+{ "id": 39, "tweetid": 69908652202536961, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T16:34:56.000Z"), "text": "My allergies are killing me!", "mentionedUsers": [ ] }
+{ "id": 40, "tweetid": 69897794273546240, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T15:51:47.000Z"), "text": "and allergies", "mentionedUsers": [ ] }
+{ "id": 41, "tweetid": 69893733449080832, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T15:35:39.000Z"), "text": "Repeated splashing of water about the skin, specifically following an exposure to pollution and dirt, makes sure... http://bit.ly/mnWnJo", "mentionedUsers": [ ] }
diff --git a/asterixdb/asterix-doc/pom.xml b/asterixdb/asterix-doc/pom.xml
index 8ddc1d8..0cc43e4 100644
--- a/asterixdb/asterix-doc/pom.xml
+++ b/asterixdb/asterix-doc/pom.xml
@@ -69,6 +69,12 @@
<concat destfile="${project.build.directory}/generated-site/markdown/aws.md">
<filelist dir="${project.basedir}/src/main/installation/" files="aws_title.md,aws.md" />
</concat>
+ <concat destfile="${project.build.directory}/generated-site/markdown/feeds.md">
+ <filelist dir="${project.basedir}/src/main/data_ingestion/" files="feeds_title.md,feeds.md" />
+ </concat>
+ <concat destfile="${project.build.directory}/generated-site/markdown/udf.md">
+ <filelist dir="${project.basedir}/src/main/user-defined_function/" files="udf_title.md,udf.md" />
+ </concat>
</target>
</configuration>
<goals>
diff --git a/asterixdb/asterix-doc/src/site/markdown/feeds/tutorial.md b/asterixdb/asterix-doc/src/main/data_ingestion/feeds.md
similarity index 96%
rename from asterixdb/asterix-doc/src/site/markdown/feeds/tutorial.md
rename to asterixdb/asterix-doc/src/main/data_ingestion/feeds.md
index f5635b8..0dd6789 100644
--- a/asterixdb/asterix-doc/src/site/markdown/feeds/tutorial.md
+++ b/asterixdb/asterix-doc/src/main/data_ingestion/feeds.md
@@ -17,14 +17,6 @@
! under the License.
!-->
-# Support for Data Ingestion in AsterixDB #
-
-## <a id="#toc">Table of Contents</a> ##
-
-* [Introduction](#Introduction)
-* [Feed Adapters](#FeedAdapters)
-* [Feed Policies](#FeedPolicies)
-
## <a name="Introduction">Introduction</a> ##
In this document, we describe the support for data ingestion in
@@ -101,7 +93,12 @@
The "push_twitter" adapter takes as configuration the above mentioned
parameters. End users are required to obtain the above authentication credentials prior to
using the "push_twitter" adapter. For further information on obtaining OAuth keys and tokens and
-registering an application with Twitter, please visit http://apps.twitter.com
+registering an application with Twitter, please visit http://apps.twitter.com.
+
+Note that AsterixDB uses the Twitter4J API for getting data from Twitter. Due to a license conflict,
+Apache AsterixDB cannot ship the Twitter4J library. To use the Twitter adapter in AsterixDB,
+please download the necessary dependencies (`twitter4j-core-4.0.x.jar` and `twitter4j-stream-4.0.x.jar`) and drop
+them into the `repo/` directory before AsterixDB starts.
Given below is an example SQL++ statement that creates a feed called "TwitterFeed" by using the
"push_twitter" adapter.
diff --git a/asterixdb/asterix-doc/src/main/data_ingestion/feeds_title.md b/asterixdb/asterix-doc/src/main/data_ingestion/feeds_title.md
new file mode 100644
index 0000000..1b7293d
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/data_ingestion/feeds_title.md
@@ -0,0 +1,25 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements. See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership. The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License. You may obtain a copy of the License at
+ !
+ ! http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied. See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+# Data Ingestion with Feeds #
+
+## <a id="#toc">Table of Contents</a> ##
+* [Introduction](#Introduction)
+* [Feed Adapters](#FeedAdapters)
+* [Feed Policies](#FeedPolicies)
\ No newline at end of file
diff --git a/asterixdb/asterix-doc/src/main/installation/ansible_title.md b/asterixdb/asterix-doc/src/main/installation/ansible_title.md
index 307580a..d72801f 100644
--- a/asterixdb/asterix-doc/src/main/installation/ansible_title.md
+++ b/asterixdb/asterix-doc/src/main/installation/ansible_title.md
@@ -16,7 +16,9 @@
! specific language governing permissions and limitations
! under the License.
!-->
+# Installation using Ansible #
+## <a id="#toc">Table of Contents</a> ##
* [Introduction](#Introduction)
* [Prerequisites](#Prerequisites)
* [Cluster Configuration](#config)
diff --git a/asterixdb/asterix-doc/src/main/installation/aws_title.md b/asterixdb/asterix-doc/src/main/installation/aws_title.md
index abf01c9..9af36a9 100644
--- a/asterixdb/asterix-doc/src/main/installation/aws_title.md
+++ b/asterixdb/asterix-doc/src/main/installation/aws_title.md
@@ -16,7 +16,9 @@
! specific language governing permissions and limitations
! under the License.
!-->
+# Installation using Amazon Web Services #
+## <a id="#toc">Table of Contents</a> ##
* [Introduction](#Introduction)
* [Prerequisites](#Prerequisites)
* [Cluster Configuration](#config)
diff --git a/asterixdb/asterix-doc/src/main/user-defined_function/udf.md b/asterixdb/asterix-doc/src/main/user-defined_function/udf.md
new file mode 100644
index 0000000..2431448
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/user-defined_function/udf.md
@@ -0,0 +1,147 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements. See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership. The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License. You may obtain a copy of the License at
+ !
+ ! http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied. See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+## <a name="introduction">Introduction</a>##
+
+Apache AsterixDB supports two languages for writing user-defined functions (UDFs): SQL++ and Java.
+A user can encapsulate data processing logic into a UDF and invoke it
+later repeatedly. For SQL++ functions, a user can refer to [SQL++ Functions](sqlpp/manual.html#Functions)
+for their usages. In this document, we
+focus on how to install/invoke/uninstall a Java function library using the Ansible script that we provide.
+
+
+## <a name="installingUDF">Installing a UDF Library</a>##
+
+UDFs have to be installed offline.
+This section describes the process assuming that you have followed the preceding [ansible installation instructions](ansible.html)
+to deploy an AsterixDB instance on your local machine or cluster. Here are the
+instructions to install a UDF library:
+
+- Step 1: Stop the AsterixDB instance if it is ACTIVE.
+
+ $ bin/stop.sh
+
+- Step 2: Deploy the UDF package.
+
+ $ bin/udf.sh -m i -d DATAVERSE_NAME -l LIBRARY_NAME -p UDF_PACKAGE_PATH
+
+- Step 3: Start AsterixDB
+
+ $ bin/start.sh
+
+After AsterixDB starts, you can use the following query to check whether your UDFs have been successfully registered with the system.
+
+ SELECT * FROM Metadata.`Function`;
+
+In the AsterixDB source release, we provide several sample UDFs that you can try out.
+You need to build the AsterixDB source to get the compiled UDF package. It can be found under
+the `asterix-external-data` sub-project. Assuming that these UDFs have been installed into the `udfs` dataverse and `testlib` library,
+here is an example that uses the sample UDF `mysum` to compute the sum of two input integers.
+
+ use udfs;
+
+ testlib#mysum(3,4);
+
+## <a id="UDFOnFeeds">Attaching a UDF on Data Feeds</a> ##
+
+In [Data Ingestion using feeds](feeds.html), we introduced an efficient way for users to get data into AsterixDB. In
+some use cases, users may want to pre-process the incoming data before storing it into the dataset. To meet this need,
+AsterixDB allows
+the user to attach a UDF onto the ingestion pipeline. Following the example in [Data Ingestion](feeds.html), here we
+show an example of how to attach a UDF that extracts the user names mentioned from the incoming Tweet text, storing the
+processed Tweets into a dataset.
+
+We start by creating the datatype and dataset that will be used for the feed and UDF. One thing to keep in mind is that
+data flows from the feed to the UDF and then to the dataset. This means that the feed's datatype
+should be the same as the input type of the UDF, and the output datatype of the UDF should be the same as the dataset's
+datatype. Thus, users should make sure that their datatypes are consistent in the UDF configuration. Users can also
+take advantage of open datatypes in AsterixDB by creating a minimum description of the data for simplicity.
+Here we use open datatypes:
+
+ use udfs;
+
+ create type TweetType if not exists as open {
+ id: int64
+ };
+
+ create dataset ProcessedTweets(TweetType) primary key id;
+
+As the `TweetType` is an open datatype, processed Tweets can be stored into the dataset after they are annotated
+with an extra attribute. Given the datatype and dataset above, we can create a Twitter Feed with the same datatype.
+Please refer to section [Data Ingestion](feeds.html) if you have any trouble in creating feeds.
+
+ use udfs;
+
+ create feed TwitterFeed with {
+ "adapter-name": "push_twitter",
+ "type-name": "TweetType",
+ "format": "twitter-status",
+ "consumer.key": "************",
+ "consumer.secret": "************",
+ "access.token": "**********",
+ "access.token.secret": "*************"
+ };
+
+After creating the feed, we attach the UDF onto the feed pipeline and start the feed with the following statements:
+
+ use udfs;
+
+ connect feed TwitterFeed to dataset ProcessedTweets apply function testlib#addMentionedUsers;
+
+ start feed TwitterFeed;
+
+You can check the annotated Tweets by querying the `ProcessedTweets` dataset:
+
+ SELECT * FROM ProcessedTweets LIMIT 10;
+
+## <a name="udfConfiguration">A quick look at the UDF configuration</a>##
+
+AsterixDB uses an XML configuration file to describe the UDFs. A user can use it to define and reuse their compiled UDFs
+for different purposes. Here is a snippet of the configuration used in our [previous example](#UDFOnFeeds):
+
+ <libraryFunction>
+ <name>addMentionedUsers</name>
+ <function_type>SCALAR</function_type>
+ <argument_type>TweetType</argument_type>
+ <return_type>TweetType</return_type>
+ <definition>org.apache.asterix.external.library.AddMentionedUsersFactory</definition>
+ <parameters>text</parameters>
+ </libraryFunction>
+
+Here are the explanations of the fields in the configuration file:
+
+ name: The proper name that is used to invoke the function.
+ function_type: The type of the function.
+ argument_type: The datatype of the arguments passed in. If there is more than one parameter, separate them with comma(s), e.g., `AINT32,AINT32`.
+ return_type: The datatype of the returning value.
+ definition: A reference to the function factory.
+ parameters: The parameters passed into the function.
+
+In our feeds example, we passed in `"text"` as a parameter to the function so it knows which field to look at to get the Tweet text.
+If the Twitter API were to change its field names in the future, we can accommodate that change by simply modifying the configuration file
+instead of recompiling the whole UDF package. This feature can be further utilized in use cases where a user has a Machine Learning
+algorithm with different trained model files. If you are interested, you can find more examples [here](https://github.com/apache/asterixdb/tree/master/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library).
+
+## <a name="uninstall">Uninstalling a UDF Library</a>##
+
+If you want to uninstall the UDF library, put AsterixDB into `INACTIVE` mode and run the following command:
+
+ $ bin/udf.sh -m u -d DATAVERSE_NAME -l LIBRARY_NAME
+
+
diff --git a/asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md b/asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md
new file mode 100644
index 0000000..659c13b
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md
@@ -0,0 +1,27 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements. See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership. The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License. You may obtain a copy of the License at
+ !
+ ! http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied. See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+# User-defined Functions #
+
+## <a id="#toc">Table of Contents</a> ##
+* [Introduction](#introduction)
+* [Installing a UDF Library](#installingUDF)
+* [Attaching a UDF on Data Feeds](#UDFOnFeeds)
+* [A quick look at the UDF configuration](#udfConfiguration)
+* [Uninstalling a UDF Library](#uninstall)
\ No newline at end of file
diff --git a/asterixdb/asterix-doc/src/site/markdown/ncservice.md b/asterixdb/asterix-doc/src/site/markdown/ncservice.md
index 2b309ce..ef2ac9b1 100644
--- a/asterixdb/asterix-doc/src/site/markdown/ncservice.md
+++ b/asterixdb/asterix-doc/src/site/markdown/ncservice.md
@@ -17,6 +17,8 @@
! under the License.
!-->
+# Installation using NCService #
+
## <a id="toc">Table of Contents</a> ##
* [Quick Start](#quickstart)
diff --git a/asterixdb/asterix-doc/src/site/markdown/udf.md b/asterixdb/asterix-doc/src/site/markdown/udf.md
deleted file mode 100644
index b2ef2bc..0000000
--- a/asterixdb/asterix-doc/src/site/markdown/udf.md
+++ /dev/null
@@ -1,189 +0,0 @@
-<!--
- ! Licensed to the Apache Software Foundation (ASF) under one
- ! or more contributor license agreements. See the NOTICE file
- ! distributed with this work for additional information
- ! regarding copyright ownership. The ASF licenses this file
- ! to you under the Apache License, Version 2.0 (the
- ! "License"); you may not use this file except in compliance
- ! with the License. You may obtain a copy of the License at
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing,
- ! software distributed under the License is distributed on an
- ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- ! KIND, either express or implied. See the License for the
- ! specific language governing permissions and limitations
- ! under the License.
- !-->
-
-# Support for User Defined Functions in AsterixDB #
-
-## <a id="#toc">Table of Contents</a> ##
-* [Using UDF to preprocess feed-collected data](#PreprocessingCollectedData)
-* [Writing an External UDF](#WritingAnExternalUDF)
-* [Creating an AsterixDB Library](#CreatingAnAsterixDBLibrary)
-* [Installing an AsterixDB Library](#installingUDF)
-
-In this document, we describe the support for implementing, using, and installing user-defined functions (UDF) in
-AsterixDB. We will explain how we can use UDFs to preprocess, e.g., data collected using feeds (see the [feeds tutorial](feeds/tutorial.html)).
-
-
-### <a name="installingUDF">Installing an AsterixDB Library</a>###
-
-We assume you have followed the [installation instructions](../install.html) to set up a running AsterixDB instance. Let us refer your AsterixDB instance by the name "my_asterix".
-
-- Step 1: Stop the AsterixDB instance if it is in the ACTIVE state.
-
- $ managix stop -n my_asterix
-
-- Step 2: Install the library using Managix install command. Just to illustrate, we use the help command to look up the syntax
-
- $ managix help -cmd install
- Installs a library to an asterix instance.
- Options
- n Name of Asterix Instance
- d Name of the dataverse under which the library will be installed
- l Name of the library
- p Path to library zip bundle
-
-Above is a sample output and explains the usage and the required parameters. Each library has a name and is installed under a dataverse. Recall that we had created a dataverse by the name - "feeds" prior to creating our datatypes and dataset. We shall name our library - "testlib".
-
-We assume you have a library zip bundle that needs to be installed.
-To install the library, use the Managix install command. An example is shown below.
-
- $ managix install -n my_asterix -d feeds -l testlib -p extlibs/asterix-external-data-0.8.7-binary-assembly.zip
-
-You should see the following message:
-
- INFO: Installed library testlib
-
-We shall next start our AsterixDB instance using the start command as shown below.
-
- $ managix start -n my_asterix
-
-You may now use the AsterixDB library in AQL statements and queries. To look at the installed artifacts, you may execute the following query at the AsterixDB web-console.
-
- for $x in dataset Metadata.Function
- return $x
-
- for $x in dataset Metadata.Library
- return $x
-
-Our library is now installed and is ready to be used.
-
-
-## <a id="PreprocessingCollectedData">Preprocessing Collected Data</a> ###
-
-In the following we assume that you already created the `TwitterFeed` and its corresponding data types and dataset following the instruction explained in the [feeds tutorial](feeds/tutorial.html).
-
-A feed definition may optionally include the specification of a
-user-defined function that is to be applied to each feed object prior
-to persistence. Examples of pre-processing might include adding
-attributes, filtering out objects, sampling, sentiment analysis, feature
-extraction, etc. We can express a UDF, which can be defined in AQL or in a programming
-language such as Java, to perform such pre-processing. An AQL UDF is a good fit when
-pre-processing a object requires the result of a query (join or aggregate)
-over data contained in AsterixDB datasets. More sophisticated
-processing such as sentiment analysis of text is better handled
-by providing a Java UDF. A Java UDF has an initialization phase
-that allows the UDF to access any resources it may need to initialize
-itself prior to being used in a data flow. It is assumed by the
-AsterixDB compiler to be stateless and thus usable as an embarrassingly
-parallel black box. In contrast, the AsterixDB compiler can
-reason about an AQL UDF and involve the use of indexes during
-its invocation.
-
-We consider an example transformation of a raw tweet into its
-lightweight version called `ProcessedTweet`, which is defined next.
-
- use dataverse feeds;
-
- create type ProcessedTweet if not exists as open {
- id: string,
- user_name:string,
- location:point,
- created_at:string,
- message_text:string,
- country: string,
- topics: {{string}}
- };
-
- create dataset ProcessedTweets(ProcessedTweet)
- primary key id;
-
-The processing required in transforming a collected tweet to its lighter version of type `ProcessedTweet` involves extracting the topics or hash-tags (if any) in a tweet
-and collecting them in the referred "topics" attribute for the tweet.
-Additionally, the latitude and longitude values (doubles) are combined into the spatial point type. Note that spatial data types are considered as first-class citizens that come with the support for creating indexes. Next we show a revised version of our example TwitterFeed that involves the use of a UDF. We assume that the UDF that contains the transformation logic into a "ProcessedTweet" is available as a Java UDF inside an AsterixDB library named 'testlib'. We defer the writing of a Java UDF and its installation as part of an AsterixDB library to a later section of this document.
-
- use dataverse feeds;
-
- create feed ProcessedTwitterFeed if not exists
- using "push_twitter"
- (("type-name"="Tweet"),
- ("consumer.key"="************"),
- ("consumer.secret"="**************"),
- ("access.token"="**********"),
- ("access.token.secret"="*************"))
-
- apply function testlib#addHashTagsInPlace;
-
-Note that a feed adaptor and a UDF act as pluggable components. These
-contribute towards providing a generic "plug-and-play" model where
-custom implementations can be provided to cater to specific requirements.
-
-####Building a Cascade Network of Feeds####
-Multiple high-level applications may wish to consume the data
-ingested from a data feed. Each such application might perceive the
-feed in a different way and require the arriving data to be processed
-and/or persisted differently. Building a separate flow of data from
-the external source for each application is wasteful of resources as
-the pre-processing or transformations required by each application
-might overlap and could be done together in an incremental fashion
-to avoid redundancy. A single flow of data from the external source
-could provide data for multiple applications. To achieve this, we
-introduce the notion of primary and secondary feeds in AsterixDB.
-
-A feed in AsterixDB is considered to be a primary feed if it gets
-its data from an external data source. The objects contained in a
-feed (subsequent to any pre-processing) are directed to a designated
-AsterixDB dataset. Alternatively or additionally, these objects can
-be used to derive other feeds known as secondary feeds. A secondary
-feed is similar to its parent feed in every other aspect; it can
-have an associated UDF to allow for any subsequent processing,
-can be persisted into a dataset, and/or can be made to derive other
-secondary feeds to form a cascade network. A primary feed and a
-dependent secondary feed form a hierarchy. As an example, we next show an
-example AQL statement that redefines the previous feed
-"ProcessedTwitterFeed" in terms of their
-respective parent feed (TwitterFeed).
-
- use dataverse feeds;
-
- drop feed ProcessedTwitterFeed if exists;
-
- create secondary feed ProcessedTwitterFeed from feed TwitterFeed
- apply function testlib#addHashTags;
-
- connect feed ProcessedTwitterFeed to dataset ProcessedTweets;
-
-The `addHashTags` function is already provided in the example UDF.To see what objects
-are being inserted into the dataset, we can perform a simple dataset scan after
-allowing a few moments for the feed to start ingesting data:
-
- use dataverse feeds;
-
- for $i in dataset ProcessedTweets limit 10 return $i;
-
-For an example of how to write a Java UDF from scratch, the source for the example
-UDF that has been used in this tutorial is available [here] (https://github.com/apache/asterixdb/tree/master/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library)
-
-## <a name="installingUDF">Unstalling an AsterixDB Library</a>###
-
-To uninstall a library, use the Managix uninstall command as follows:
-
- $ managix stop -n my_asterix
-
- $ managix uninstall -n my_asterix -d feeds -l testlib
-
-
diff --git a/asterixdb/asterix-doc/src/site/site.xml b/asterixdb/asterix-doc/src/site/site.xml
index 1167c37..6db028e 100644
--- a/asterixdb/asterix-doc/src/site/site.xml
+++ b/asterixdb/asterix-doc/src/site/site.xml
@@ -90,7 +90,7 @@
<menu name="Advanced Features">
<item name="Accessing External Data" href="aql/externaldata.html"/>
- <item name="Support for Data Ingestion" href="feeds/tutorial.html"/>
+ <item name="Data Ingestion with Feeds" href="feeds.html"/>
<item name="User Defined Functions" href="udf.html"/>
<item name="Filter-Based LSM Index Acceleration" href="sqlpp/filters.html"/>
<item name="Support of Full-text Queries" href="sqlpp/fulltext.html"/>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java
index d915559..9381d09 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java
@@ -146,22 +146,4 @@
private Tweet_User() {
}
}
-
- /*
- The following assumes this DDL (but ignoring the field name orders):
- create type ProcessedTweet if not exists as open {
- id: string,
- user_name:string,
- location:point,
- created_at:string,
- message_text:string,
- country: string,
- topics: [string]
- };
- */
- public static final class ProcessedTweet {
- public static final String USER_NAME = "user_name";
- public static final String LOCATION = "location";
- public static final String TOPICS = "topics";
- }
}
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java
deleted file mode 100644
index 1b5fecd..0000000
--- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.library;
-
-import org.apache.asterix.external.library.java.JBuiltinType;
-import org.apache.asterix.external.library.java.base.JDouble;
-import org.apache.asterix.external.library.java.base.JPoint;
-import org.apache.asterix.external.library.java.base.JRecord;
-import org.apache.asterix.external.library.java.base.JString;
-import org.apache.asterix.external.library.java.base.JUnorderedList;
-import org.apache.asterix.external.api.IExternalScalarFunction;
-import org.apache.asterix.external.api.IFunctionHelper;
-import org.apache.asterix.external.library.java.JTypeTag;
-import org.apache.asterix.external.util.Datatypes;
-
-public class AddHashTagsFunction implements IExternalScalarFunction {
-
- private JUnorderedList list = null;
- private JPoint location = null;
-
- @Override
- public void initialize(IFunctionHelper functionHelper) {
- list = new JUnorderedList(JBuiltinType.JSTRING);
- location = new JPoint(0, 0);
- }
-
- @Override
- public void deinitialize() {
- }
-
- @Override
- public void evaluate(IFunctionHelper functionHelper) throws Exception {
- list.clear();
- JRecord inputRecord = (JRecord) functionHelper.getArgument(0);
- JString text = (JString) inputRecord.getValueByName(Datatypes.Tweet.MESSAGE);
- JDouble latitude = (JDouble) inputRecord.getValueByName(Datatypes.Tweet.LATITUDE);
- JDouble longitude = (JDouble) inputRecord.getValueByName(Datatypes.Tweet.LONGITUDE);
-
- if (latitude != null && longitude != null) {
- location.setValue(latitude.getValue(), longitude.getValue());
- } else {
- location.setValue(0, 0);
- }
-
- String[] tokens = text.getValue().split(" ");
- for (String tk : tokens) {
- if (tk.startsWith("#")) {
- JString newField = (JString) functionHelper.getObject(JTypeTag.STRING);
- newField.setValue(tk);
- list.add(newField);
- }
- }
-
- JRecord outputRecord = (JRecord) functionHelper.getResultObject();
- outputRecord.setField(Datatypes.Tweet.ID, inputRecord.getValueByName(Datatypes.Tweet.ID));
-
- JRecord userRecord = (JRecord) inputRecord.getValueByName(Datatypes.Tweet.USER);
- outputRecord.setField(Datatypes.ProcessedTweet.USER_NAME,
- userRecord.getValueByName(Datatypes.Tweet.SCREEN_NAME));
-
- outputRecord.setField(Datatypes.ProcessedTweet.LOCATION, location);
- outputRecord.setField(Datatypes.Tweet.CREATED_AT, inputRecord.getValueByName(Datatypes.Tweet.CREATED_AT));
- outputRecord.setField(Datatypes.Tweet.MESSAGE, text);
- outputRecord.setField(Datatypes.ProcessedTweet.TOPICS, list);
-
- functionHelper.setResult(outputRecord);
- }
-
-}
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java
index 7873835..ecee876 100644
--- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java
@@ -54,7 +54,7 @@
list.add(newField);
}
}
- inputRecord.addField(Datatypes.ProcessedTweet.TOPICS, list);
+ inputRecord.addField("topics", list);
functionHelper.setResult(inputRecord);
}
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddMentionedUsersFactory.java
similarity index 89%
rename from asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java
rename to asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddMentionedUsersFactory.java
index db693a1..92e8ade 100644
--- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddMentionedUsersFactory.java
@@ -21,11 +21,11 @@
import org.apache.asterix.external.api.IExternalScalarFunction;
import org.apache.asterix.external.api.IFunctionFactory;
-public class AddHashTagsFactory implements IFunctionFactory {
+public class AddMentionedUsersFactory implements IFunctionFactory {
@Override
public IExternalScalarFunction getExternalFunction() {
- return new AddHashTagsFunction();
+ return new addMentionedUsersFunction();
}
}
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/addMentionedUsersFunction.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/addMentionedUsersFunction.java
new file mode 100644
index 0000000..981aa2b
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/addMentionedUsersFunction.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.library;
+
+import org.apache.asterix.external.library.java.JBuiltinType;
+import org.apache.asterix.external.library.java.base.JRecord;
+import org.apache.asterix.external.library.java.base.JString;
+import org.apache.asterix.external.library.java.base.JUnorderedList;
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionHelper;
+import org.apache.asterix.external.library.java.JTypeTag;
+import org.apache.asterix.external.util.Datatypes;
+
+public class addMentionedUsersFunction implements IExternalScalarFunction { // scalar UDF: adds a "mentionedUsers" field holding the "@"-prefixed tokens of a text field
+
+ private JUnorderedList list = null; // created once in initialize(), cleared and refilled on every evaluate() call
+ private String textFieldName; // name of the record field to scan; taken from parameter 0 in initialize()
+
+ @Override
+ public void initialize(IFunctionHelper functionHelper) {
+ list = new JUnorderedList(JBuiltinType.JSTRING);
+ textFieldName = functionHelper.getParameters().get(0); // NOTE(review): presumably the <parameters> value from the library descriptor - confirm
+ }
+
+ @Override
+ public void deinitialize() { // nothing to release
+ }
+
+ @Override
+ public void evaluate(IFunctionHelper functionHelper) throws Exception {
+ list.clear(); // drop the mentions collected for the previous record
+ JRecord inputRecord = (JRecord) functionHelper.getArgument(0);
+ JString text = (JString) inputRecord.getValueByName(textFieldName); // NOTE(review): no null check before text.getValue() below - confirm the field is always present
+
+ String[] tokens = text.getValue().split(" "); // tokenizes on single spaces only
+ for (String tk : tokens) {
+ if (tk.startsWith("@")) {
+ JString newField = (JString) functionHelper.getObject(JTypeTag.STRING);
+ newField.setValue(tk); // the leading "@" is kept in the stored value
+ list.add(newField);
+ }
+ }
+ inputRecord.addField("mentionedUsers", list);
+ functionHelper.setResult(inputRecord); // the input record itself, with the extra field, is the result
+ }
+
+}
diff --git a/asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml b/asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml
index 6b59041..de6a67f 100644
--- a/asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml
+++ b/asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml
@@ -42,16 +42,15 @@
<function_type>SCALAR</function_type>
<argument_type>TweetInputType</argument_type>
<return_type>TweetOutputType</return_type>
- <definition>org.apache.asterix.external.library.ParseTweetFactory
- </definition>
+ <definition>org.apache.asterix.external.library.ParseTweetFactory</definition>
</libraryFunction>
<libraryFunction>
- <name>addHashTags</name>
+ <name>addMentionedUsers</name>
<function_type>SCALAR</function_type>
- <argument_type>Tweet</argument_type>
- <return_type>ProcessedTweet</return_type>
- <definition>org.apache.asterix.external.library.AddHashTagsFactory
- </definition>
+ <argument_type>TweetType</argument_type>
+ <return_type>TweetType</return_type>
+ <definition>org.apache.asterix.external.library.AddMentionedUsersFactory</definition>
+ <parameters>text</parameters>
</libraryFunction>
<libraryFunction>
<name>addHashTagsInPlace</name>
diff --git a/asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm b/asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm
index 20dc8c8..b1fa1d1 100644
--- a/asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm
+++ b/asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm
@@ -1,4 +1,4 @@
-{ "DataverseName": "externallibtest", "Name": "testlib#addHashTags", "Arity": "1", "Params": [ "Tweet" ], "ReturnType": "ProcessedTweet", "Definition": "org.apache.asterix.external.library.AddHashTagsFactory", "Language": "JAVA", "Kind": "SCALAR" }
+{ "DataverseName": "externallibtest", "Name": "testlib#addMentionedUsers", "Arity": "1", "Params": [ "TweetType" ], "ReturnType": "TweetType", "Definition": "org.apache.asterix.external.library.AddMentionedUsersFactory", "Language": "JAVA", "Kind": "SCALAR" }
{ "DataverseName": "externallibtest", "Name": "testlib#addHashTagsInPlace", "Arity": "1", "Params": [ "Tweet" ], "ReturnType": "ProcessedTweet", "Definition": "org.apache.asterix.external.library.AddHashTagsInPlaceFactory", "Language": "JAVA", "Kind": "SCALAR" }
{ "DataverseName": "externallibtest", "Name": "testlib#allTypes", "Arity": "1", "Params": [ "AllType" ], "ReturnType": "AllType", "Definition": "org.apache.asterix.external.library.AllTypesFactory", "Language": "JAVA", "Kind": "SCALAR" }
{ "DataverseName": "externallibtest", "Name": "testlib#echoDelay", "Arity": "1", "Params": [ "TweetMessageType" ], "ReturnType": "TweetMessageType", "Definition": "org.apache.asterix.external.library.EchoDelayFactory", "Language": "JAVA", "Kind": "SCALAR" }