/*
 * Dataverse setup.
 * Drops any existing `twitter` dataverse, creates it again, and selects it
 * as the active dataverse, so the script can be re-run from a clean state.
 */
drop dataverse twitter if exists;
create dataverse twitter;
use dataverse twitter;

/*
 * Record type for a single tweet.
 * Declared `open`: the five fields below are the declared ones; per the AQL
 * data model, instances of an open type may carry additional fields.
 */
create type Tweet as open {
  id: int32,
  tweetid: int64,
  loc: point,
  time: datetime,
  text: string
}

/*
 * External dataset of Tweet records.
 * Uses the NCFileSystemAdapter with "path" nc1://data/twitter/smalltweets.txt
 * and "format" adm.
 */
create external dataset TwitterData(Tweet)
using "org.apache.asterix.external.dataset.adapter.NCFileSystemAdapter"
(("path"="nc1://data/twitter/smalltweets.txt"),("format"="adm"));

// Direct the query result to the file /tmp/count-tweets.adm on node nc1.
write output to nc1:"/tmp/count-tweets.adm";

/*
 * Word count over tweet text.
 * Splits each tweet's `text` into word tokens, groups identical tokens, and
 * returns one { "word", "count" } record per distinct token.
 */
for $t in dataset('TwitterData')
let $tokens := word-tokens($t.text)
// Unnest the token list so each token becomes its own binding.
for $token in $tokens
// `with $token` carries the grouped tokens into the return clause,
// where count($token) yields the number of occurrences of $tok.
group by $tok := $token with $token
return { "word": $tok, "count": count($token) }
