/*
 * Word count over tweet text.
 *
 * Rebuilds the `twitter` dataverse, declares the Tweet type, exposes an
 * ADM file as the external dataset TwitterData, and writes one
 * { "word", "count" } record per distinct token found in the tweets'
 * `text` field.
 */

/* Start from a clean dataverse so the script can be re-run. */
drop dataverse twitter if exists;
create dataverse twitter;
use dataverse twitter;

/* Record type for one tweet; declared `open`, with five named fields. */
create type Tweet as open {
  id: int32,
  tweetid: int64,
  loc: point,
  time: datetime,
  text: string
}

/*
 * External dataset: the records are read through the NCFileSystemAdapter
 * from the path below, in "adm" format.
 */
create external dataset TwitterData(Tweet)
using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
(("path"="nc1://data/twitter/smalltweets.txt"),("format"="adm"));

/* Query results are written to this file on nc1. */
write output to nc1:"/tmp/count-tweets.adm";

/*
 * For every tweet, tokenize its text with word-tokens(), iterate over the
 * tokens, group equal tokens together, and return each distinct token with
 * count() applied to $token, which the `with` clause carries through the
 * group by.
 */
for $t in dataset('TwitterData')
let $tokens := word-tokens($t.text)
for $token in $tokens
group by $tok := $token with $token
return { "word": $tok, "count": count($token) }