/*
 * Counts how many times each word occurs in the text of the tweets
 * stored in the external TwitterData dataset and writes one
 * { "word", "count" } record per distinct word.
 */

/* Recreate the dataverse from scratch so the script can be re-run. */
drop dataverse twitter if exists;
create dataverse twitter;
use dataverse twitter;

/*
 * Record type for a tweet. The type is declared open, so the five
 * fields below are the declared ones; records may carry extra fields.
 */
create type Tweet as open {
  id: int32,
  tweetid: int64,
  loc: point,
  time: string,
  text: string
}

/* External dataset backed by an ADM-formatted file addressed via node nc1. */
create external dataset TwitterData(Tweet)
using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
(("path"="nc1://data/twitter/smalltweets.txt"),("format"="adm"));

/* Destination of the query result. */
write output to nc1:"/tmp/count-tweets.adm";

/*
 * Split each tweet's text into word tokens, group identical tokens
 * together, and count the members of each group. After the group-by,
 * $token is bound to the collection of tokens in the group (because of
 * the "with $token" clause), so count($token) is that word's frequency.
 */
for $t in dataset('TwitterData')
let $tokens := word-tokens($t.text)
for $token in $tokens
group by $tok := $token with $token
return { "word": $tok, "count": count($token) }