blob: ce4662d7238d9e39b6018eee5014802985b69ce9 [file] [log] [blame]
/*
 * Schema setup: recreate the `twitter` dataverse from scratch so the script
 * is repeatable, then make it the current dataverse for the statements below.
 */
drop dataverse twitter if exists;
create dataverse twitter;
use dataverse twitter;

/*
 * Record type for one tweet. It is declared `open`; the five fields listed
 * here are the ones this script declares (the query only reads `text`).
 */
create type Tweet as open {
    id:      int32,
    tweetid: int64,
    loc:     point,
    time:    datetime,
    text:    string
}
11
/*
 * TwitterData is an external dataset of Tweet records: the data is read
 * through the NC file-system adapter from the given nc1 path, in ADM format.
 */
create external dataset TwitterData(Tweet)
    using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
    (
        ("path"="nc1://data/twitter/extrasmalltweets.txt"),
        ("format"="adm")
    );

/* Destination of the query result, addressed on node nc1. */
write output to nc1:"rttest/groupby-orderby-count.adm";
17
/*
 * Word-frequency query: tokenize the text of every tweet, group identical
 * tokens, and emit one { "word", "count" } record per distinct token,
 * ordered by descending count with ties broken by the word ascending.
 */
for $tweet in dataset('TwitterData')
for $w in word-tokens($tweet.text)
/* After grouping, $w is bound to the collection of tokens in the group. */
group by $word := $w with $w
/* Bind the group size once; it drives both the ordering and the output. */
let $occurrences := count($w)
order by $occurrences desc, $word asc
return { "word": $word, "count": $occurrences }