// Start from a clean slate: remove any previous 'twitter' dataverse,
// recreate it, and make it the default for the statements that follow.
drop dataverse twitter if exists;
create dataverse twitter;
use dataverse twitter;
// Record type for a single tweet.
// Declared 'open', so instances may carry fields beyond the five listed here.
create type Tweet as open {
  id: int32,
  tweetid: int64,
  loc: point,
  time: datetime,
  text: string      // the field tokenized by the word-count query
}
// External dataset of Tweet records, read through the NC file-system adapter.
// "path" points at an ADM-formatted file addressed via node 'nc1'.
create external dataset TwitterData(Tweet)
using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
((
  "path"="nc1://data/twitter/smalltweets.txt"),
  ("format"="adm"
));
// Word count over tweet text; the result is written to a file on node 'nc1'.
write output to nc1:"/tmp/count-tweets.adm";

// For every tweet, split its text into word tokens, group identical tokens
// together, and emit one record per distinct token with its occurrence count.
for $tweet in dataset('TwitterData')
for $word in word-tokens($tweet.text)
// After the group-by, $word is rebound to the list of all tokens in the
// group, so count($word) is the number of occurrences of $key.
group by $key := $word with $word
return { "word": $key, "count": count($word) }