asterix-app/src/test/resources/optimizerts/queries/count-tweets.aql - asterixdb - Gitiles

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 /*
  * count-tweets: word-count query over the text of tweets stored in an
  * external dataset. The statements below set up the dataverse, type and
  * dataset that the final query reads from.
  */

 /* Recreate the `twitter` dataverse from scratch and make it the current one. */
 drop dataverse twitter if exists;
 create dataverse twitter;
 use dataverse twitter;

 /* Record type of the tweets; only the `text` field is read by the query below. */
 create type Tweet as open {
   id: int32,
   tweetid: int64,
   loc: point,
   time: datetime,
   text: string
 }

 /* External dataset of Tweet records, read from an ADM-formatted file addressed via nc1. */
 create external dataset TwitterData(Tweet)
 using "org.apache.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/twitter/smalltweets.txt"),("format"="adm"));

 /* Destination file for the query result. */
 write output to nc1:"/tmp/count-tweets.adm";

 /*
  * For every tweet, split its text with word-tokens, iterate over the
  * resulting tokens, and group equal tokens together. `with $token` carries
  * the grouped tokens past the group-by so that count($token) can be applied
  * per group. The result is one { "word", "count" } record per distinct token.
  */
 for $t in dataset('TwitterData')
 let $tokens := word-tokens($t.text)
 for $token in $tokens
 group by $tok := $token with $token
 return { "word": $tok, "count": count($token) }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/
	/*
	* count-tweets: word-count query over the text of tweets stored in an
	* external dataset. The statements below set up the dataverse, type and
	* dataset that the final query reads from.
	*/

	/* Recreate the `twitter` dataverse from scratch and make it the current one. */
	drop dataverse twitter if exists;
	create dataverse twitter;
	use dataverse twitter;

	/* Record type of the tweets; only the `text` field is read by the query below. */
	create type Tweet as open {
	id: int32,
	tweetid: int64,
	loc: point,
	time: datetime,
	text: string
	}

	/* External dataset of Tweet records, read from an ADM-formatted file addressed via nc1. */
	create external dataset TwitterData(Tweet)
	using "org.apache.asterix.external.dataset.adapter.NCFileSystemAdapter"
	(("path"="nc1://data/twitter/smalltweets.txt"),("format"="adm"));

	/* Destination file for the query result. */
	write output to nc1:"/tmp/count-tweets.adm";

	/*
	* For every tweet, split its text with word-tokens, iterate over the
	* resulting tokens, and group equal tokens together. `with $token` carries
	* the grouped tokens past the group-by so that count($token) can be applied
	* per group. The result is one { "word", "count" } record per distinct token.
	*/
	for $t in dataset('TwitterData')
	let $tokens := word-tokens($t.text)
	for $token in $tokens
	group by $tok := $token with $token
	return { "word": $tok, "count": count($token) }