blob: bb8db50a33c418c2ed240fe45c5c9b7eaab8f98c [file] [log] [blame]
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
// Generator-wide settings for this grammar.
options {
    // Default number of tokens examined when choosing between alternatives.
    LOOKAHEAD = 2;
    // Keywords and other tokens are matched without regard to letter case.
    IGNORE_CASE = true;
    // Generate an instance-based (non-static) parser.
    STATIC = false;
}
24
25PARSER_BEGIN(PigletParser)
26
Ian Maxone915e8c2015-07-01 17:03:31 -070027package org.apache.hyracks.algebricks.examples.piglet.parser;
vinayakb0c860392012-10-06 18:47:20 +000028
29import java.util.*;
Ian Maxone915e8c2015-07-01 17:03:31 -070030import org.apache.hyracks.algebricks.common.utils.Pair;
31import org.apache.hyracks.algebricks.examples.piglet.ast.*;
32import org.apache.hyracks.algebricks.examples.piglet.types.*;
vinayakb0c860392012-10-06 18:47:20 +000033
/**
 * Parser class body: Java helpers shared by the semantic actions of the
 * grammar productions.
 */
public class PigletParser {
    /**
     * Wraps the given operand expressions in a scalar function node.
     *
     * @param fTag      tag identifying the function/operator
     * @param fName     function name (the productions in this grammar pass null)
     * @param arguments operand expressions, kept in the order given
     * @return a new ScalarFunctionExpressionNode over a fresh argument list
     */
    private ExpressionNode createFunction(FunctionTag fTag, String fName, ExpressionNode... arguments) {
        List<ASTNode> operands = new ArrayList<ASTNode>(arguments.length);
        Collections.addAll(operands, arguments);
        return new ScalarFunctionExpressionNode(fTag, fName, operands);
    }

    /**
     * Drops the first and the last character of the given string.
     *
     * @param s text including its two delimiter characters
     * @return the text between the first and last character
     */
    private String stripQuotes(String s) {
        return s.substring(1, s.length() - 1);
    }
}
49
50PARSER_END(PigletParser)
51
/**
 * Parses a whole script: zero or more assignment or dump statements, each
 * terminated by ";", followed by end of input.
 *
 * @return the parsed statements in source order
 */
List<ASTNode> Statements(): {
    List<ASTNode> statements = new ArrayList<ASTNode>();
    ASTNode stmt = null;
} {
    (
        (
            stmt = AssignmentStatement()
          | stmt = DumpStatement()
        ) {
            // Collected as soon as the statement body is parsed, before its ";".
            statements.add(stmt);
        }
        ";"
    )*
    <EOF> {
        return statements;
    }
}
69
/**
 * Parses "alias = relational-statement".
 *
 * @return an AssignmentNode binding the alias to the parsed relation
 */
ASTNode AssignmentStatement(): {
    String target;
    RelationNode relation;
} {
    target = Alias()
    "="
    relation = RelationalStatement() {
        return new AssignmentNode(target, relation);
    }
}
78
/**
 * Parses a relation alias, which is a single identifier token.
 *
 * @return the identifier text as written in the input
 */
String Alias(): {
    Token name;
} {
    name = <IDENTIFIER> {
        return name.image;
    }
}
86
/**
 * Parses a column name, which is a single identifier token.
 *
 * @return the identifier text as written in the input
 */
String ColumnName(): {
    Token name;
} {
    name = <IDENTIFIER> {
        return name.image;
    }
}
94
/**
 * Parses a type keyword and maps it to its Type object.
 *
 * The scalar keywords (int, long, float, double, chararray) return the
 * shared INSTANCE of their type; tuple, bag and map return a newly
 * constructed type object on every call.
 *
 * @return the Type denoted by the keyword
 */
Type TypeName(): {
    // No locals needed: each alternative returns directly.
} {
    "int" {
        return IntegerType.INSTANCE;
    }
  | "long" {
        return LongType.INSTANCE;
    }
  | "float" {
        return FloatType.INSTANCE;
    }
  | "double" {
        return DoubleType.INSTANCE;
    }
  | "chararray" {
        return CharArrayType.INSTANCE;
    }
  | "tuple" {
        return new TupleType();
    }
  | "bag" {
        return new BagType();
    }
  | "map" {
        return new MapType();
    }
}
123
/**
 * Parses one "name : type" column declaration.
 *
 * @return a pair of (column name, column type)
 */
Pair<String, Type> ColumnSchema(): {
    String columnName;
    Type columnType;
} {
    columnName = ColumnName()
    ":"
    columnType = TypeName() {
        return new Pair<String, Type>(columnName, columnType);
    }
}
132
/**
 * Parses a parenthesised, comma-separated list of column declarations.
 * The list may be empty, i.e. "()" is accepted.
 *
 * @return a Schema built from the declared columns in source order
 */
Schema Schema(): {
    List<Pair<String, Type>> columns = new ArrayList<Pair<String, Type>>();
    Pair<String, Type> column;
} {
    "("
    (
        // First column, then any number of ", column" continuations.
        column = ColumnSchema() { columns.add(column); }
        (
            "," column = ColumnSchema() { columns.add(column); }
        )*
    )?
    ")" {
        return new Schema(columns);
    }
}
149
/**
 * Parses the right-hand side of an assignment: either a load or a filter
 * statement.
 *
 * @return the relation node produced by the matching statement
 */
RelationNode RelationalStatement(): {
    RelationNode relation = null;
} {
    (
        relation = LoadStatement()
      | relation = FilterStatement()
    ) {
        return relation;
    }
}
160
/**
 * Parses "load 'source' as (schema)".
 *
 * The string literal is handed to LoadNode exactly as lexed, i.e. with its
 * surrounding quote characters still attached.
 *
 * @return a LoadNode for the given source and schema
 */
RelationNode LoadStatement(): {
    Token source;
    Schema schema;
} {
    "load"
    source = <STRING_LITERAL>
    "as"
    schema = Schema() {
        return new LoadNode(source.image, schema);
    }
}
169
/**
 * Parses "filter alias by expression".
 *
 * @return a FilterNode over the named relation with the parsed condition
 */
RelationNode FilterStatement(): {
    String input;
    ExpressionNode condition;
} {
    "filter"
    input = Alias()
    "by"
    condition = Expression() {
        return new FilterNode(input, condition);
    }
}
178
/**
 * Parses "dump alias into 'target'".
 *
 * The string literal is handed to DumpNode exactly as lexed (quotes
 * included); note that DumpNode takes the target first and the alias second.
 *
 * @return a DumpNode for the given target and alias
 */
ASTNode DumpStatement(): {
    String input;
    Token target;
} {
    "dump"
    input = Alias()
    "into"
    target = <STRING_LITERAL> {
        return new DumpNode(target.image, input);
    }
}
187
/**
 * Top of the expression grammar; delegates to the lowest-precedence level
 * (OrExpression).
 *
 * @return the parsed expression tree
 */
ExpressionNode Expression(): {
    ExpressionNode root;
} {
    root = OrExpression() {
        return root;
    }
}
195
/**
 * Parses one or more AndExpressions joined by "or".
 * Each additional operand wraps the tree built so far, so chains nest to
 * the left.
 *
 * @return the single operand, or a BOOLEAN_OR function tree
 */
ExpressionNode OrExpression(): {
    ExpressionNode left;
    ExpressionNode right;
} {
    left = AndExpression()
    (
        "or" right = AndExpression() {
            left = createFunction(FunctionTag.BOOLEAN_OR, null, left, right);
        }
    )* {
        return left;
    }
}
208
/**
 * Parses one or more ComparisonExpressions joined by "and".
 * Each additional operand wraps the tree built so far, so chains nest to
 * the left.
 *
 * @return the single operand, or a BOOLEAN_AND function tree
 */
ExpressionNode AndExpression(): {
    ExpressionNode left;
    ExpressionNode right;
} {
    left = ComparisonExpression()
    (
        "and" right = ComparisonExpression() {
            left = createFunction(FunctionTag.BOOLEAN_AND, null, left, right);
        }
    )* {
        return left;
    }
}
221
/**
 * Parses an additive expression optionally followed by a single comparison
 * operator and a second additive expression. At most one comparison is
 * accepted at this level.
 *
 * @return the lone operand, or a comparison function over both operands
 */
ExpressionNode ComparisonExpression(): {
    ExpressionNode left;
    ExpressionNode right;
    FunctionTag operator;
} {
    left = AdditiveExpression()
    (
        operator = ComparisonOperator()
        right = AdditiveExpression() {
            left = createFunction(operator, null, left, right);
        }
    )? {
        return left;
    }
}
235
/**
 * Parses one comparison operator symbol.
 *
 * @return EQ, NEQ, LT, LTE, GT or GTE for "==", "!=", "<", "<=", ">", ">="
 */
FunctionTag ComparisonOperator(): {
    FunctionTag tag = null;
} {
    (
        "==" { tag = FunctionTag.EQ; }
      | "!=" { tag = FunctionTag.NEQ; }
      | "<"  { tag = FunctionTag.LT; }
      | "<=" { tag = FunctionTag.LTE; }
      | ">"  { tag = FunctionTag.GT; }
      | ">=" { tag = FunctionTag.GTE; }
    ) {
        return tag;
    }
}
257
/**
 * Parses one or more MultiplicativeExpressions joined by "+" or "-".
 * Each additional operand wraps the tree built so far, so chains nest to
 * the left.
 *
 * @return the single operand, or an ADD/SUBTRACT function tree
 */
ExpressionNode AdditiveExpression(): {
    ExpressionNode left;
    ExpressionNode right;
    FunctionTag operator;
} {
    left = MultiplicativeExpression()
    (
        operator = AdditiveOperator()
        right = MultiplicativeExpression() {
            left = createFunction(operator, null, left, right);
        }
    )* {
        return left;
    }
}
271
/**
 * Parses "+" or "-".
 *
 * @return ADD for "+", SUBTRACT for "-"
 */
FunctionTag AdditiveOperator(): {
    FunctionTag tag = null;
} {
    (
        "+" { tag = FunctionTag.ADD; }
      | "-" { tag = FunctionTag.SUBTRACT; }
    ) {
        return tag;
    }
}
281
/**
 * Parses one or more PrimaryExpressions joined by "*", "/" or "%".
 * Each additional operand wraps the tree built so far, so chains nest to
 * the left.
 *
 * @return the single operand, or a MULTIPLY/DIVIDE/MOD function tree
 */
ExpressionNode MultiplicativeExpression(): {
    ExpressionNode left;
    ExpressionNode right;
    FunctionTag operator;
} {
    left = PrimaryExpression()
    (
        operator = MultiplicativeOperator()
        right = PrimaryExpression() {
            left = createFunction(operator, null, left, right);
        }
    )* {
        return left;
    }
}
295
/**
 * Parses "*", "/" or "%".
 *
 * @return MULTIPLY for "*", DIVIDE for "/", MOD for "%"
 */
FunctionTag MultiplicativeOperator(): {
    FunctionTag tag = null;
} {
    (
        "*" { tag = FunctionTag.MULTIPLY; }
      | "/" { tag = FunctionTag.DIVIDE; }
      | "%" { tag = FunctionTag.MOD; }
    ) {
        return tag;
    }
}
308
/**
 * Parses an atomic operand: a literal or a field access.
 *
 * @return the node produced by whichever alternative matched
 */
ExpressionNode PrimaryExpression(): {
    ExpressionNode operand = null;
} {
    (
        operand = Literal()
      | operand = FieldAccess()
    ) {
        return operand;
    }
}
319
/**
 * Parses a string, integer or double literal.
 *
 * String literals have their first and last character (the quotes) removed
 * via stripQuotes; numeric literals keep the token text unchanged.
 *
 * @return a LiteralExpressionNode typed CharArrayType, IntegerType or DoubleType
 */
ExpressionNode Literal(): {
    Token tok;
    ExpressionNode literal = null;
} {
    (
        tok = <STRING_LITERAL> {
            literal = new LiteralExpressionNode(stripQuotes(tok.image), CharArrayType.INSTANCE);
        }
      | tok = <INTEGER_LITERAL> {
            literal = new LiteralExpressionNode(tok.image, IntegerType.INSTANCE);
        }
      | tok = <DOUBLE_LITERAL> {
            literal = new LiteralExpressionNode(tok.image, DoubleType.INSTANCE);
        }
    ) {
        return literal;
    }
}
333
/**
 * Parses a field reference, optionally qualified as "alias.field".
 *
 * @return a FieldAccessExpressionNode; its relation name is null when the
 *         reference is unqualified
 */
ExpressionNode FieldAccess(): {
    String qualifier = null;
    Token field;
} {
    (
        qualifier = Alias() "."
    )?
    field = <IDENTIFIER> {
        return new FieldAccessExpressionNode(qualifier, field.image);
    }
}
342
// Named tokens of the default lexical state. Declaration order is kept as-is.
<DEFAULT>
TOKEN : {
    // Text in double or single quotes; it may not contain its own quote
    // character or a newline.
    < STRING_LITERAL :
          "\"" (~["\"", "\n"])* "\""
        | "'" (~["'", "\n"])* "'" >
    // A letter followed by any mix of letters, digits and extenders.
  | < IDENTIFIER : <Letter> (<Letter> | <Digit> | <Extender>)* >
    // One or more digits.
  | < INTEGER_LITERAL : (<Digit>)+ >
    // Either "[digits].digits[exponent]" or "digits exponent".
  | < DOUBLE_LITERAL :
          (<Digit>)* "." (<Digit>)+ (<Exponent>)?
        | <INTEGER_LITERAL> <Exponent> >
    // "$" followed by digits; no production in this grammar section uses it.
  | < INDEXED_FIELD : "$" <INTEGER_LITERAL> >
}
351
TOKEN :
{
    // Private helper for DOUBLE_LITERAL: exponent marker, optional sign, digits
    // (e.g. "e10", "E-3"). The sign must follow the marker; the previous
    // definition put it in front, which rejected "1.5e-3" and accepted "1.5-e3".
    < #Exponent : ["E", "e"] (["+", "-"])? <INTEGER_LITERAL> >
}
356
// Space, tab, newline and carriage return are lexed one character at a time
// as special tokens, so the productions never have to mention them.
SPECIAL_TOKEN :
{
    < WhitespaceChar : [" ", "\t", "\n", "\r"] >
}
361
TOKEN :
{
    // Private helper: a letter is either a base character or an ideograph.
    < #Letter : <BaseChar> | <Ideographic> >
}
366
TOKEN :
{
    // Private helper: alphabetic characters accepted inside identifiers.
    // NOTE(review): the ranges appear to mirror the XML 1.0 "BaseChar"
    // character class - confirm before editing individual entries.
    < #BaseChar : [
        "\u0041" - "\u005a", "\u0061" - "\u007a", "\u00c0" - "\u00d6", "\u00d8" - "\u00f6",
        "\u00f8" - "\u00ff", "\u0100" - "\u0131", "\u0134" - "\u013e", "\u0141" - "\u0148",
        "\u014a" - "\u017e", "\u0180" - "\u01c3", "\u01cd" - "\u01f0", "\u01f4" - "\u01f5",
        "\u01fa" - "\u0217", "\u0250" - "\u02a8", "\u02bb" - "\u02c1", "\u0386",
        "\u0388" - "\u038a", "\u038c", "\u038e" - "\u03a1", "\u03a3" - "\u03ce",
        "\u03d0" - "\u03d6", "\u03da", "\u03dc", "\u03de",
        "\u03e0", "\u03e2" - "\u03f3", "\u0401" - "\u040c", "\u040e" - "\u044f",
        "\u0451" - "\u045c", "\u045e" - "\u0481", "\u0490" - "\u04c4", "\u04c7" - "\u04c8",
        "\u04cb" - "\u04cc", "\u04d0" - "\u04eb", "\u04ee" - "\u04f5", "\u04f8" - "\u04f9",
        "\u0531" - "\u0556", "\u0559", "\u0561" - "\u0586", "\u05d0" - "\u05ea",
        "\u05f0" - "\u05f2", "\u0621" - "\u063a", "\u0641" - "\u064a", "\u0671" - "\u06b7",
        "\u06ba" - "\u06be", "\u06c0" - "\u06ce", "\u06d0" - "\u06d3", "\u06d5",
        "\u06e5" - "\u06e6", "\u0905" - "\u0939", "\u093d", "\u0958" - "\u0961",
        "\u0985" - "\u098c", "\u098f" - "\u0990", "\u0993" - "\u09a8", "\u09aa" - "\u09b0",
        "\u09b2", "\u09b6" - "\u09b9", "\u09dc" - "\u09dd", "\u09df" - "\u09e1",
        "\u09f0" - "\u09f1", "\u0a05" - "\u0a0a", "\u0a0f" - "\u0a10", "\u0a13" - "\u0a28",
        "\u0a2a" - "\u0a30", "\u0a32" - "\u0a33", "\u0a35" - "\u0a36", "\u0a38" - "\u0a39",
        "\u0a59" - "\u0a5c", "\u0a5e", "\u0a72" - "\u0a74", "\u0a85" - "\u0a8b",
        "\u0a8d", "\u0a8f" - "\u0a91", "\u0a93" - "\u0aa8", "\u0aaa" - "\u0ab0",
        "\u0ab2" - "\u0ab3", "\u0ab5" - "\u0ab9", "\u0abd", "\u0ae0",
        "\u0b05" - "\u0b0c", "\u0b0f" - "\u0b10", "\u0b13" - "\u0b28", "\u0b2a" - "\u0b30",
        "\u0b32" - "\u0b33", "\u0b36" - "\u0b39", "\u0b3d", "\u0b5c" - "\u0b5d",
        "\u0b5f" - "\u0b61", "\u0b85" - "\u0b8a", "\u0b8e" - "\u0b90", "\u0b92" - "\u0b95",
        "\u0b99" - "\u0b9a", "\u0b9c", "\u0b9e" - "\u0b9f", "\u0ba3" - "\u0ba4",
        "\u0ba8" - "\u0baa", "\u0bae" - "\u0bb5", "\u0bb7" - "\u0bb9", "\u0c05" - "\u0c0c",
        "\u0c0e" - "\u0c10", "\u0c12" - "\u0c28", "\u0c2a" - "\u0c33", "\u0c35" - "\u0c39",
        "\u0c60" - "\u0c61", "\u0c85" - "\u0c8c", "\u0c8e" - "\u0c90", "\u0c92" - "\u0ca8",
        "\u0caa" - "\u0cb3", "\u0cb5" - "\u0cb9", "\u0cde", "\u0ce0" - "\u0ce1",
        "\u0d05" - "\u0d0c", "\u0d0e" - "\u0d10", "\u0d12" - "\u0d28", "\u0d2a" - "\u0d39",
        "\u0d60" - "\u0d61", "\u0e01" - "\u0e2e", "\u0e30", "\u0e32" - "\u0e33",
        "\u0e40" - "\u0e45", "\u0e81" - "\u0e82", "\u0e84", "\u0e87" - "\u0e88",
        "\u0e8a", "\u0e8d", "\u0e94" - "\u0e97", "\u0e99" - "\u0e9f",
        "\u0ea1" - "\u0ea3", "\u0ea5", "\u0ea7", "\u0eaa" - "\u0eab",
        "\u0ead" - "\u0eae", "\u0eb0", "\u0eb2" - "\u0eb3", "\u0ebd",
        "\u0ec0" - "\u0ec4", "\u0f40" - "\u0f47", "\u0f49" - "\u0f69", "\u10a0" - "\u10c5",
        "\u10d0" - "\u10f6", "\u1100", "\u1102" - "\u1103", "\u1105" - "\u1107",
        "\u1109", "\u110b" - "\u110c", "\u110e" - "\u1112", "\u113c",
        "\u113e", "\u1140", "\u114c", "\u114e",
        "\u1150", "\u1154" - "\u1155", "\u1159", "\u115f" - "\u1161",
        "\u1163", "\u1165", "\u1167", "\u1169",
        "\u116d" - "\u116e", "\u1172" - "\u1173", "\u1175", "\u119e",
        "\u11a8", "\u11ab", "\u11ae" - "\u11af", "\u11b7" - "\u11b8",
        "\u11ba", "\u11bc" - "\u11c2", "\u11eb", "\u11f0",
        "\u11f9", "\u1e00" - "\u1e9b", "\u1ea0" - "\u1ef9", "\u1f00" - "\u1f15",
        "\u1f18" - "\u1f1d", "\u1f20" - "\u1f45", "\u1f48" - "\u1f4d", "\u1f50" - "\u1f57",
        "\u1f59", "\u1f5b", "\u1f5d", "\u1f5f" - "\u1f7d",
        "\u1f80" - "\u1fb4", "\u1fb6" - "\u1fbc", "\u1fbe", "\u1fc2" - "\u1fc4",
        "\u1fc6" - "\u1fcc", "\u1fd0" - "\u1fd3", "\u1fd6" - "\u1fdb", "\u1fe0" - "\u1fec",
        "\u1ff2" - "\u1ff4", "\u1ff6" - "\u1ffc", "\u2126", "\u212a" - "\u212b",
        "\u212e", "\u2180" - "\u2182", "\u3041" - "\u3094", "\u30a1" - "\u30fa",
        "\u3105" - "\u312c", "\uac00" - "\ud7a3"
    ] >
}
371
TOKEN :
{
    // Private helper: ideographic characters accepted wherever a Letter is.
    < #Ideographic : ["\u3007", "\u3021" - "\u3029", "\u4e00" - "\u9fa5"] >
}
376
TOKEN :
{
    // Private helper: combining marks. No token definition in this grammar
    // section references it.
    < #CombiningChar : [
        "\u0300" - "\u0345", "\u0360" - "\u0361", "\u0483" - "\u0486", "\u0591" - "\u05a1",
        "\u05a3" - "\u05b9", "\u05bb" - "\u05bd", "\u05bf", "\u05c1" - "\u05c2",
        "\u05c4", "\u064b" - "\u0652", "\u0670", "\u06d6" - "\u06dc",
        "\u06dd" - "\u06df", "\u06e0" - "\u06e4", "\u06e7" - "\u06e8", "\u06ea" - "\u06ed",
        "\u0901" - "\u0903", "\u093c", "\u093e" - "\u094c", "\u094d",
        "\u0951" - "\u0954", "\u0962" - "\u0963", "\u0981" - "\u0983", "\u09bc",
        "\u09be", "\u09bf", "\u09c0" - "\u09c4", "\u09c7" - "\u09c8",
        "\u09cb" - "\u09cd", "\u09d7", "\u09e2" - "\u09e3", "\u0a02",
        "\u0a3c", "\u0a3e", "\u0a3f", "\u0a40" - "\u0a42",
        "\u0a47" - "\u0a48", "\u0a4b" - "\u0a4d", "\u0a70" - "\u0a71", "\u0a81" - "\u0a83",
        "\u0abc", "\u0abe" - "\u0ac5", "\u0ac7" - "\u0ac9", "\u0acb" - "\u0acd",
        "\u0b01" - "\u0b03", "\u0b3c", "\u0b3e" - "\u0b43", "\u0b47" - "\u0b48",
        "\u0b4b" - "\u0b4d", "\u0b56" - "\u0b57", "\u0b82" - "\u0b83", "\u0bbe" - "\u0bc2",
        "\u0bc6" - "\u0bc8", "\u0bca" - "\u0bcd", "\u0bd7", "\u0c01" - "\u0c03",
        "\u0c3e" - "\u0c44", "\u0c46" - "\u0c48", "\u0c4a" - "\u0c4d", "\u0c55" - "\u0c56",
        "\u0c82" - "\u0c83", "\u0cbe" - "\u0cc4", "\u0cc6" - "\u0cc8", "\u0cca" - "\u0ccd",
        "\u0cd5" - "\u0cd6", "\u0d02" - "\u0d03", "\u0d3e" - "\u0d43", "\u0d46" - "\u0d48",
        "\u0d4a" - "\u0d4d", "\u0d57", "\u0e31", "\u0e34" - "\u0e3a",
        "\u0e47" - "\u0e4e", "\u0eb1", "\u0eb4" - "\u0eb9", "\u0ebb" - "\u0ebc",
        "\u0ec8" - "\u0ecd", "\u0f18" - "\u0f19", "\u0f35", "\u0f37",
        "\u0f39", "\u0f3e", "\u0f3f", "\u0f71" - "\u0f84",
        "\u0f86" - "\u0f8b", "\u0f90" - "\u0f95", "\u0f97", "\u0f99" - "\u0fad",
        "\u0fb1" - "\u0fb7", "\u0fb9", "\u20d0" - "\u20dc", "\u20e1",
        "\u302a" - "\u302f", "\u3099", "\u309a"
    ] >
}
381
TOKEN :
{
    // Private helper: decimal digits, ASCII "0"-"9" plus further ten-character
    // ranges listed below.
    < #Digit : [
        "\u0030" - "\u0039", "\u0660" - "\u0669", "\u06f0" - "\u06f9",
        "\u0966" - "\u096f", "\u09e6" - "\u09ef", "\u0a66" - "\u0a6f",
        "\u0ae6" - "\u0aef", "\u0b66" - "\u0b6f", "\u0be7" - "\u0bef",
        "\u0c66" - "\u0c6f", "\u0ce6" - "\u0cef", "\u0d66" - "\u0d6f",
        "\u0e50" - "\u0e59", "\u0ed0" - "\u0ed9", "\u0f20" - "\u0f29"
    ] >
}
386
TOKEN :
{
    // Private helper: extender characters allowed after the first character
    // of an IDENTIFIER.
    < #Extender : [
        "\u00b7", "\u02d0", "\u02d1", "\u0387",
        "\u0640", "\u0e46", "\u0ec6", "\u3005",
        "\u3031" - "\u3035", "\u309d" - "\u309e", "\u30fc" - "\u30fe"
    ] >
}