/*
 * Copyright 2009-2010 by The Regents of the University of California
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License from
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * JavaCC generator settings for this grammar.
 */
options {
    // Default number of tokens examined at each choice point.
    LOOKAHEAD = 2;
    // Token matching (keywords included) does not distinguish upper and lower case.
    IGNORE_CASE = true;
    // Generate a parser with instance (non-static) members.
    STATIC = false;
}
20
21PARSER_BEGIN(PigletParser)
22
23package edu.uci.ics.hyracks.algebricks.examples.piglet.parser;
24
25import java.util.*;
26import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
27import edu.uci.ics.hyracks.algebricks.examples.piglet.ast.*;
28import edu.uci.ics.hyracks.algebricks.examples.piglet.types.*;
29
/**
 * Parser class for the Piglet grammar. The parsing methods come from the
 * productions of this grammar file; the members written here are helpers
 * called from the productions' semantic actions.
 */
public class PigletParser {
    /**
     * Wraps the given operands in a scalar function expression node.
     *
     * @param fTag tag identifying the function
     * @param fName function name; every production in this grammar passes null
     * @param arguments operand expressions, kept in the order given
     * @return a new ScalarFunctionExpressionNode over the operands
     */
    private ExpressionNode createFunction(FunctionTag fTag, String fName, ExpressionNode... arguments) {
        List<ASTNode> operands = new ArrayList<ASTNode>();
        Collections.addAll(operands, arguments);
        return new ScalarFunctionExpressionNode(fTag, fName, operands);
    }

    /**
     * Removes the first and the last character of a string. Used on the image of
     * a STRING_LITERAL token, whose first and last characters are its quotes.
     *
     * @param s text of at least two characters
     * @return the text between the first and last character
     */
    private String stripQuotes(String s) {
        return s.substring(1, s.length() - 1);
    }
}
45
46PARSER_END(PigletParser)
47
/**
 * Entry production: zero or more statements, each terminated by ";", followed
 * by end of input. Returns the statement nodes in source order.
 */
List<ASTNode> Statements(): {
    List<ASTNode> statements = new ArrayList<ASTNode>();
    ASTNode stmt = null;
} {
    (
        (
            stmt = AssignmentStatement()
          | stmt = DumpStatement()
        )
        ";" {
            statements.add(stmt);
        }
    )*
    <EOF> {
        return statements;
    }
}
65
/**
 * Parses <alias> "=" <relational statement> and returns an AssignmentNode
 * built from the alias and the relation.
 */
ASTNode AssignmentStatement(): {
    String alias;
    RelationNode relation;
} {
    alias = Alias()
    "="
    relation = RelationalStatement() {
        return new AssignmentNode(alias, relation);
    }
}
74
/**
 * A relation alias: a single IDENTIFIER token, returned as its text.
 */
String Alias(): {
    Token id;
} {
    id = <IDENTIFIER> { return id.image; }
}
82
/**
 * A column name: a single IDENTIFIER token, returned as its text.
 */
String ColumnName(): {
    Token id;
} {
    id = <IDENTIFIER> { return id.image; }
}
90
/**
 * Maps a type keyword to its Type object.
 *
 * The scalar keywords (int, long, float, double, chararray) return the shared
 * INSTANCE of the matching type class; tuple, bag and map each return a newly
 * constructed type object.
 */
Type TypeName(): {
    // No locals needed: every alternative returns directly from its action.
} {
    "int" {
        return IntegerType.INSTANCE;
    }
  | "long" {
        return LongType.INSTANCE;
    }
  | "float" {
        return FloatType.INSTANCE;
    }
  | "double" {
        return DoubleType.INSTANCE;
    }
  | "chararray" {
        return CharArrayType.INSTANCE;
    }
  | "tuple" {
        return new TupleType();
    }
  | "bag" {
        return new BagType();
    }
  | "map" {
        return new MapType();
    }
}
119
/**
 * Parses one column declaration of the form <name> ":" <type> and returns the
 * pair (column name, column type).
 */
Pair<String, Type> ColumnSchema(): {
    String name;
    Type type;
} {
    name = ColumnName()
    ":"
    type = TypeName() {
        return new Pair<String, Type>(name, type);
    }
}
128
/**
 * Parses a parenthesized, comma-separated list of column declarations. The
 * list may be empty. Returns a Schema over the columns in declaration order.
 */
Schema Schema(): {
    List<Pair<String, Type>> columns = new ArrayList<Pair<String, Type>>();
    Pair<String, Type> column;
} {
    "("
    (
        column = ColumnSchema() { columns.add(column); }
        (
            "," column = ColumnSchema() { columns.add(column); }
        )*
    )?
    ")" {
        return new Schema(columns);
    }
}
145
/**
 * A relation-producing statement: either a load or a filter.
 */
RelationNode RelationalStatement(): {
    RelationNode relation = null;
} {
    (
        relation = LoadStatement()
      | relation = FilterStatement()
    ) {
        return relation;
    }
}
156
/**
 * Parses "load" <string literal> "as" <schema> and returns a LoadNode built
 * from the literal's token text and the schema.
 */
RelationNode LoadStatement(): {
    Token source;
    Schema schema;
} {
    "load" source = <STRING_LITERAL>
    "as" schema = Schema() {
        // NOTE(review): the token image is passed with its surrounding quotes,
        // whereas Literal() strips them - confirm that LoadNode expects this.
        return new LoadNode(source.image, schema);
    }
}
165
/**
 * Parses "filter" <alias> "by" <expression> and returns a FilterNode built
 * from the alias and the parsed expression.
 */
RelationNode FilterStatement(): {
    String alias;
    ExpressionNode predicate;
} {
    "filter" alias = Alias()
    "by" predicate = Expression() {
        return new FilterNode(alias, predicate);
    }
}
174
/**
 * Parses "dump" <alias> "into" <string literal> and returns a DumpNode built
 * from the literal's token text and the alias (in that argument order).
 */
ASTNode DumpStatement(): {
    String alias;
    Token target;
} {
    "dump" alias = Alias()
    "into" target = <STRING_LITERAL> {
        // NOTE(review): the token image is passed with its surrounding quotes,
        // whereas Literal() strips them - confirm that DumpNode expects this.
        return new DumpNode(target.image, alias);
    }
}
183
/**
 * Top of the expression grammar; delegates to OrExpression, the
 * lowest-precedence level.
 */
ExpressionNode Expression(): {
    ExpressionNode expr;
} {
    expr = OrExpression() { return expr; }
}
191
/**
 * One or more AndExpressions joined by "or". Operands are folded from left to
 * right into BOOLEAN_OR function nodes.
 */
ExpressionNode OrExpression(): {
    ExpressionNode left;
    ExpressionNode right;
} {
    left = AndExpression()
    (
        "or" right = AndExpression() {
            // The accumulated expression becomes the left operand of the next "or".
            left = createFunction(FunctionTag.BOOLEAN_OR, null, left, right);
        }
    )* {
        return left;
    }
}
204
/**
 * One or more ComparisonExpressions joined by "and". Operands are folded from
 * left to right into BOOLEAN_AND function nodes.
 */
ExpressionNode AndExpression(): {
    ExpressionNode left;
    ExpressionNode right;
} {
    left = ComparisonExpression()
    (
        "and" right = ComparisonExpression() {
            // The accumulated expression becomes the left operand of the next "and".
            left = createFunction(FunctionTag.BOOLEAN_AND, null, left, right);
        }
    )* {
        return left;
    }
}
217
/**
 * An additive expression, optionally compared against a second one. At most
 * one comparison operator is accepted at this level.
 */
ExpressionNode ComparisonExpression(): {
    ExpressionNode left;
    ExpressionNode right;
    FunctionTag op;
} {
    left = AdditiveExpression()
    (
        op = ComparisonOperator() right = AdditiveExpression() {
            left = createFunction(op, null, left, right);
        }
    )? {
        return left;
    }
}
231
/**
 * Maps a comparison operator symbol to its FunctionTag:
 * == EQ, != NEQ, < LT, <= LTE, > GT, >= GTE.
 */
FunctionTag ComparisonOperator(): {
    FunctionTag tag = null;
} {
    (
        "==" { tag = FunctionTag.EQ; }
      | "!=" { tag = FunctionTag.NEQ; }
      | "<"  { tag = FunctionTag.LT; }
      | "<=" { tag = FunctionTag.LTE; }
      | ">"  { tag = FunctionTag.GT; }
      | ">=" { tag = FunctionTag.GTE; }
    ) {
        return tag;
    }
}
253
/**
 * One or more MultiplicativeExpressions joined by "+" or "-". Operands are
 * folded from left to right into function nodes tagged with the operator.
 */
ExpressionNode AdditiveExpression(): {
    ExpressionNode left;
    ExpressionNode right;
    FunctionTag op;
} {
    left = MultiplicativeExpression()
    (
        op = AdditiveOperator() right = MultiplicativeExpression() {
            left = createFunction(op, null, left, right);
        }
    )* {
        return left;
    }
}
267
/**
 * Maps "+" to FunctionTag.ADD and "-" to FunctionTag.SUBTRACT.
 */
FunctionTag AdditiveOperator(): {
    FunctionTag tag = null;
} {
    (
        "+" { tag = FunctionTag.ADD; }
      | "-" { tag = FunctionTag.SUBTRACT; }
    ) {
        return tag;
    }
}
277
/**
 * One or more PrimaryExpressions joined by "*", "/" or "%". Operands are
 * folded from left to right into function nodes tagged with the operator.
 */
ExpressionNode MultiplicativeExpression(): {
    ExpressionNode left;
    ExpressionNode right;
    FunctionTag op;
} {
    left = PrimaryExpression()
    (
        op = MultiplicativeOperator() right = PrimaryExpression() {
            left = createFunction(op, null, left, right);
        }
    )* {
        return left;
    }
}
291
/**
 * Maps "*" to FunctionTag.MULTIPLY, "/" to FunctionTag.DIVIDE and "%" to
 * FunctionTag.MOD.
 */
FunctionTag MultiplicativeOperator(): {
    FunctionTag tag = null;
} {
    (
        "*" { tag = FunctionTag.MULTIPLY; }
      | "/" { tag = FunctionTag.DIVIDE; }
      | "%" { tag = FunctionTag.MOD; }
    ) {
        return tag;
    }
}
304
/**
 * The operands of the expression grammar: a literal or a field access.
 */
ExpressionNode PrimaryExpression(): {
    ExpressionNode operand = null;
} {
    (
        operand = Literal()
      | operand = FieldAccess()
    ) {
        return operand;
    }
}
315
/**
 * Parses a string, integer or double literal into a LiteralExpressionNode that
 * carries the literal's text and its type. String literals have their
 * surrounding quotes removed; numeric literals keep the token text unchanged.
 */
ExpressionNode Literal(): {
    Token tok;
} {
    tok = <STRING_LITERAL> {
        return new LiteralExpressionNode(stripQuotes(tok.image), CharArrayType.INSTANCE);
    }
  | tok = <INTEGER_LITERAL> {
        return new LiteralExpressionNode(tok.image, IntegerType.INSTANCE);
    }
  | tok = <DOUBLE_LITERAL> {
        return new LiteralExpressionNode(tok.image, DoubleType.INSTANCE);
    }
}
329
/**
 * Parses a field reference, optionally qualified as <alias> "." <field>.
 * The relation name handed to FieldAccessExpressionNode is null when no
 * qualifier is present.
 */
ExpressionNode FieldAccess(): {
    String relation = null;
    Token field;
} {
    (
        relation = Alias() "."
    )?
    field = <IDENTIFIER> {
        return new FieldAccessExpressionNode(relation, field.image);
    }
}
338
/*
 * Named tokens of the DEFAULT lexical state.
 */
<DEFAULT>
TOKEN : {
    // Text in double or single quotes. The body may not contain the enclosing
    // quote character or a line feed; there is no escape syntax.
    <STRING_LITERAL:
        (
            ("\"" (~["\"", "\n"])* "\"")
          | ("'" (~["'", "\n"])* "'")
        )
    >
    // A letter followed by any number of letters, digits or extenders.
  | <IDENTIFIER: <Letter> (<Letter> | <Digit> | <Extender>)*>
    // One or more digits.
  | <INTEGER_LITERAL: (<Digit>)+>
    // Either optional digits, a dot, digits and an optional exponent,
    // or an integer literal followed by a mandatory exponent.
  | <DOUBLE_LITERAL:
        (
            ((<Digit>)* "." (<Digit>)+ (<Exponent>)?)
          | (<INTEGER_LITERAL> <Exponent>)
        )
    >
    // A dollar sign followed by an integer literal. No production in this
    // grammar file references this token.
  | <INDEXED_FIELD: ("$" <INTEGER_LITERAL>)>
}
347
TOKEN :
{
    // Private helper for DOUBLE_LITERAL: the exponent marker, an optional sign,
    // then the exponent digits (for example e10, E-3, e+5).
    // The sign must come after the marker. With the sign in front of it, input
    // such as 1+e5 (integer plus field e5) is swallowed as one double literal,
    // and the usual forms 1e+5 and 1e-5 are not accepted.
    < #Exponent : (["E", "e"] (["+", "-"])? <INTEGER_LITERAL>) >
}
352
// Space, tab, carriage return and line feed are matched as special tokens, so
// they separate regular tokens without being handed to the productions.
SPECIAL_TOKEN :
{
    < WhitespaceChar : [" ", "\t", "\r", "\n"] >
}
357
// Private helper: a letter is any BaseChar or Ideographic character.
TOKEN :
{
    < #Letter : <BaseChar> | <Ideographic> >
}
362
// Private helper: the alphabetic characters accepted in identifiers.
// NOTE(review): the ranges appear to follow the BaseChar class of the XML 1.0
// specification - confirm before editing individual entries.
TOKEN :
{
    < #BaseChar : [
        "\u0041" - "\u005a", "\u0061" - "\u007a", "\u00c0" - "\u00d6", "\u00d8" - "\u00f6",
        "\u00f8" - "\u00ff", "\u0100" - "\u0131", "\u0134" - "\u013e", "\u0141" - "\u0148",
        "\u014a" - "\u017e", "\u0180" - "\u01c3", "\u01cd" - "\u01f0", "\u01f4" - "\u01f5",
        "\u01fa" - "\u0217", "\u0250" - "\u02a8", "\u02bb" - "\u02c1",
        "\u0386", "\u0388" - "\u038a", "\u038c", "\u038e" - "\u03a1", "\u03a3" - "\u03ce",
        "\u03d0" - "\u03d6", "\u03da", "\u03dc", "\u03de", "\u03e0", "\u03e2" - "\u03f3",
        "\u0401" - "\u040c", "\u040e" - "\u044f", "\u0451" - "\u045c", "\u045e" - "\u0481",
        "\u0490" - "\u04c4", "\u04c7" - "\u04c8", "\u04cb" - "\u04cc", "\u04d0" - "\u04eb",
        "\u04ee" - "\u04f5", "\u04f8" - "\u04f9",
        "\u0531" - "\u0556", "\u0559", "\u0561" - "\u0586", "\u05d0" - "\u05ea",
        "\u05f0" - "\u05f2",
        "\u0621" - "\u063a", "\u0641" - "\u064a", "\u0671" - "\u06b7", "\u06ba" - "\u06be",
        "\u06c0" - "\u06ce", "\u06d0" - "\u06d3", "\u06d5", "\u06e5" - "\u06e6",
        "\u0905" - "\u0939", "\u093d", "\u0958" - "\u0961", "\u0985" - "\u098c",
        "\u098f" - "\u0990", "\u0993" - "\u09a8", "\u09aa" - "\u09b0", "\u09b2",
        "\u09b6" - "\u09b9", "\u09dc" - "\u09dd", "\u09df" - "\u09e1", "\u09f0" - "\u09f1",
        "\u0a05" - "\u0a0a", "\u0a0f" - "\u0a10", "\u0a13" - "\u0a28", "\u0a2a" - "\u0a30",
        "\u0a32" - "\u0a33", "\u0a35" - "\u0a36", "\u0a38" - "\u0a39", "\u0a59" - "\u0a5c",
        "\u0a5e", "\u0a72" - "\u0a74", "\u0a85" - "\u0a8b", "\u0a8d", "\u0a8f" - "\u0a91",
        "\u0a93" - "\u0aa8", "\u0aaa" - "\u0ab0", "\u0ab2" - "\u0ab3", "\u0ab5" - "\u0ab9",
        "\u0abd", "\u0ae0",
        "\u0b05" - "\u0b0c", "\u0b0f" - "\u0b10", "\u0b13" - "\u0b28", "\u0b2a" - "\u0b30",
        "\u0b32" - "\u0b33", "\u0b36" - "\u0b39", "\u0b3d", "\u0b5c" - "\u0b5d",
        "\u0b5f" - "\u0b61", "\u0b85" - "\u0b8a", "\u0b8e" - "\u0b90", "\u0b92" - "\u0b95",
        "\u0b99" - "\u0b9a", "\u0b9c", "\u0b9e" - "\u0b9f", "\u0ba3" - "\u0ba4",
        "\u0ba8" - "\u0baa", "\u0bae" - "\u0bb5", "\u0bb7" - "\u0bb9",
        "\u0c05" - "\u0c0c", "\u0c0e" - "\u0c10", "\u0c12" - "\u0c28", "\u0c2a" - "\u0c33",
        "\u0c35" - "\u0c39", "\u0c60" - "\u0c61", "\u0c85" - "\u0c8c", "\u0c8e" - "\u0c90",
        "\u0c92" - "\u0ca8", "\u0caa" - "\u0cb3", "\u0cb5" - "\u0cb9", "\u0cde",
        "\u0ce0" - "\u0ce1",
        "\u0d05" - "\u0d0c", "\u0d0e" - "\u0d10", "\u0d12" - "\u0d28", "\u0d2a" - "\u0d39",
        "\u0d60" - "\u0d61",
        "\u0e01" - "\u0e2e", "\u0e30", "\u0e32" - "\u0e33", "\u0e40" - "\u0e45",
        "\u0e81" - "\u0e82", "\u0e84", "\u0e87" - "\u0e88", "\u0e8a", "\u0e8d",
        "\u0e94" - "\u0e97", "\u0e99" - "\u0e9f", "\u0ea1" - "\u0ea3", "\u0ea5", "\u0ea7",
        "\u0eaa" - "\u0eab", "\u0ead" - "\u0eae", "\u0eb0", "\u0eb2" - "\u0eb3", "\u0ebd",
        "\u0ec0" - "\u0ec4",
        "\u0f40" - "\u0f47", "\u0f49" - "\u0f69",
        "\u10a0" - "\u10c5", "\u10d0" - "\u10f6",
        "\u1100", "\u1102" - "\u1103", "\u1105" - "\u1107", "\u1109", "\u110b" - "\u110c",
        "\u110e" - "\u1112", "\u113c", "\u113e", "\u1140", "\u114c", "\u114e", "\u1150",
        "\u1154" - "\u1155", "\u1159", "\u115f" - "\u1161", "\u1163", "\u1165", "\u1167",
        "\u1169", "\u116d" - "\u116e", "\u1172" - "\u1173", "\u1175", "\u119e", "\u11a8",
        "\u11ab", "\u11ae" - "\u11af", "\u11b7" - "\u11b8", "\u11ba", "\u11bc" - "\u11c2",
        "\u11eb", "\u11f0", "\u11f9",
        "\u1e00" - "\u1e9b", "\u1ea0" - "\u1ef9",
        "\u1f00" - "\u1f15", "\u1f18" - "\u1f1d", "\u1f20" - "\u1f45", "\u1f48" - "\u1f4d",
        "\u1f50" - "\u1f57", "\u1f59", "\u1f5b", "\u1f5d", "\u1f5f" - "\u1f7d",
        "\u1f80" - "\u1fb4", "\u1fb6" - "\u1fbc", "\u1fbe", "\u1fc2" - "\u1fc4",
        "\u1fc6" - "\u1fcc", "\u1fd0" - "\u1fd3", "\u1fd6" - "\u1fdb", "\u1fe0" - "\u1fec",
        "\u1ff2" - "\u1ff4", "\u1ff6" - "\u1ffc",
        "\u2126", "\u212a" - "\u212b", "\u212e", "\u2180" - "\u2182",
        "\u3041" - "\u3094", "\u30a1" - "\u30fa", "\u3105" - "\u312c",
        "\uac00" - "\ud7a3"
    ] >
}
367
// Private helper: ideographic characters accepted as letters, listed in
// code-point order.
TOKEN :
{
    < #Ideographic : [
        "\u3007",
        "\u3021" - "\u3029",
        "\u4e00" - "\u9fa5"
    ] >
}
372
// Private helper: combining characters. No other token or production in this
// grammar file references this class.
TOKEN :
{
    < #CombiningChar : [
        "\u0300" - "\u0345", "\u0360" - "\u0361", "\u0483" - "\u0486",
        "\u0591" - "\u05a1", "\u05a3" - "\u05b9", "\u05bb" - "\u05bd", "\u05bf",
        "\u05c1" - "\u05c2", "\u05c4",
        "\u064b" - "\u0652", "\u0670", "\u06d6" - "\u06dc", "\u06dd" - "\u06df",
        "\u06e0" - "\u06e4", "\u06e7" - "\u06e8", "\u06ea" - "\u06ed",
        "\u0901" - "\u0903", "\u093c", "\u093e" - "\u094c", "\u094d", "\u0951" - "\u0954",
        "\u0962" - "\u0963", "\u0981" - "\u0983", "\u09bc", "\u09be", "\u09bf",
        "\u09c0" - "\u09c4", "\u09c7" - "\u09c8", "\u09cb" - "\u09cd", "\u09d7",
        "\u09e2" - "\u09e3",
        "\u0a02", "\u0a3c", "\u0a3e", "\u0a3f", "\u0a40" - "\u0a42", "\u0a47" - "\u0a48",
        "\u0a4b" - "\u0a4d", "\u0a70" - "\u0a71", "\u0a81" - "\u0a83", "\u0abc",
        "\u0abe" - "\u0ac5", "\u0ac7" - "\u0ac9", "\u0acb" - "\u0acd",
        "\u0b01" - "\u0b03", "\u0b3c", "\u0b3e" - "\u0b43", "\u0b47" - "\u0b48",
        "\u0b4b" - "\u0b4d", "\u0b56" - "\u0b57", "\u0b82" - "\u0b83", "\u0bbe" - "\u0bc2",
        "\u0bc6" - "\u0bc8", "\u0bca" - "\u0bcd", "\u0bd7",
        "\u0c01" - "\u0c03", "\u0c3e" - "\u0c44", "\u0c46" - "\u0c48", "\u0c4a" - "\u0c4d",
        "\u0c55" - "\u0c56", "\u0c82" - "\u0c83", "\u0cbe" - "\u0cc4", "\u0cc6" - "\u0cc8",
        "\u0cca" - "\u0ccd", "\u0cd5" - "\u0cd6",
        "\u0d02" - "\u0d03", "\u0d3e" - "\u0d43", "\u0d46" - "\u0d48", "\u0d4a" - "\u0d4d",
        "\u0d57",
        "\u0e31", "\u0e34" - "\u0e3a", "\u0e47" - "\u0e4e", "\u0eb1", "\u0eb4" - "\u0eb9",
        "\u0ebb" - "\u0ebc", "\u0ec8" - "\u0ecd",
        "\u0f18" - "\u0f19", "\u0f35", "\u0f37", "\u0f39", "\u0f3e", "\u0f3f",
        "\u0f71" - "\u0f84", "\u0f86" - "\u0f8b", "\u0f90" - "\u0f95", "\u0f97",
        "\u0f99" - "\u0fad", "\u0fb1" - "\u0fb7", "\u0fb9",
        "\u20d0" - "\u20dc", "\u20e1",
        "\u302a" - "\u302f", "\u3099", "\u309a"
    ] >
}
377
// Private helper: decimal digit characters. Besides 0-9 (U+0030 - U+0039) the
// class lists fourteen further ten-character digit ranges.
TOKEN :
{
    < #Digit : [
        "\u0030" - "\u0039",
        "\u0660" - "\u0669", "\u06f0" - "\u06f9",
        "\u0966" - "\u096f", "\u09e6" - "\u09ef",
        "\u0a66" - "\u0a6f", "\u0ae6" - "\u0aef",
        "\u0b66" - "\u0b6f", "\u0be7" - "\u0bef",
        "\u0c66" - "\u0c6f", "\u0ce6" - "\u0cef",
        "\u0d66" - "\u0d6f",
        "\u0e50" - "\u0e59", "\u0ed0" - "\u0ed9",
        "\u0f20" - "\u0f29"
    ] >
}
382
// Private helper: extender characters, allowed after the first character of
// an IDENTIFIER.
TOKEN :
{
    < #Extender : [
        "\u00b7", "\u02d0", "\u02d1", "\u0387", "\u0640", "\u0e46", "\u0ec6",
        "\u3005",
        "\u3031" - "\u3035",
        "\u309d" - "\u309e",
        "\u30fc" - "\u30fe"
    ] >
}