vinayakb | 59d505d | 2012-10-29 10:38:02 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2009-2010 by The Regents of the University of California |
| 3 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | * you may not use this file except in compliance with the License. |
| 5 | * you may obtain a copy of the License from |
| 6 | * |
| 7 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | * |
| 9 | * Unless required by applicable law or agreed to in writing, software |
| 10 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | * See the License for the specific language governing permissions and |
| 13 | * limitations under the License. |
| 14 | */ |
options {
  // Number of tokens the generated parser examines by default at each choice point.
  LOOKAHEAD = 2;
  // Keywords and token patterns are matched without regard to letter case.
  IGNORE_CASE = true;
  // Generate instance (non-static) parser members.
  STATIC = false;
}
| 20 | |
| 21 | PARSER_BEGIN(PigletParser) |
| 22 | |
| 23 | package edu.uci.ics.hyracks.algebricks.examples.piglet.parser; |
| 24 | |
| 25 | import java.util.*; |
| 26 | import edu.uci.ics.hyracks.algebricks.common.utils.Pair; |
| 27 | import edu.uci.ics.hyracks.algebricks.examples.piglet.ast.*; |
| 28 | import edu.uci.ics.hyracks.algebricks.examples.piglet.types.*; |
| 29 | |
public class PigletParser {
  /**
   * Builds a scalar function-call node for the given tag, name and operands.
   *
   * @param fTag      tag identifying the function or operator
   * @param fName     function name; every production in this grammar passes null
   * @param arguments operand expressions, kept in the order given
   * @return a new ScalarFunctionExpressionNode holding a fresh copy of the operand list
   */
  private ExpressionNode createFunction(FunctionTag fTag, String fName, ExpressionNode... arguments) {
    List<ASTNode> operands = new ArrayList<ASTNode>(Arrays.asList(arguments));
    return new ScalarFunctionExpressionNode(fTag, fName, operands);
  }

  /**
   * Returns s without its first and last character. The grammar applies this to the
   * image of a STRING_LITERAL token to drop the enclosing quote characters.
   *
   * @param s text with one leading and one trailing delimiter character
   * @return the text between the two delimiters
   */
  private String stripQuotes(String s) {
    return s.substring(1, s.length() - 1);
  }
}
| 45 | |
| 46 | PARSER_END(PigletParser) |
| 47 | |
/**
 * Entry production: parses zero or more statements, each followed by ";", up to end of input.
 * A statement is either an assignment or a dump. Returns the statements in source order.
 */
List<ASTNode> Statements(): {
  List<ASTNode> statements = new ArrayList<ASTNode>();
  ASTNode stmt;
} {
  (
    ( stmt = AssignmentStatement() | stmt = DumpStatement() ) {
      statements.add(stmt);
    }
    ";"
  )* <EOF> {
    return statements;
  }
}
| 65 | |
/**
 * Parses "alias = relational-statement" and wraps both parts in an AssignmentNode.
 */
ASTNode AssignmentStatement(): {
  String target;
  RelationNode relation;
} {
  target = Alias() "=" relation = RelationalStatement() {
    return new AssignmentNode(target, relation);
  }
}
| 74 | |
/**
 * Parses a relation alias: a single IDENTIFIER token, returned as its text.
 */
String Alias(): {
  Token id;
} {
  id = <IDENTIFIER> {
    return id.image;
  }
}
| 82 | |
/**
 * Parses a column name: a single IDENTIFIER token, returned as its text.
 */
String ColumnName(): {
  Token id;
} {
  id = <IDENTIFIER> {
    return id.image;
  }
}
| 90 | |
/**
 * Parses a column type keyword and returns the matching Type object.
 * The scalar keywords ("int", "long", "float", "double", "chararray") return the
 * INSTANCE constant of their type class; "tuple", "bag" and "map" each return a
 * newly constructed type object.
 */
Type TypeName(): {
  // No locals needed: every alternative returns directly.
} {
  "int" {
    return IntegerType.INSTANCE;
  }
  | "long" {
    return LongType.INSTANCE;
  }
  | "float" {
    return FloatType.INSTANCE;
  }
  | "double" {
    return DoubleType.INSTANCE;
  }
  | "chararray" {
    return CharArrayType.INSTANCE;
  }
  | "tuple" {
    return new TupleType();
  }
  | "bag" {
    return new BagType();
  }
  | "map" {
    return new MapType();
  }
}
| 119 | |
/**
 * Parses one "name : type" column declaration and returns it as a (name, type) pair.
 */
Pair<String, Type> ColumnSchema(): {
  String name;
  Type type;
} {
  name = ColumnName() ":" type = TypeName() {
    return new Pair<String, Type>(name, type);
  }
}
| 128 | |
/**
 * Parses a parenthesised, comma-separated list of column declarations.
 * The list may be empty ("()"). Columns are kept in declaration order.
 */
Schema Schema(): {
  List<Pair<String, Type>> columns = new ArrayList<Pair<String, Type>>();
  Pair<String, Type> column;
} {
  "("
  (
    column = ColumnSchema() { columns.add(column); }
    ( "," column = ColumnSchema() { columns.add(column); } )*
  )?
  ")" {
    return new Schema(columns);
  }
}
| 145 | |
/**
 * Parses the right-hand side of an assignment: either a load or a filter statement.
 */
RelationNode RelationalStatement(): {
  RelationNode relation;
} {
  ( relation = LoadStatement() | relation = FilterStatement() ) {
    return relation;
  }
}
| 156 | |
/**
 * Parses: load 'file' as (schema).
 * NOTE(review): the file name handed to LoadNode is the raw token image, i.e. it still
 * carries its surrounding quotes (Literal() strips them, this production does not) --
 * confirm that LoadNode's consumers expect the quoted form.
 */
RelationNode LoadStatement(): {
  Token file;
  Schema schema;
} {
  "load" file = <STRING_LITERAL> "as" schema = Schema() {
    return new LoadNode(file.image, schema);
  }
}
| 165 | |
/**
 * Parses: filter alias by expression.
 */
RelationNode FilterStatement(): {
  String source;
  ExpressionNode condition;
} {
  "filter" source = Alias() "by" condition = Expression() {
    return new FilterNode(source, condition);
  }
}
| 174 | |
/**
 * Parses: dump alias into 'file'.
 * The DumpNode constructor receives the file first and the alias second.
 * NOTE(review): the file name is the raw token image and still includes its quotes --
 * confirm that DumpNode's consumers expect the quoted form.
 */
ASTNode DumpStatement(): {
  String source;
  Token file;
} {
  "dump" source = Alias() "into" file = <STRING_LITERAL> {
    return new DumpNode(file.image, source);
  }
}
| 183 | |
/**
 * Top of the expression grammar; delegates to the lowest-precedence level (OR).
 */
ExpressionNode Expression(): {
  ExpressionNode root;
} {
  root = OrExpression() {
    return root;
  }
}
| 191 | |
/**
 * Parses one or more AND-expressions joined by "or".
 * Each additional operand is folded into the result so far, giving a left-nested tree.
 */
ExpressionNode OrExpression(): {
  ExpressionNode result;
  ExpressionNode operand;
} {
  result = AndExpression()
  (
    "or" operand = AndExpression() {
      result = createFunction(FunctionTag.BOOLEAN_OR, null, result, operand);
    }
  )* {
    return result;
  }
}
| 204 | |
/**
 * Parses one or more comparison expressions joined by "and".
 * Each additional operand is folded into the result so far, giving a left-nested tree.
 */
ExpressionNode AndExpression(): {
  ExpressionNode result;
  ExpressionNode operand;
} {
  result = ComparisonExpression()
  (
    "and" operand = ComparisonExpression() {
      result = createFunction(FunctionTag.BOOLEAN_AND, null, result, operand);
    }
  )* {
    return result;
  }
}
| 217 | |
/**
 * Parses an additive expression optionally followed by ONE comparison operator and a
 * second additive expression. The tail is optional rather than repeated, so chained
 * comparisons are not matched by this production.
 */
ExpressionNode ComparisonExpression(): {
  ExpressionNode result;
  ExpressionNode rhs;
  FunctionTag op;
} {
  result = AdditiveExpression()
  (
    op = ComparisonOperator() rhs = AdditiveExpression() {
      result = createFunction(op, null, result, rhs);
    }
  )? {
    return result;
  }
}
| 231 | |
/**
 * Parses a comparison operator symbol and returns the corresponding FunctionTag.
 */
FunctionTag ComparisonOperator(): {
  FunctionTag tag;
} {
  (
      "==" { tag = FunctionTag.EQ; }
    | "!=" { tag = FunctionTag.NEQ; }
    | "<"  { tag = FunctionTag.LT; }
    | "<=" { tag = FunctionTag.LTE; }
    | ">"  { tag = FunctionTag.GT; }
    | ">=" { tag = FunctionTag.GTE; }
  ) {
    return tag;
  }
}
| 253 | |
/**
 * Parses one or more multiplicative expressions joined by "+" or "-".
 * Operands are folded left to right into nested function nodes.
 */
ExpressionNode AdditiveExpression(): {
  ExpressionNode result;
  ExpressionNode rhs;
  FunctionTag op;
} {
  result = MultiplicativeExpression()
  (
    op = AdditiveOperator() rhs = MultiplicativeExpression() {
      result = createFunction(op, null, result, rhs);
    }
  )* {
    return result;
  }
}
| 267 | |
/**
 * Parses "+" or "-" and returns FunctionTag.ADD or FunctionTag.SUBTRACT respectively.
 */
FunctionTag AdditiveOperator(): {
  FunctionTag tag;
} {
  (
      "+" { tag = FunctionTag.ADD; }
    | "-" { tag = FunctionTag.SUBTRACT; }
  ) {
    return tag;
  }
}
| 277 | |
/**
 * Parses one or more primary expressions joined by "*", "/" or "%".
 * Operands are folded left to right into nested function nodes.
 */
ExpressionNode MultiplicativeExpression(): {
  ExpressionNode result;
  ExpressionNode rhs;
  FunctionTag op;
} {
  result = PrimaryExpression()
  (
    op = MultiplicativeOperator() rhs = PrimaryExpression() {
      result = createFunction(op, null, result, rhs);
    }
  )* {
    return result;
  }
}
| 291 | |
/**
 * Parses "*", "/" or "%" and returns FunctionTag.MULTIPLY, DIVIDE or MOD respectively.
 */
FunctionTag MultiplicativeOperator(): {
  FunctionTag tag;
} {
  (
      "*" { tag = FunctionTag.MULTIPLY; }
    | "/" { tag = FunctionTag.DIVIDE; }
    | "%" { tag = FunctionTag.MOD; }
  ) {
    return tag;
  }
}
| 304 | |
/**
 * Parses an atomic operand: either a literal or a field access.
 */
ExpressionNode PrimaryExpression(): {
  ExpressionNode atom;
} {
  ( atom = Literal() | atom = FieldAccess() ) {
    return atom;
  }
}
| 315 | |
/**
 * Parses a string, integer or double literal into a LiteralExpressionNode.
 * String literals have their enclosing quotes removed and are typed as chararray;
 * numeric literals keep their token text unchanged.
 */
ExpressionNode Literal(): {
  Token lit;
} {
  lit = <STRING_LITERAL> {
    return new LiteralExpressionNode(stripQuotes(lit.image), CharArrayType.INSTANCE);
  }
  | lit = <INTEGER_LITERAL> {
    return new LiteralExpressionNode(lit.image, IntegerType.INSTANCE);
  }
  | lit = <DOUBLE_LITERAL> {
    return new LiteralExpressionNode(lit.image, DoubleType.INSTANCE);
  }
}
| 329 | |
/**
 * Parses a field reference, optionally qualified as "relation.field".
 * When no qualifier is present the relation name passed to the node is null.
 */
ExpressionNode FieldAccess(): {
  String qualifier = null;
  Token field;
} {
  ( qualifier = Alias() "." )? field = <IDENTIFIER> {
    return new FieldAccessExpressionNode(qualifier, field.image);
  }
}
| 338 | |
// Literal and identifier tokens. Declaration order is kept as-is because it is
// significant when two patterns match the same text.
<DEFAULT>
TOKEN : {
  // Double- or single-quoted text; may not contain its own quote character or a newline.
  <STRING_LITERAL: "\"" (~["\"", "\n"])* "\"" | "'" (~["'", "\n"])* "'">
  // A letter followed by any mix of letters, digits and extender characters.
| <IDENTIFIER: <Letter> (<Letter> | <Digit> | <Extender>)*>
| <INTEGER_LITERAL: (<Digit>)+>
  // Either digits-dot-digits with an optional exponent, or an integer with a mandatory exponent.
| <DOUBLE_LITERAL: (<Digit>)* "." (<Digit>)+ (<Exponent>)? | <INTEGER_LITERAL> <Exponent>>
  // "$" followed by an integer. Not referenced by any production in the visible grammar.
| <INDEXED_FIELD: "$" <INTEGER_LITERAL>>
}
| 347 | |
TOKEN :
{
  // Exponent suffix of a DOUBLE_LITERAL: the "E"/"e" marker, then an optional sign,
  // then the exponent digits (e.g. "e5", "E-3", "e+10").
  // The sign must follow the marker: with the sign in front, "1e-5" does not lex as a
  // single literal while "1-e5" is swallowed as one DOUBLE_LITERAL token.
  < #Exponent : ["E", "e"] (["+", "-"])? <INTEGER_LITERAL> >
}
| 352 | |
// Space, tab, newline and carriage return. Declared as SPECIAL_TOKEN, so these
// characters separate tokens but are not delivered to the productions as regular tokens.
SPECIAL_TOKEN :
{
  < WhitespaceChar : [" ", "\t", "\n", "\r"] >
}
| 357 | |
// Private helper pattern: any character from the BaseChar or Ideographic classes.
TOKEN :
{
  < #Letter : <BaseChar> | <Ideographic> >
}
| 362 | |
// Private helper pattern: alphabetic base characters accepted in identifiers.
// NOTE(review): the class names and ranges look like the XML 1.0 character-class
// tables (BaseChar) -- confirm before editing any range.
TOKEN :
{
  < #BaseChar : [
      "\u0041" - "\u005a", "\u0061" - "\u007a", "\u00c0" - "\u00d6", "\u00d8" - "\u00f6",
      "\u00f8" - "\u00ff", "\u0100" - "\u0131", "\u0134" - "\u013e", "\u0141" - "\u0148",
      "\u014a" - "\u017e", "\u0180" - "\u01c3", "\u01cd" - "\u01f0", "\u01f4" - "\u01f5",
      "\u01fa" - "\u0217", "\u0250" - "\u02a8", "\u02bb" - "\u02c1", "\u0386",
      "\u0388" - "\u038a", "\u038c", "\u038e" - "\u03a1", "\u03a3" - "\u03ce",
      "\u03d0" - "\u03d6", "\u03da", "\u03dc", "\u03de", "\u03e0", "\u03e2" - "\u03f3",
      "\u0401" - "\u040c", "\u040e" - "\u044f", "\u0451" - "\u045c", "\u045e" - "\u0481",
      "\u0490" - "\u04c4", "\u04c7" - "\u04c8", "\u04cb" - "\u04cc", "\u04d0" - "\u04eb",
      "\u04ee" - "\u04f5", "\u04f8" - "\u04f9", "\u0531" - "\u0556", "\u0559",
      "\u0561" - "\u0586", "\u05d0" - "\u05ea", "\u05f0" - "\u05f2", "\u0621" - "\u063a",
      "\u0641" - "\u064a", "\u0671" - "\u06b7", "\u06ba" - "\u06be", "\u06c0" - "\u06ce",
      "\u06d0" - "\u06d3", "\u06d5", "\u06e5" - "\u06e6", "\u0905" - "\u0939", "\u093d",
      "\u0958" - "\u0961", "\u0985" - "\u098c", "\u098f" - "\u0990", "\u0993" - "\u09a8",
      "\u09aa" - "\u09b0", "\u09b2", "\u09b6" - "\u09b9", "\u09dc" - "\u09dd",
      "\u09df" - "\u09e1", "\u09f0" - "\u09f1", "\u0a05" - "\u0a0a", "\u0a0f" - "\u0a10",
      "\u0a13" - "\u0a28", "\u0a2a" - "\u0a30", "\u0a32" - "\u0a33", "\u0a35" - "\u0a36",
      "\u0a38" - "\u0a39", "\u0a59" - "\u0a5c", "\u0a5e", "\u0a72" - "\u0a74",
      "\u0a85" - "\u0a8b", "\u0a8d", "\u0a8f" - "\u0a91", "\u0a93" - "\u0aa8",
      "\u0aaa" - "\u0ab0", "\u0ab2" - "\u0ab3", "\u0ab5" - "\u0ab9", "\u0abd", "\u0ae0",
      "\u0b05" - "\u0b0c", "\u0b0f" - "\u0b10", "\u0b13" - "\u0b28", "\u0b2a" - "\u0b30",
      "\u0b32" - "\u0b33", "\u0b36" - "\u0b39", "\u0b3d", "\u0b5c" - "\u0b5d",
      "\u0b5f" - "\u0b61", "\u0b85" - "\u0b8a", "\u0b8e" - "\u0b90", "\u0b92" - "\u0b95",
      "\u0b99" - "\u0b9a", "\u0b9c", "\u0b9e" - "\u0b9f", "\u0ba3" - "\u0ba4",
      "\u0ba8" - "\u0baa", "\u0bae" - "\u0bb5", "\u0bb7" - "\u0bb9", "\u0c05" - "\u0c0c",
      "\u0c0e" - "\u0c10", "\u0c12" - "\u0c28", "\u0c2a" - "\u0c33", "\u0c35" - "\u0c39",
      "\u0c60" - "\u0c61", "\u0c85" - "\u0c8c", "\u0c8e" - "\u0c90", "\u0c92" - "\u0ca8",
      "\u0caa" - "\u0cb3", "\u0cb5" - "\u0cb9", "\u0cde", "\u0ce0" - "\u0ce1",
      "\u0d05" - "\u0d0c", "\u0d0e" - "\u0d10", "\u0d12" - "\u0d28", "\u0d2a" - "\u0d39",
      "\u0d60" - "\u0d61", "\u0e01" - "\u0e2e", "\u0e30", "\u0e32" - "\u0e33",
      "\u0e40" - "\u0e45", "\u0e81" - "\u0e82", "\u0e84", "\u0e87" - "\u0e88", "\u0e8a",
      "\u0e8d", "\u0e94" - "\u0e97", "\u0e99" - "\u0e9f", "\u0ea1" - "\u0ea3", "\u0ea5",
      "\u0ea7", "\u0eaa" - "\u0eab", "\u0ead" - "\u0eae", "\u0eb0", "\u0eb2" - "\u0eb3",
      "\u0ebd", "\u0ec0" - "\u0ec4", "\u0f40" - "\u0f47", "\u0f49" - "\u0f69",
      "\u10a0" - "\u10c5", "\u10d0" - "\u10f6", "\u1100", "\u1102" - "\u1103",
      "\u1105" - "\u1107", "\u1109", "\u110b" - "\u110c", "\u110e" - "\u1112", "\u113c",
      "\u113e", "\u1140", "\u114c", "\u114e", "\u1150", "\u1154" - "\u1155", "\u1159",
      "\u115f" - "\u1161", "\u1163", "\u1165", "\u1167", "\u1169", "\u116d" - "\u116e",
      "\u1172" - "\u1173", "\u1175", "\u119e", "\u11a8", "\u11ab", "\u11ae" - "\u11af",
      "\u11b7" - "\u11b8", "\u11ba", "\u11bc" - "\u11c2", "\u11eb", "\u11f0", "\u11f9",
      "\u1e00" - "\u1e9b", "\u1ea0" - "\u1ef9", "\u1f00" - "\u1f15", "\u1f18" - "\u1f1d",
      "\u1f20" - "\u1f45", "\u1f48" - "\u1f4d", "\u1f50" - "\u1f57", "\u1f59", "\u1f5b",
      "\u1f5d", "\u1f5f" - "\u1f7d", "\u1f80" - "\u1fb4", "\u1fb6" - "\u1fbc", "\u1fbe",
      "\u1fc2" - "\u1fc4", "\u1fc6" - "\u1fcc", "\u1fd0" - "\u1fd3", "\u1fd6" - "\u1fdb",
      "\u1fe0" - "\u1fec", "\u1ff2" - "\u1ff4", "\u1ff6" - "\u1ffc", "\u2126",
      "\u212a" - "\u212b", "\u212e", "\u2180" - "\u2182", "\u3041" - "\u3094",
      "\u30a1" - "\u30fa", "\u3105" - "\u312c", "\uac00" - "\ud7a3"
    ] >
}
| 367 | |
// Private helper pattern: ideographic characters accepted wherever a Letter is.
TOKEN :
{
  < #Ideographic : [
      "\u4e00" - "\u9fa5",
      "\u3007",
      "\u3021" - "\u3029"
    ] >
}
| 372 | |
// Private helper pattern: combining marks.
// Not referenced by any token definition in the visible grammar.
TOKEN :
{
  < #CombiningChar : [
      "\u0300" - "\u0345", "\u0360" - "\u0361", "\u0483" - "\u0486", "\u0591" - "\u05a1",
      "\u05a3" - "\u05b9", "\u05bb" - "\u05bd", "\u05bf", "\u05c1" - "\u05c2", "\u05c4",
      "\u064b" - "\u0652", "\u0670", "\u06d6" - "\u06dc", "\u06dd" - "\u06df",
      "\u06e0" - "\u06e4", "\u06e7" - "\u06e8", "\u06ea" - "\u06ed", "\u0901" - "\u0903",
      "\u093c", "\u093e" - "\u094c", "\u094d", "\u0951" - "\u0954", "\u0962" - "\u0963",
      "\u0981" - "\u0983", "\u09bc", "\u09be", "\u09bf", "\u09c0" - "\u09c4",
      "\u09c7" - "\u09c8", "\u09cb" - "\u09cd", "\u09d7", "\u09e2" - "\u09e3", "\u0a02",
      "\u0a3c", "\u0a3e", "\u0a3f", "\u0a40" - "\u0a42", "\u0a47" - "\u0a48",
      "\u0a4b" - "\u0a4d", "\u0a70" - "\u0a71", "\u0a81" - "\u0a83", "\u0abc",
      "\u0abe" - "\u0ac5", "\u0ac7" - "\u0ac9", "\u0acb" - "\u0acd", "\u0b01" - "\u0b03",
      "\u0b3c", "\u0b3e" - "\u0b43", "\u0b47" - "\u0b48", "\u0b4b" - "\u0b4d",
      "\u0b56" - "\u0b57", "\u0b82" - "\u0b83", "\u0bbe" - "\u0bc2", "\u0bc6" - "\u0bc8",
      "\u0bca" - "\u0bcd", "\u0bd7", "\u0c01" - "\u0c03", "\u0c3e" - "\u0c44",
      "\u0c46" - "\u0c48", "\u0c4a" - "\u0c4d", "\u0c55" - "\u0c56", "\u0c82" - "\u0c83",
      "\u0cbe" - "\u0cc4", "\u0cc6" - "\u0cc8", "\u0cca" - "\u0ccd", "\u0cd5" - "\u0cd6",
      "\u0d02" - "\u0d03", "\u0d3e" - "\u0d43", "\u0d46" - "\u0d48", "\u0d4a" - "\u0d4d",
      "\u0d57", "\u0e31", "\u0e34" - "\u0e3a", "\u0e47" - "\u0e4e", "\u0eb1",
      "\u0eb4" - "\u0eb9", "\u0ebb" - "\u0ebc", "\u0ec8" - "\u0ecd", "\u0f18" - "\u0f19",
      "\u0f35", "\u0f37", "\u0f39", "\u0f3e", "\u0f3f", "\u0f71" - "\u0f84",
      "\u0f86" - "\u0f8b", "\u0f90" - "\u0f95", "\u0f97", "\u0f99" - "\u0fad",
      "\u0fb1" - "\u0fb7", "\u0fb9", "\u20d0" - "\u20dc", "\u20e1", "\u302a" - "\u302f",
      "\u3099", "\u309a"
    ] >
}
| 377 | |
// Private helper pattern: decimal digits. Besides ASCII "0"-"9" the class also lists
// ten-character digit ranges from other code blocks, so INTEGER_LITERAL can match those too.
TOKEN :
{
  < #Digit : [
      "\u0030" - "\u0039", "\u0660" - "\u0669", "\u06f0" - "\u06f9", "\u0966" - "\u096f",
      "\u09e6" - "\u09ef", "\u0a66" - "\u0a6f", "\u0ae6" - "\u0aef", "\u0b66" - "\u0b6f",
      "\u0be7" - "\u0bef", "\u0c66" - "\u0c6f", "\u0ce6" - "\u0cef", "\u0d66" - "\u0d6f",
      "\u0e50" - "\u0e59", "\u0ed0" - "\u0ed9", "\u0f20" - "\u0f29"
    ] >
}
| 382 | |
// Private helper pattern: extender characters, allowed after the first character
// of an IDENTIFIER.
TOKEN :
{
  < #Extender : [
      "\u00b7", "\u02d0", "\u02d1", "\u0387", "\u0640", "\u0e46", "\u0ec6", "\u3005",
      "\u3031" - "\u3035", "\u309d" - "\u309e", "\u30fc" - "\u30fe"
    ] >
}