support for JSON escapes in ADM and AQL parser
fix https://code.google.com/p/asterixdb/issues/detail?id=752
fix https://code.google.com/p/asterixdb/issues/detail?id=753
introduce PrintTools.writeUTF8StringWithEscapes to replace
Hyracks' WriteValueTools.writeUTF8String as this serialization
is ADM-specific
new method AQLParser.parse for better exception handling
more robust extraction of line numbers from error messages
Change-Id: I2e9bea7658d00032a3ac7a1d107eba8f17423eeb
Reviewed-on: http://fulliautomatix.ics.uci.edu:8443/75
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Chris Hillery <ceej@lambda.nu>
diff --git a/asterix-app/data/escapes.adm b/asterix-app/data/escapes01.adm
similarity index 100%
rename from asterix-app/data/escapes.adm
rename to asterix-app/data/escapes01.adm
diff --git a/asterix-app/data/escapes02.adm b/asterix-app/data/escapes02.adm
new file mode 100644
index 0000000..23c6cdf
--- /dev/null
+++ b/asterix-app/data/escapes02.adm
@@ -0,0 +1,14 @@
+{ "id": "s00", "val": "1\f2\n3\t4\r56\b7\"8" }
+{ "id": "s01", "val": "-\u0000-\u0001-\u000a-\u0020-\u007f-\u0080-\u009f-\u00a0-" }
+{ "id": "s02", "val": "\"\\\"" }
+{ "id": "s03", "val": "\"\\\\\"" }
+{ "id": "s04", "val": "\"\\\\\\\"" }
+{ "id": "s05", "val": "\"\\ \\ \\\"" }
+{ "id": "s06", "val": "\" \\t \\\\ \\\\t \\\" \" \" a b c d e \" \" \\t \\b \\r \\f \\n ast\" " }
+{ "id": "s07", "val": "\\" }
+{ "id": "s08", "val": "\\\\" }
+{ "id": "s09", "val": "\\\\\\" }
+{ "id": "s10", "val": "\\ \\ \\" }
+{ "id": "s11", "val": " \\t \\\\ \\\\t \\\" \" \" a b c d e \" \" \\t \\b \\r \\f \\n ast " }
+{ "id": "s12", "val": " \\t \\\\ \\\\t \\\" \" \" a b c d e \" \" \t \b \r \f \n ast " }
+{ "id": "s13", "val": "\" \\t \\\\ \\\\t \\\" \" \" a b c\\'a\\' d e \" \" \t \b \r \f \n ast \"" }
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/APIServlet.java b/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/APIServlet.java
index d62cc9d..e947b10 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/APIServlet.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/APIServlet.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -86,7 +86,7 @@
}
}
AQLParser parser = new AQLParser(query);
- List<Statement> aqlStatements = parser.Statement();
+ List<Statement> aqlStatements = parser.parse();
SessionConfig sessionConfig = new SessionConfig(true, isSet(printExprParam),
isSet(printRewrittenExprParam), isSet(printLogicalPlanParam),
isSet(printOptimizedLogicalPlanParam), false, isSet(executeQuery), true, isSet(printJob));
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/RESTAPIServlet.java b/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/RESTAPIServlet.java
index fd3dea2..f37d83c 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/RESTAPIServlet.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/RESTAPIServlet.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -100,7 +100,7 @@
}
AQLParser parser = new AQLParser(query);
- List<Statement> aqlStatements = parser.Statement();
+ List<Statement> aqlStatements = parser.parse();
if (!containsForbiddenStatements(aqlStatements)) {
SessionConfig sessionConfig = new SessionConfig(true, false, false, false, false, false, true, true,
false);
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/api/java/AsterixJavaClient.java b/asterix-app/src/main/java/edu/uci/ics/asterix/api/java/AsterixJavaClient.java
index da52c4d..11bcc6a 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/api/java/AsterixJavaClient.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/api/java/AsterixJavaClient.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -70,7 +70,7 @@
AQLParser parser = new AQLParser(builder.toString());
List<Statement> aqlStatements;
try {
- aqlStatements = parser.Statement();
+ aqlStatements = parser.parse();
} catch (ParseException pe) {
throw new AsterixException(pe);
}
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/result/ResultUtils.java b/asterix-app/src/main/java/edu/uci/ics/asterix/result/ResultUtils.java
index e1b098b..5c86254 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/result/ResultUtils.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/result/ResultUtils.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -23,6 +23,8 @@
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.json.JSONArray;
import org.json.JSONException;
@@ -38,15 +40,15 @@
public class ResultUtils {
static Map<Character, String> HTML_ENTITIES = new HashMap<Character, String>();
-
+
static {
HTML_ENTITIES.put('"', "&quot;");
HTML_ENTITIES.put('&', "&amp;");
HTML_ENTITIES.put('<', "&lt;");
HTML_ENTITIES.put('>', "&gt;");
}
-
- public static String escapeHTML(String s) {
+
+ public static String escapeHTML(String s) {
for (Character c : HTML_ENTITIES.keySet()) {
if (s.indexOf(c) >= 0) {
s = s.replace(c.toString(), HTML_ENTITIES.get(c));
@@ -114,7 +116,7 @@
escapeHTML(extractErrorSummary(e)), escapeHTML(extractFullStackTrace(e)));
out.println(errorOutput);
}
-
+
public static void webUIParseExceptionHandler(PrintWriter out, Throwable e, String query) {
String errorTemplate = readTemplateFile("/webui/errortemplate_message.html", "<pre class=\"error\">%s\n</pre>");
@@ -142,17 +144,20 @@
String message = e.getMessage();
message = message.replace("<", "&lt;");
message = message.replace(">", "&gt;");
- errorMessage.append("SyntaxError:" + message + "\n");
+ errorMessage.append("SyntaxError: " + message + "\n");
int pos = message.indexOf("line");
if (pos > 0) {
- int columnPos = message.indexOf(",", pos + 1 + "line".length());
- int lineNo = Integer.parseInt(message.substring(pos + "line".length() + 1, columnPos));
- String[] lines = query.split("\n");
- if (lineNo >= lines.length) {
- errorMessage.append("===> <BLANK LINE> \n");
- } else {
- String line = lines[lineNo - 1];
- errorMessage.append("==> " + line);
+ Pattern p = Pattern.compile("\\d+");
+ Matcher m = p.matcher(message);
+ if (m.find(pos)) {
+ int lineNo = Integer.parseInt(message.substring(m.start(), m.end()));
+ String[] lines = query.split("\n");
+ if (lineNo > lines.length) {
+ errorMessage.append("===> <BLANK LINE> \n");
+ } else {
+ String line = lines[lineNo - 1];
+ errorMessage.append("==> " + line);
+ }
}
}
return errorMessage.toString();
@@ -169,7 +174,7 @@
/**
* Extract the message in the root cause of the stack trace:
- *
+ *
* @param e
* @return error message string.
*/
@@ -193,7 +198,7 @@
* Extract the meaningful part of a stack trace:
* a. the causes in the stack trace hierarchy
* b. the top exception for each cause
- *
+ *
* @param e
* @return the contacted message containing a and b.
*/
@@ -211,7 +216,7 @@
/**
* Extract the full stack trace:
- *
+ *
* @param e
* @return the string containing the full stack trace of the error.
*/
@@ -225,7 +230,7 @@
/**
* Read the template file which is stored as a resource and return its content. If the file does not exist or is
* not readable return the default template string.
- *
+ *
* @param path
* The path to the resource template file
* @param defaultTemplate
diff --git a/asterix-app/src/test/java/edu/uci/ics/asterix/test/aql/AQLTestCase.java b/asterix-app/src/test/java/edu/uci/ics/asterix/test/aql/AQLTestCase.java
index 2236f49..7b9c08b 100644
--- a/asterix-app/src/test/java/edu/uci/ics/asterix/test/aql/AQLTestCase.java
+++ b/asterix-app/src/test/java/edu/uci/ics/asterix/test/aql/AQLTestCase.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -53,7 +53,7 @@
List<Statement> statements;
GlobalConfig.ASTERIX_LOGGER.info(queryFile.toString());
try {
- statements = parser.Statement();
+ statements = parser.parse();
} catch (ParseException e) {
GlobalConfig.ASTERIX_LOGGER.warning("Failed while testing file " + fis);
StringWriter sw = new StringWriter();
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.2.update.aql
deleted file mode 100644
index da559fa..0000000
--- a/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.2.update.aql
+++ /dev/null
@@ -1,3 +0,0 @@
-use dataverse TestDataverse;
-
-load dataset TestSet using localfs (("path"="nc1://data/escapes.adm"),("format"="adm"));
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/load/escapes01/escapes01.1.ddl.aql
similarity index 100%
rename from asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.1.ddl.aql
rename to asterix-app/src/test/resources/runtimets/queries/load/escapes01/escapes01.1.ddl.aql
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/escapes01/escapes01.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/load/escapes01/escapes01.2.update.aql
new file mode 100644
index 0000000..1612648
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/escapes01/escapes01.2.update.aql
@@ -0,0 +1,3 @@
+use dataverse TestDataverse;
+
+load dataset TestSet using localfs (("path"="nc1://data/escapes01.adm"),("format"="adm"));
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/load/escapes01/escapes01.3.query.aql
similarity index 100%
rename from asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.3.query.aql
rename to asterix-app/src/test/resources/runtimets/queries/load/escapes01/escapes01.3.query.aql
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/load/escapes02/escapes02.1.ddl.aql
similarity index 100%
copy from asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.1.ddl.aql
copy to asterix-app/src/test/resources/runtimets/queries/load/escapes02/escapes02.1.ddl.aql
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/escapes02/escapes02.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/load/escapes02/escapes02.2.update.aql
new file mode 100644
index 0000000..9d335fd
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/escapes02/escapes02.2.update.aql
@@ -0,0 +1,3 @@
+use dataverse TestDataverse;
+
+load dataset TestSet using localfs (("path"="nc1://data/escapes02.adm"),("format"="adm"));
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/load/escapes02/escapes02.3.query.aql
similarity index 100%
copy from asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.3.query.aql
copy to asterix-app/src/test/resources/runtimets/queries/load/escapes02/escapes02.3.query.aql
diff --git a/asterix-app/src/test/resources/runtimets/queries/string/escapes01/escapes01.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/string/escapes01/escapes01.3.query.aql
new file mode 100644
index 0000000..6726d39
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/string/escapes01/escapes01.3.query.aql
@@ -0,0 +1 @@
+string-concat(["1\f2\n3\t4\r56\b7\"8" , '\'9'])
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/string/escapes02/escapes02.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/string/escapes02/escapes02.3.query.aql
new file mode 100644
index 0000000..c89f5cb
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/string/escapes02/escapes02.3.query.aql
@@ -0,0 +1,17 @@
+for $s in [
+ "1\f2\n3\t4\r56\b7\"8",
+ "-\u0000-\u0001-\u000a-\u0020-\u007f-\u0080-\u009f-\u00a0-",
+ "\"\\\"",
+ "\"\\\\\"",
+ "\"\\\\\\\"",
+ "\"\\ \\ \\\"",
+ "\" \\t \\\\ \\\\t \\\" \" \" a b c d e \" \" \\t \\b \\r \\f \\n ast\" ",
+ "\\",
+ "\\\\",
+ "\\\\\\",
+ "\\ \\ \\",
+ " \\t \\\\ \\\\t \\\" \" \" a b c d e \" \" \\t \\b \\r \\f \\n ast ",
+ " \\t \\\\ \\\\t \\\" \" \" a b c d e \" \" \t \b \r \f \n ast ",
+ "\" \\t \\\\ \\\\t \\\" \" \" a b c\\'a\\' d e \" \" \t \b \r \f \n ast \""
+ ]
+return { "s": $s }
diff --git a/asterix-app/src/test/resources/runtimets/results/load/escapes/escapes.1.adm b/asterix-app/src/test/resources/runtimets/results/load/escapes01/escapes01.1.adm
similarity index 100%
rename from asterix-app/src/test/resources/runtimets/results/load/escapes/escapes.1.adm
rename to asterix-app/src/test/resources/runtimets/results/load/escapes01/escapes01.1.adm
diff --git a/asterix-app/src/test/resources/runtimets/results/load/escapes02/escapes02.1.adm b/asterix-app/src/test/resources/runtimets/results/load/escapes02/escapes02.1.adm
new file mode 100644
index 0000000..6be081f
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/load/escapes02/escapes02.1.adm
@@ -0,0 +1,14 @@
+{ "id": "s00", "val": "1\f2\n3\t4\r56\b7\"8" }
+{ "id": "s01", "val": "-\u0000-\u0001-\n- -\u007f-\u0080-\u009f- -" }
+{ "id": "s02", "val": "\"\\\"" }
+{ "id": "s03", "val": "\"\\\\\"" }
+{ "id": "s04", "val": "\"\\\\\\\"" }
+{ "id": "s05", "val": "\"\\ \\ \\\"" }
+{ "id": "s06", "val": "\" \\t \\\\ \\\\t \\\" \" \" a b c d e \" \" \\t \\b \\r \\f \\n ast\" " }
+{ "id": "s07", "val": "\\" }
+{ "id": "s08", "val": "\\\\" }
+{ "id": "s09", "val": "\\\\\\" }
+{ "id": "s10", "val": "\\ \\ \\" }
+{ "id": "s11", "val": " \\t \\\\ \\\\t \\\" \" \" a b c d e \" \" \\t \\b \\r \\f \\n ast " }
+{ "id": "s12", "val": " \\t \\\\ \\\\t \\\" \" \" a b c d e \" \" \t \b \r \f \n ast " }
+{ "id": "s13", "val": "\" \\t \\\\ \\\\t \\\" \" \" a b c\\'a\\' d e \" \" \t \b \r \f \n ast \"" }
diff --git a/asterix-app/src/test/resources/runtimets/results/string/escapes01/escapes01.1.adm b/asterix-app/src/test/resources/runtimets/results/string/escapes01/escapes01.1.adm
new file mode 100644
index 0000000..669e476
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/escapes01/escapes01.1.adm
@@ -0,0 +1 @@
+"1\f2\n3\t4\r56\b7\"8'9"
diff --git a/asterix-app/src/test/resources/runtimets/results/string/escapes02/escapes02.1.adm b/asterix-app/src/test/resources/runtimets/results/string/escapes02/escapes02.1.adm
new file mode 100644
index 0000000..667dc5e
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/escapes02/escapes02.1.adm
@@ -0,0 +1,14 @@
+{ "s": "1\f2\n3\t4\r56\b7\"8" }
+{ "s": "-\u0000-\u0001-\n- -\u007f-\u0080-\u009f- -" }
+{ "s": "\"\\\"" }
+{ "s": "\"\\\\\"" }
+{ "s": "\"\\\\\\\"" }
+{ "s": "\"\\ \\ \\\"" }
+{ "s": "\" \\t \\\\ \\\\t \\\" \" \" a b c d e \" \" \\t \\b \\r \\f \\n ast\" " }
+{ "s": "\\" }
+{ "s": "\\\\" }
+{ "s": "\\\\\\" }
+{ "s": "\\ \\ \\" }
+{ "s": " \\t \\\\ \\\\t \\\" \" \" a b c d e \" \" \\t \\b \\r \\f \\n ast " }
+{ "s": " \\t \\\\ \\\\t \\\" \" \" a b c d e \" \" \t \b \r \f \n ast " }
+{ "s": "\" \\t \\\\ \\\\t \\\" \" \" a b c\\'a\\' d e \" \" \t \b \r \f \n ast \"" }
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index d4facd7..40cc84c 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -3879,6 +3879,16 @@
</compilation-unit>
</test-case>
<test-case FilePath="string">
+ <compilation-unit name="escapes01">
+ <output-dir compare="Text">escapes01</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="string">
+ <compilation-unit name="escapes02">
+ <output-dir compare="Text">escapes02</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="string">
<compilation-unit name="length_01">
<output-dir compare="Text">length_01</output-dir>
</compilation-unit>
@@ -4801,8 +4811,13 @@
</compilation-unit>
</test-case>
<test-case FilePath="load">
- <compilation-unit name="escapes">
- <output-dir compare="Text">escapes</output-dir>
+ <compilation-unit name="escapes01">
+ <output-dir compare="Text">escapes01</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="load">
+ <compilation-unit name="escapes02">
+ <output-dir compare="Text">escapes02</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="load">
diff --git a/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/ConnectFeedStatement.java b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/ConnectFeedStatement.java
index 3ffc1cc..25acff8 100644
--- a/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/ConnectFeedStatement.java
+++ b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/ConnectFeedStatement.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -129,7 +129,7 @@
List<Statement> statements;
try {
- statements = parser.Statement();
+ statements = parser.parse();
query = ((InsertStatement) statements.get(1)).getQuery();
} catch (ParseException pe) {
throw new MetadataException(pe);
diff --git a/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/parser/ScopeChecker.java b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/parser/ScopeChecker.java
index 12c5de6..ef2112a 100644
--- a/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/parser/ScopeChecker.java
+++ b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/parser/ScopeChecker.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -47,7 +47,7 @@
/**
* Create a new scope, using the top scope in scopeStack as parent scope
- *
+ *
* @param scopeStack
* @return new scope
*/
@@ -59,7 +59,7 @@
/**
* Extend the current scope
- *
+ *
* @param scopeStack
* @return
*/
@@ -87,7 +87,7 @@
/**
* Remove current scope
- *
+ *
* @return
*/
public final Scope removeCurrentScope() {
@@ -96,7 +96,7 @@
/**
* get current scope
- *
+ *
* @return
*/
public final Scope getCurrentScope() {
@@ -105,7 +105,7 @@
/**
* find symbol in the scope
- *
+ *
* @return identifier
*/
public final Identifier lookupSymbol(String name) {
@@ -118,7 +118,7 @@
/**
* find FunctionSignature in the scope
- *
+ *
* @return functionDescriptor
*/
public final FunctionSignature lookupFunctionSignature(String dataverse, String name, int arity) {
@@ -161,8 +161,49 @@
public static final String removeQuotesAndEscapes(String s) {
char q = s.charAt(0); // simple or double quote
String stripped = s.substring(1, s.length() - 1);
- stripped = stripped.replace("\\" + q, "" + q);
- return stripped.replace("\\\\", "\\");
+ int pos = stripped.indexOf('\\');
+ if (pos < 0) {
+ return stripped;
+ }
+ StringBuilder res = new StringBuilder();
+ int start = 0;
+ while (pos >= 0) {
+ res.append(stripped.substring(start, pos));
+ char c = stripped.charAt(pos + 1);
+ switch (c) {
+ case '/':
+ case '\\':
+ res.append(c);
+ break;
+ case 'b':
+ res.append('\b');
+ break;
+ case 'f':
+ res.append('\f');
+ break;
+ case 'n':
+ res.append('\n');
+ break;
+ case 'r':
+ res.append('\r');
+ break;
+ case 't':
+ res.append('\t');
+ break;
+ case '\'':
+ case '"':
+ if (c == q) {
+ res.append(c);
+ break;
+ }
+ default:
+ throw new IllegalStateException("'\\" + c + "' should have been caught by the lexer");
+ }
+ start = pos + 2;
+ pos = stripped.indexOf('\\', start);
+ }
+ res.append(stripped.substring(start));
+ return res.toString();
}
public String extractFragment(int beginLine, int beginColumn, int endLine, int endColumn) {
diff --git a/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/util/FunctionUtils.java b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/util/FunctionUtils.java
index 18695c3..f850a8d 100644
--- a/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/util/FunctionUtils.java
+++ b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/util/FunctionUtils.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -64,7 +64,7 @@
List<Statement> statements = null;
try {
- statements = parser.Statement();
+ statements = parser.parse();
} catch (ParseException pe) {
throw new AsterixException(pe);
}
diff --git a/asterix-aql/src/main/javacc/AQL.jj b/asterix-aql/src/main/javacc/AQL.jj
index 6be8300..d15e14f 100644
--- a/asterix-aql/src/main/javacc/AQL.jj
+++ b/asterix-aql/src/main/javacc/AQL.jj
@@ -157,9 +157,19 @@
File file = new File(args[0]);
Reader fis = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
AQLParser parser = new AQLParser(fis);
- List<Statement> st = parser.Statement();
+ List<Statement> st = parser.parse();
//st.accept(new AQLPrintVisitor(), 0);
}
+
+ public List<Statement> parse() throws ParseException {
+ try {
+ return Statement();
+ } catch (Error e) {
+ // this is here as the JavaCharStream that's below the lexer sometimes throws Errors that are not handled
+ // by the JavaCC-generated lexer or parser (e.g. it does this for invalid backslash u + 4 hex digits escapes)
+ throw new ParseException(e.getMessage());
+ }
+ }
}
PARSER_END(AQLParser)
@@ -2350,11 +2360,36 @@
<DEFAULT,IN_DBL_BRACE>
TOKEN :
{
- <STRING_LITERAL : ("\"" (<EscapeQuot> | <EscapeBslash> | ~["\"","\\"])* "\"")
- | ("\'"(<EscapeApos> | <EscapeBslash> | ~["\'","\\"])* "\'")>
+ // backslash u + 4 hex digits escapes are handled in the underlying JavaCharStream
+ <STRING_LITERAL : ("\"" (
+ <EscapeQuot>
+ | <EscapeBslash>
+ | <EscapeSlash>
+ | <EscapeBspace>
+ | <EscapeFormf>
+ | <EscapeNl>
+ | <EscapeCr>
+ | <EscapeTab>
+ | ~["\"","\\"])* "\"")
+ | ("\'"(
+ <EscapeApos>
+ | <EscapeBslash>
+ | <EscapeSlash>
+ | <EscapeBspace>
+ | <EscapeFormf>
+ | <EscapeNl>
+ | <EscapeCr>
+ | <EscapeTab>
+ | ~["\'","\\"])* "\'")>
| < #EscapeQuot: "\\\"" >
| < #EscapeApos: "\\\'" >
| < #EscapeBslash: "\\\\" >
+ | < #EscapeSlash: "\\/" >
+ | < #EscapeBspace: "\\b" >
+ | < #EscapeFormf: "\\f" >
+ | < #EscapeNl: "\\n" >
+ | < #EscapeCr: "\\r" >
+ | < #EscapeTab: "\\t" >
}
<DEFAULT,IN_DBL_BRACE>
diff --git a/asterix-installer/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm b/asterix-installer/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm
index 4a8369b..47503c4 100644
--- a/asterix-installer/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm
+++ b/asterix-installer/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm
@@ -1,12 +1,6 @@
-{ "DataverseName": "externallibtest", "Name": "testlib#allTypes", "Arity": "1", "Params": [ "AllType" ], "ReturnType": "AllType", "Definition": "edu.uci.ics.asterix.external.library.AllTypesFactory
- ", "Language": "JAVA", "Kind": "SCALAR" }
-{ "DataverseName": "externallibtest", "Name": "testlib#echoDelay", "Arity": "1", "Params": [ "TweetMessageType" ], "ReturnType": "TweetMessageType", "Definition": "edu.uci.ics.asterix.external.library.EchoDelayFactory
- ", "Language": "JAVA", "Kind": "SCALAR" }
-{ "DataverseName": "externallibtest", "Name": "testlib#getCapital", "Arity": "1", "Params": [ "ASTRING" ], "ReturnType": "CountryCapitalType", "Definition": "edu.uci.ics.asterix.external.library.CapitalFinderFactory
- ", "Language": "JAVA", "Kind": "SCALAR" }
-{ "DataverseName": "externallibtest", "Name": "testlib#mysum", "Arity": "2", "Params": [ "AINT32", "AINT32" ], "ReturnType": "AINT32", "Definition": "edu.uci.ics.asterix.external.library.SumFactory
- ", "Language": "JAVA", "Kind": "SCALAR" }
-{ "DataverseName": "externallibtest", "Name": "testlib#parseTweet", "Arity": "1", "Params": [ "TweetInputType" ], "ReturnType": "TweetOutputType", "Definition": "edu.uci.ics.asterix.external.library.ParseTweetFactory
- ", "Language": "JAVA", "Kind": "SCALAR" }
-{ "DataverseName": "externallibtest", "Name": "testlib#toUpper", "Arity": "1", "Params": [ "TextType" ], "ReturnType": "TextType", "Definition": "edu.uci.ics.asterix.external.library.UpperCaseFactory
- ", "Language": "JAVA", "Kind": "SCALAR" }
+{ "DataverseName": "externallibtest", "Name": "testlib#allTypes", "Arity": "1", "Params": [ "AllType" ], "ReturnType": "AllType", "Definition": "edu.uci.ics.asterix.external.library.AllTypesFactory\n\t\t\t", "Language": "JAVA", "Kind": "SCALAR" }
+{ "DataverseName": "externallibtest", "Name": "testlib#echoDelay", "Arity": "1", "Params": [ "TweetMessageType" ], "ReturnType": "TweetMessageType", "Definition": "edu.uci.ics.asterix.external.library.EchoDelayFactory\n\t\t\t", "Language": "JAVA", "Kind": "SCALAR" }
+{ "DataverseName": "externallibtest", "Name": "testlib#getCapital", "Arity": "1", "Params": [ "ASTRING" ], "ReturnType": "CountryCapitalType", "Definition": "edu.uci.ics.asterix.external.library.CapitalFinderFactory\n\t\t\t", "Language": "JAVA", "Kind": "SCALAR" }
+{ "DataverseName": "externallibtest", "Name": "testlib#mysum", "Arity": "2", "Params": [ "AINT32", "AINT32" ], "ReturnType": "AINT32", "Definition": "edu.uci.ics.asterix.external.library.SumFactory\n\t\t\t", "Language": "JAVA", "Kind": "SCALAR" }
+{ "DataverseName": "externallibtest", "Name": "testlib#parseTweet", "Arity": "1", "Params": [ "TweetInputType" ], "ReturnType": "TweetOutputType", "Definition": "edu.uci.ics.asterix.external.library.ParseTweetFactory\n\t\t\t", "Language": "JAVA", "Kind": "SCALAR" }
+{ "DataverseName": "externallibtest", "Name": "testlib#toUpper", "Arity": "1", "Params": [ "TextType" ], "ReturnType": "TextType", "Definition": "edu.uci.ics.asterix.external.library.UpperCaseFactory\n\t\t\t", "Language": "JAVA", "Kind": "SCALAR" }
diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/printers/AStringPrinter.java b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/printers/AStringPrinter.java
index 6f37927..105a22e 100644
--- a/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/printers/AStringPrinter.java
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/printers/AStringPrinter.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -19,7 +19,6 @@
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.data.IPrinter;
-import edu.uci.ics.hyracks.algebricks.data.utils.WriteValueTools;
public class AStringPrinter implements IPrinter {
@@ -33,7 +32,7 @@
@Override
public void print(byte[] b, int s, int l, PrintStream ps) throws AlgebricksException {
try {
- WriteValueTools.writeUTF8String(b, s + 1, l - 1, ps);
+ PrintTools.writeUTF8StringWithEscapes(b, s + 1, l - 1, ps);
} catch (IOException e) {
throw new AlgebricksException(e);
}
diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/printers/PrintTools.java b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/printers/PrintTools.java
new file mode 100644
index 0000000..d109ae9
--- /dev/null
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/printers/PrintTools.java
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2014 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.asterix.dataflow.data.nontagged.printers;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+
+/**
+ * Helpers for printing ADM values. The string serialization implemented here is ADM-specific,
+ * which is why it is not shared with the generic Hyracks value writers.
+ */
+public class PrintTools {
+    /**
+     * Writes a string as a double-quoted literal with escapes.
+     * <ul>
+     * <li>backspace, form feed, newline, carriage return and tab are written as a backslash
+     * followed by {@code b}, {@code f}, {@code n}, {@code r} or {@code t};</li>
+     * <li>backslash and double quote are written with a preceding backslash;</li>
+     * <li>single-byte characters up to 0x1f, the character 0x7f, the 2-byte encoding of U+0000
+     * and the 2-byte encodings of U+0080 to U+009F are written as a backslash followed by
+     * 'u00XX' (two lower-case hexadecimal digits);</li>
+     * <li>every other character is copied byte for byte from the input.</li>
+     * </ul>
+     *
+     * @param b
+     *            the buffer holding the string
+     * @param s
+     *            the offset of the string in {@code b}; the first 2 bytes at this offset contain
+     *            the string size and the encoded characters follow
+     * @param l
+     *            not used by this method
+     * @param os
+     *            the stream that receives the quoted and escaped string
+     * @throws IOException
+     *             if writing to {@code os} fails
+     */
+    public static void writeUTF8StringWithEscapes(byte[] b, int s, int l, OutputStream os) throws IOException {
+        final int utfLength = UTF8StringPointable.getUTFLength(b, s);
+        int pos = s + 2; // skip 2 bytes containing string size
+        final int end = pos + utfLength;
+        os.write('\"');
+        while (pos < end) {
+            final char ch = UTF8StringPointable.charAt(b, pos);
+            final int size = UTF8StringPointable.charSize(b, pos);
+
+            final char letter = shortEscapeLetter(ch);
+            if (letter != 0) {
+                os.write('\\');
+                os.write(letter);
+                pos += size;
+                continue;
+            }
+
+            if (ch == '\\' || ch == '"') {
+                // these two only need a backslash in front; the character itself is copied below
+                os.write('\\');
+            }
+
+            if (needsUEscape(b, pos, ch, size)) {
+                writeUEscape(os, ch);
+                pos += size;
+                continue;
+            }
+
+            // no escape applies: copy the encoded bytes of this character unchanged
+            for (int remaining = size; remaining > 0; --remaining) {
+                os.write(b[pos]);
+                ++pos;
+            }
+        }
+        os.write('\"');
+    }
+
+    /**
+     * Returns the letter of the two-character escape for {@code ch}, or 0 if {@code ch} has no
+     * such escape.
+     */
+    private static char shortEscapeLetter(char ch) {
+        switch (ch) {
+            case '\b':
+                return 'b';
+            case '\f':
+                return 'f';
+            case '\n':
+                return 'n';
+            case '\r':
+                return 'r';
+            case '\t':
+                return 't';
+            default:
+                return 0;
+        }
+    }
+
+    /**
+     * Decides whether the character {@code ch}, encoded in {@code size} bytes starting at
+     * {@code b[pos]}, is printed in the 'u00XX' format.
+     */
+    private static boolean needsUEscape(byte[] b, int pos, char ch, int size) {
+        if (size == 1) {
+            // "control code points" with a single byte UTF-8 representation
+            // (value up to 0x1f or 0x7f)
+            return ch <= 0x1f || ch == 0x7f;
+        }
+        if (size == 2) {
+            // 2-byte encodings of some code points in modified UTF-8 as described in
+            // DataInput.html#modified-utf-8
+            //
+            //         110xxxxx  10xxxxxx
+            // U+0000     00000    000000   C0 80
+            // U+0080     00010    000000   C2 80
+            // U+009F     00010    011111   C2 9F
+            final byte lead = b[pos];
+            if (lead == (byte) 0xc0) {
+                // the U+0000 code point
+                return b[pos + 1] == (byte) 0x80;
+            }
+            if (lead == (byte) 0xc2) {
+                // the U+0080 to U+009F code points (signed byte comparison)
+                return b[pos + 1] <= (byte) 0x9f;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Writes a backslash, 'u', two zeros and the two lower-case hexadecimal digits of the low 8
+     * bits of {@code c}.
+     */
+    private static void writeUEscape(OutputStream os, char c) throws IOException {
+        final int highNibble = (c >>> 4) & 0x0f;
+        final int lowNibble = c & 0x0f;
+        os.write('\\');
+        os.write('u');
+        os.write('0');
+        os.write('0');
+        os.write(hex(highNibble));
+        os.write(hex(lowNibble));
+    }
+
+    /** Maps a value between 0 and 15 to its lower-case hexadecimal digit. */
+    private static byte hex(int i) {
+        if (i < 10) {
+            return (byte) (i + '0');
+        }
+        return (byte) (i + ('a' - 10));
+    }
+}
diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/printers/json/AStringPrinter.java b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/printers/json/AStringPrinter.java
index d48bf4a..2c84c82 100644
--- a/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/printers/json/AStringPrinter.java
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/printers/json/AStringPrinter.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,9 +17,9 @@
import java.io.IOException;
import java.io.PrintStream;
+import edu.uci.ics.asterix.dataflow.data.nontagged.printers.PrintTools;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.data.IPrinter;
-import edu.uci.ics.hyracks.algebricks.data.utils.WriteValueTools;
public class AStringPrinter implements IPrinter {
@@ -33,7 +33,7 @@
@Override
public void print(byte[] b, int s, int l, PrintStream ps) throws AlgebricksException {
try {
- WriteValueTools.writeUTF8String(b, s + 1, l - 1, ps);
+ PrintTools.writeUTF8StringWithEscapes(b, s + 1, l - 1, ps);
} catch (IOException e) {
throw new AlgebricksException(e);
}
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
index 1990357..1c06604 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
@@ -377,24 +377,57 @@
+    /**
+     * Replaces the backslash escape sequences in the text of a string token by the characters
+     * they denote. Recognized are a backslash followed by another backslash, a double quote, a
+     * slash or one of the letters {@code b f n r t}, and a backslash followed by {@code u} and
+     * exactly four hexadecimal digits (one UTF-16 code unit).
+     *
+     * @param tokenImage
+     *            the text of the string token, possibly containing escape sequences
+     * @return the text with every escape sequence replaced by the character it denotes
+     * @throws ParseException
+     *             if a backslash is the last character of the text, is followed by a character
+     *             that does not start a supported escape, or starts a 'uXXXX' escape that is not
+     *             followed by four hexadecimal digits
+     */
     private String replaceEscapes(String tokenImage) throws ParseException {
         char[] chars = tokenImage.toCharArray();
         int len = chars.length;
-        int idx = 0;
-        while (idx < len) {
-            if (chars[idx] == '\\') {
-                switch (chars[idx + 1]) {
+        // The array is rewritten in place: readpos scans the input, writepos is the position of
+        // the next output character, and movemarker is the start of the run of plain characters
+        // that has been scanned but not yet shifted to the front by (readpos - writepos).
+        int readpos = 0;
+        int writepos = 0;
+        int movemarker = 0;
+        while (readpos < len) {
+            if (chars[readpos] == '\\') {
+                if (readpos + 1 >= len) {
+                    // a backslash at the very end has no escaped character to look at
+                    throw new ParseException("Illegal escape '\\'");
+                }
+                moveChars(chars, movemarker, readpos, readpos - writepos);
+                switch (chars[readpos + 1]) {
                     case '\\':
                     case '\"':
-                        for (int i = idx + 1; i < len; ++i) {
-                            chars[i - 1] = chars[i];
-                        }
-                        --len;
+                    case '/':
+                        chars[writepos] = chars[readpos + 1];
+                        break;
+                    case 'b':
+                        chars[writepos] = '\b';
+                        break;
+                    case 'f':
+                        chars[writepos] = '\f';
+                        break;
+                    case 'n':
+                        chars[writepos] = '\n';
+                        break;
+                    case 'r':
+                        chars[writepos] = '\r';
+                        break;
+                    case 't':
+                        chars[writepos] = '\t';
+                        break;
+                    case 'u':
+                        chars[writepos] = parseHexCodeUnit(chars, readpos + 2, len);
+                        // skip the four digits; the 'u' itself is skipped below
+                        readpos += 4;
                         break;
                     default:
-                        throw new ParseException("Illegal escape '\\" + chars[idx + 1] + "'");
+                        throw new ParseException("Illegal escape '\\" + chars[readpos + 1] + "'");
                 }
+                ++readpos;
+                movemarker = readpos + 1;
             }
-            ++idx;
+            ++writepos;
+            ++readpos;
         }
-        return new String(chars, 0, len);
+        moveChars(chars, movemarker, len, readpos - writepos);
+        return new String(chars, 0, len - (readpos - writepos));
+    }
+
+    /**
+     * Reads the four hexadecimal digits of a 'uXXXX' escape and returns the code unit they
+     * denote. A sign character is not accepted as a digit.
+     *
+     * @param chars
+     *            the characters of the token
+     * @param start
+     *            the index of the first digit (the character after the 'u')
+     * @param len
+     *            the number of valid characters in {@code chars}
+     * @throws ParseException
+     *             if fewer than four characters are left or one of them is not a hexadecimal digit
+     */
+    private static char parseHexCodeUnit(final char[] chars, final int start, final int len)
+            throws ParseException {
+        if (start + 4 > len) {
+            throw new ParseException("Illegal escape '\\u" + new String(chars, start, len - start) + "'");
+        }
+        int value = 0;
+        for (int i = start; i < start + 4; ++i) {
+            final int digit = Character.digit(chars[i], 16);
+            if (digit < 0) {
+                throw new ParseException("Illegal escape '\\u" + new String(chars, start, 4) + "'");
+            }
+            value = (value << 4) | digit;
+        }
+        return (char) value;
+    }
+
+    /**
+     * Copies the characters at the indices {@code start} (inclusive) to {@code end} (exclusive)
+     * of {@code chars} to the positions {@code offset} places before their current ones, in
+     * ascending index order. Does nothing if {@code offset} is 0.
+     */
+    private static void moveChars(final char[] chars, final int start, final int end, final int offset) {
+        if (offset != 0) {
+            for (int src = start, dst = start - offset; src < end; ++src, ++dst) {
+                chars[dst] = chars[src];
+            }
+        }
     }
private IAType getComplexType(IAType aObjectType, ATypeTag tag) {
diff --git a/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/datagen/AdmDataGen.java b/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/datagen/AdmDataGen.java
index eb85bb9..2c2f44d 100644
--- a/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/datagen/AdmDataGen.java
+++ b/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/datagen/AdmDataGen.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -942,7 +942,7 @@
AlgebricksException {
FileReader aql = new FileReader(schemaFile);
AQLParser parser = new AQLParser(aql);
- List<Statement> statements = parser.Statement();
+ List<Statement> statements = parser.parse();
aql.close();
// TODO: Need to fix how to use transactions here.
MetadataTransactionContext mdTxnCtx = new MetadataTransactionContext(new JobId(-1));