Add handling of escaped double quotes and backslashes in string literals to the ADM parser
Remove unused parameter from ADMDataParser.checkType
diff --git a/asterix-app/data/escapes.adm b/asterix-app/data/escapes.adm
new file mode 100644
index 0000000..d8193c1
--- /dev/null
+++ b/asterix-app/data/escapes.adm
@@ -0,0 +1,3 @@
+{ "id": "abc" }
+{ "id": "\"abc\"" }
+{ "id": "\\abc\\" }
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.1.ddl.aql
new file mode 100644
index 0000000..28a27d1
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.1.ddl.aql
@@ -0,0 +1,10 @@
+drop dataverse TestDataverse if exists;
+create dataverse TestDataverse;
+use dataverse TestDataverse;
+
+create type TestType as {
+ id: string
+}
+
+create dataset TestSet(TestType)
+primary key id;
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.2.update.aql
new file mode 100644
index 0000000..da559fa
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.2.update.aql
@@ -0,0 +1,3 @@
+use dataverse TestDataverse;
+
+load dataset TestSet using localfs (("path"="nc1://data/escapes.adm"),("format"="adm"));
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.3.query.aql
new file mode 100644
index 0000000..207c33d
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/escapes/escapes.3.query.aql
@@ -0,0 +1,5 @@
+use dataverse TestDataverse;
+
+for $i in dataset TestSet
+order by $i.id
+return $i
diff --git a/asterix-app/src/test/resources/runtimets/results/load/escapes/escapes.1.adm b/asterix-app/src/test/resources/runtimets/results/load/escapes/escapes.1.adm
new file mode 100644
index 0000000..68d1632
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/load/escapes/escapes.1.adm
@@ -0,0 +1,3 @@
+{ "id": "\"abc\"" }
+{ "id": "\\abc\\" }
+{ "id": "abc" }
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index 32608c4..880c41f 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -4745,6 +4745,11 @@
<output-dir compare="Text">type_promotion_0</output-dir>
</compilation-unit>
</test-case>
+ <test-case FilePath="load">
+ <compilation-unit name="escapes">
+ <output-dir compare="Text">escapes</output-dir>
+ </compilation-unit>
+ </test-case>
<test-case FilePath="user-defined-functions">
<compilation-unit name="query-issue244">
<output-dir compare="Text">query-issue244</output-dir>
diff --git a/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/java/edu/uci/ics/asterix/lexergenerator/rules/RuleAnythingUntil.java b/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/java/edu/uci/ics/asterix/lexergenerator/rules/RuleAnythingUntil.java
index 3476a40..83d6ed7 100644
--- a/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/java/edu/uci/ics/asterix/lexergenerator/rules/RuleAnythingUntil.java
+++ b/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/java/edu/uci/ics/asterix/lexergenerator/rules/RuleAnythingUntil.java
@@ -55,14 +55,10 @@
@Override
public String javaMatch(String action) {
- StringBuilder result = new StringBuilder();
- result.append("boolean escaped = false;");
- result.append("while (currentChar!='").append(expected).append("' || escaped)");
- result.append("{\nif(!escaped && currentChar=='\\\\\\\\'){escaped=true;}\nelse {escaped=false;}\ncurrentChar = readNextChar();\n}");
- result.append("\nif (currentChar=='").append(expected).append("'){");
- result.append(action);
- result.append("}\n");
- return result.toString();
+ return "boolean escaped = false;\n" + "while (currentChar != '" + expected + "' || escaped) {\n" // emits Java source for the generated lexer: keep reading until the terminator char (expected) is seen while not escaped
+ + "if(!escaped && currentChar == '\\\\\\\\') {\n" + "escaped = true;\n" + "containsEscapes = true;\n" // a backslash that is not itself escaped marks the next char as escaped and also raises the containsEscapes flag
+ + "} else {\n" + "escaped = false;\n" + "}\n" + "currentChar = readNextChar();\n" + "}\n"
+ + "if (currentChar == '" + expected + "') {" + action + "}\n"; // after the loop the caller-supplied action text is emitted inside a check for the terminator char
}
}
diff --git a/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java b/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
index 4ce840d..e34bde4 100644
--- a/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
+++ b/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
@@ -35,6 +35,7 @@
protected int line;
protected boolean prevCharIsCR;
protected boolean prevCharIsLF;
+ protected boolean containsEscapes;
protected char[] buffer;
protected int bufsize;
protected int bufpos;
@@ -53,11 +54,12 @@
// Main method. Return a TOKEN_CONSTANT
// ================================================================================
- public int next() throws [LEXER_NAME]Exception, IOException{
+ public int next() throws [LEXER_NAME]Exception, IOException {
char currentChar = buffer[bufpos];
while (currentChar == ' ' || currentChar=='\t' || currentChar == '\n' || currentChar=='\r')
currentChar = readNextChar();
tokenBegin = bufpos;
+ containsEscapes = false;
if (currentChar==EOF_CHAR) return TOKEN_EOF;
[LEXER_LOGIC]
@@ -104,6 +106,10 @@
return line;
}
+ public boolean containsEscapes() { // reports whether a backslash escape was flagged while lexing the current token
+ return containsEscapes; // the flag is reset to false near the start of every next() call, so it only describes the latest token
+ }
+
public static String tokenKindToString(int token) {
return tokenImage[token];
}
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
index 735bf2a5..ac92355 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
@@ -158,14 +158,14 @@
switch (token) {
case AdmLexer.TOKEN_NULL_LITERAL: {
- if (checkType(ATypeTag.NULL, objectType, out)) {
+ if (checkType(ATypeTag.NULL, objectType)) {
nullSerde.serialize(ANull.NULL, out);
} else
throw new ParseException("This field can not be null");
break;
}
case AdmLexer.TOKEN_TRUE_LITERAL: {
- if (checkType(ATypeTag.BOOLEAN, objectType, out)) {
+ if (checkType(ATypeTag.BOOLEAN, objectType)) {
booleanSerde.serialize(ABoolean.TRUE, out);
} else
throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
@@ -176,7 +176,7 @@
break;
}
case AdmLexer.TOKEN_FALSE_LITERAL: {
- if (checkType(ATypeTag.BOOLEAN, objectType, out)) {
+ if (checkType(ATypeTag.BOOLEAN, objectType)) {
booleanSerde.serialize(ABoolean.FALSE, out);
} else
throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
@@ -235,9 +235,10 @@
break;
}
case AdmLexer.TOKEN_STRING_LITERAL: {
- if (checkType(ATypeTag.STRING, objectType, out)) {
- aString.setValue(admLexer.getLastTokenImage().substring(1,
- admLexer.getLastTokenImage().length() - 1));
+ if (checkType(ATypeTag.STRING, objectType)) {
+ final String tokenImage = admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1);
+ aString.setValue(admLexer.containsEscapes() ? replaceEscapes(tokenImage) : tokenImage);
stringSerde.serialize(aString, out);
} else
throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
@@ -260,7 +261,7 @@
break;
}
case AdmLexer.TOKEN_INTERVAL_DATE_CONS: {
- if (checkType(ATypeTag.INTERVAL, objectType, out)) {
+ if (checkType(ATypeTag.INTERVAL, objectType)) {
if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
AIntervalSerializerDeserializer.parseDate(admLexer.getLastTokenImage(), out);
@@ -274,7 +275,7 @@
throw new ParseException("Wrong interval data parsing for date interval.");
}
case AdmLexer.TOKEN_INTERVAL_TIME_CONS: {
- if (checkType(ATypeTag.INTERVAL, objectType, out)) {
+ if (checkType(ATypeTag.INTERVAL, objectType)) {
if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
AIntervalSerializerDeserializer.parseTime(admLexer.getLastTokenImage(), out);
@@ -288,7 +289,7 @@
throw new ParseException("Wrong interval data parsing for time interval.");
}
case AdmLexer.TOKEN_INTERVAL_DATETIME_CONS: {
- if (checkType(ATypeTag.INTERVAL, objectType, out)) {
+ if (checkType(ATypeTag.INTERVAL, objectType)) {
if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
AIntervalSerializerDeserializer.parseDatetime(admLexer.getLastTokenImage(), out);
@@ -338,7 +339,7 @@
break;
}
case AdmLexer.TOKEN_START_UNORDERED_LIST: {
- if (checkType(ATypeTag.UNORDEREDLIST, objectType, out)) {
+ if (checkType(ATypeTag.UNORDEREDLIST, objectType)) {
objectType = getComplexType(objectType, ATypeTag.UNORDEREDLIST);
parseUnorderedList((AUnorderedListType) objectType, out);
} else
@@ -347,7 +348,7 @@
}
case AdmLexer.TOKEN_START_ORDERED_LIST: {
- if (checkType(ATypeTag.ORDEREDLIST, objectType, out)) {
+ if (checkType(ATypeTag.ORDEREDLIST, objectType)) {
objectType = getComplexType(objectType, ATypeTag.ORDEREDLIST);
parseOrderedList((AOrderedListType) objectType, out);
} else
@@ -355,7 +356,7 @@
break;
}
case AdmLexer.TOKEN_START_RECORD: {
- if (checkType(ATypeTag.RECORD, objectType, out)) {
+ if (checkType(ATypeTag.RECORD, objectType)) {
objectType = getComplexType(objectType, ATypeTag.RECORD);
parseRecord((ARecordType) objectType, out, datasetRec);
} else
@@ -371,6 +372,10 @@
}
}
+ private String replaceEscapes(String tokenImage) { // turns each escaped double quote and each escaped backslash in a string literal body into the bare character
+ return tokenImage.replace("\\\"", "\"").replace("\\\\", "\\"); // two sequential passes: quotes first, then backslashes; any other backslash sequence is left unchanged
+ }
+
private IAType getComplexType(IAType aObjectType, ATypeTag tag) {
if (aObjectType == null) {
return null;
@@ -408,7 +413,7 @@
return null;
}
- private boolean checkType(ATypeTag expectedTypeTag, IAType aObjectType, DataOutput out) throws IOException {
+ private boolean checkType(ATypeTag expectedTypeTag, IAType aObjectType) throws IOException {
return getTargetTypeTag(expectedTypeTag, aObjectType) != null;
}