[NO ISSUE][*DB] Update Dataverse canonical form
Update the canonical dataverse name form to use '/' as the part separator and drop the '@' escaping scheme.
*NOTE* This breaks metadata compatibility with existing multi-part
dataverse names (and with any name part containing '.' or '@', which is no longer escaped), as the canonical form is stored in metadata.
Change-Id: Ifc7d7fe5d7ce9a922371c1a9c6685d7a5dc64c33
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/10704
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Michael Blow <mblow@apache.org>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/multipart-dataverse/special_chars_2/special_chars_2.2.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/multipart-dataverse/special_chars_2/special_chars_2.2.adm
index 9abda4f..7edd3eb 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/multipart-dataverse/special_chars_2/special_chars_2.2.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/multipart-dataverse/special_chars_2/special_chars_2.2.adm
@@ -1,6 +1,6 @@
{ "CanonicalName": "A", "DisplayName": "A", "NameParts": [ "A" ] }
-{ "CanonicalName": "B.C", "DisplayName": "B.C", "NameParts": [ "B", "C" ] }
-{ "CanonicalName": "C@.D@.E", "DisplayName": "`C.D.E`", "NameParts": [ "C.D.E" ] }
+{ "CanonicalName": "B/C", "DisplayName": "B.C", "NameParts": [ "B", "C" ] }
+{ "CanonicalName": "C.D.E", "DisplayName": "`C.D.E`", "NameParts": [ "C.D.E" ] }
{ "CanonicalName": "Default", "DisplayName": "Default", "NameParts": [ "Default" ] }
{ "CanonicalName": "Metadata", "DisplayName": "Metadata", "NameParts": [ "Metadata" ] }
-{ "CanonicalName": "a-A.b_B.c$C.z@.Z", "DisplayName": "`a-A`.b_B.c$C.`z.Z`", "NameParts": [ "a-A", "b_B", "c$C", "z.Z" ] }
\ No newline at end of file
+{ "CanonicalName": "a-A/b_B/c$C/z.Z", "DisplayName": "`a-A`.b_B.c$C.`z.Z`", "NameParts": [ "a-A", "b_B", "c$C", "z.Z" ] }
\ No newline at end of file
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/metadata/DataverseName.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/metadata/DataverseName.java
index b8124a2..f943dea 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/metadata/DataverseName.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/metadata/DataverseName.java
@@ -36,11 +36,10 @@
* <p>
* Each dataverse name can be encoded into a single string (called a canonical form) by
* {@link #getCanonicalForm()} and decoded back from it with {@link #createFromCanonicalForm(String)}.
- * The canonical form encoding concatenates name parts together with {@link #CANONICAL_FORM_SEPARATOR_CHAR '.'}
- * character. The {@link #CANONICAL_FORM_ESCAPE_CHAR '@'} character is used to escape
- * {@link #CANONICAL_FORM_SEPARATOR_CHAR '.'} and itself in each name part prior to concatenation.
+ * The canonical form encoding concatenates name parts together with {@link #CANONICAL_FORM_SEPARATOR_CHAR '/'}
+ * character.
* <p>
- * E.g. the canonical form for a dataverse name {@code ["a", "b", "c"]} is {@code "a.b.c"}
+ * E.g. the canonical form for a dataverse name {@code ["a", "b", "c"]} is {@code "a/b/c"}
* <p>
* {@link #toString()} returns a display form which is suitable for error messages,
* and is a valid SQL++ multi-part identifier parsable by {@code IParser#parseMultipartIdentifier()}
@@ -59,11 +58,9 @@
*/
public final class DataverseName implements Serializable, Comparable<DataverseName> {
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 2L;
- public static final char CANONICAL_FORM_SEPARATOR_CHAR = '.';
-
- private static final char CANONICAL_FORM_ESCAPE_CHAR = '@';
+ public static final char CANONICAL_FORM_SEPARATOR_CHAR = '/';
public static final char DISPLAY_FORM_SEPARATOR_CHAR = '.';
@@ -72,7 +69,7 @@
private static final char DISPLAY_FORM_ESCAPE_CHAR = '\\';
private static final char[] CANONICAL_FORM_SEPARATOR_AND_ESCAPE_CHARS =
- new char[] { CANONICAL_FORM_SEPARATOR_CHAR, CANONICAL_FORM_ESCAPE_CHAR };
+ new char[] { CANONICAL_FORM_SEPARATOR_CHAR };
private final boolean isMultiPart;
@@ -282,13 +279,7 @@
}
private static void encodePartIntoCanonicalForm(String part, StringBuilder out) {
- for (int i = 0, ln = part.length(); i < ln; i++) {
- char c = part.charAt(i);
- if (c == CANONICAL_FORM_SEPARATOR_CHAR || c == CANONICAL_FORM_ESCAPE_CHAR) {
- out.append(CANONICAL_FORM_ESCAPE_CHAR);
- }
- out.append(c);
- }
+ out.append(part);
}
private static <T> void decodeCanonicalForm(String canonicalForm, BiConsumer<CharSequence, T> partConsumer,
@@ -297,18 +288,11 @@
StringBuilder sb = new StringBuilder(ln);
for (int i = 0; i < ln; i++) {
char c = canonicalForm.charAt(i);
- switch (c) {
- case CANONICAL_FORM_SEPARATOR_CHAR:
- partConsumer.accept(sb, partConsumerArg);
- sb.setLength(0);
- break;
- case CANONICAL_FORM_ESCAPE_CHAR:
- i++;
- c = canonicalForm.charAt(i);
- // fall through to 'default'
- default:
- sb.append(c);
- break;
+ if (c == CANONICAL_FORM_SEPARATOR_CHAR) {
+ partConsumer.accept(sb, partConsumerArg);
+ sb.setLength(0);
+ } else {
+ sb.append(c);
}
}
if (sb.length() > 0) {
@@ -318,41 +302,11 @@
// optimization for a single part name
private static String decodeSinglePartNameFromCanonicalForm(String canonicalForm) {
- if (canonicalForm.indexOf(CANONICAL_FORM_ESCAPE_CHAR) < 0) {
- // no escaping was done
- return canonicalForm;
- }
-
- StringBuilder singlePart = new StringBuilder(canonicalForm.length());
- for (int i = 0, ln = canonicalForm.length(); i < ln; i++) {
- char c = canonicalForm.charAt(i);
- switch (c) {
- case CANONICAL_FORM_SEPARATOR_CHAR:
- throw new IllegalStateException(canonicalForm); // should never happen
- case CANONICAL_FORM_ESCAPE_CHAR:
- i++;
- c = canonicalForm.charAt(i);
- // fall through to 'default'
- default:
- singlePart.append(c);
- break;
- }
- }
- return singlePart.toString();
+ return canonicalForm;
}
private static boolean isMultiPartCanonicalForm(String canonicalForm) {
- for (int i = 0, ln = canonicalForm.length(); i < ln; i++) {
- char c = canonicalForm.charAt(i);
- switch (c) {
- case CANONICAL_FORM_SEPARATOR_CHAR:
- return true;
- case CANONICAL_FORM_ESCAPE_CHAR:
- i++;
- break;
- }
- }
- return false;
+ return canonicalForm.indexOf(CANONICAL_FORM_SEPARATOR_CHAR) != -1;
}
private static void addPartToCollection(CharSequence part, Collection<? super String> out) {
diff --git a/asterixdb/asterix-common/src/test/java/org/apache/asterix/common/metadata/DataverseNameTest.java b/asterixdb/asterix-common/src/test/java/org/apache/asterix/common/metadata/DataverseNameTest.java
index 2f0dff5..75b3989 100644
--- a/asterixdb/asterix-common/src/test/java/org/apache/asterix/common/metadata/DataverseNameTest.java
+++ b/asterixdb/asterix-common/src/test/java/org/apache/asterix/common/metadata/DataverseNameTest.java
@@ -51,9 +51,7 @@
private static final List<String> TEST_BUILTIN_DATAVERSE_INVALID_NAME_PARAMS = Arrays.asList(
// separator character is not allowed
- "a.b",
- // escape character is not allowed
- "c@d");
+ "a/b");
private static final List<Triple<String, String, String>> TEST_SINGLE_PART_NAME_PARAMS = Arrays.asList(
// <1-part-name, canonical-form, display-form>
@@ -63,30 +61,30 @@
// letters and digits
new Triple<>("aA09", "aA09", "aA09"),
// with canonical form escape character
- new Triple<>("a@b", "a@@b", "`a@b`"),
+ new Triple<>("a@b", "a@b", "`a@b`"),
// with canonical form separator character
- new Triple<>("a.b", "a@.b", "`a.b`"),
+ new Triple<>("a.b", "a.b", "`a.b`"),
// with canonical form escape and separator characters
- new Triple<>("a@.b", "a@@@.b", "`a@.b`"),
+ new Triple<>("a@.b", "a@.b", "`a@.b`"),
// with display form escape character
new Triple<>("a\\b", "a\\b", "`a\\\\b`"));
private static final List<Triple<List<String>, String, String>> TEST_MULTI_PART_NAME_PARAMS = Arrays.asList(
// <multi-part-name, canonical-form, display-form>
- new Triple<>(Arrays.asList("aa", "bb", "cc"), "aa.bb.cc", "aa.bb.cc"),
+ new Triple<>(Arrays.asList("aa", "bb", "cc"), "aa/bb/cc", "aa.bb.cc"),
// mixed case letters, digits
- new Triple<>(Arrays.asList("az", "AZ", "a09Z"), "az.AZ.a09Z", "az.AZ.a09Z"),
+ new Triple<>(Arrays.asList("az", "AZ", "a09Z"), "az/AZ/a09Z", "az.AZ.a09Z"),
// with canonical form escape character
- new Triple<>(Arrays.asList("a@a@", "@b@b", "@c@c"), "a@@a@@.@@b@@b.@@c@@c", "`a@a@`.`@b@b`.`@c@c`"),
+ new Triple<>(Arrays.asList("a@a@", "@b@b", "@c@c"), "a@a@/@b@b/@c@c", "`a@a@`.`@b@b`.`@c@c`"),
// with canonical form separator character
- new Triple<>(Arrays.asList("a.a.", ".b.b.", ".c.c"), "a@.a@..@.b@.b@..@.c@.c", "`a.a.`.`.b.b.`.`.c.c`"),
+ new Triple<>(Arrays.asList("a.a.", ".b.b.", ".c.c"), "a.a./.b.b./.c.c", "`a.a.`.`.b.b.`.`.c.c`"),
// with canonical form escape and separator characters
- new Triple<>(Arrays.asList("a@a.", "@b.b@", ".c@c"), "a@@a@..@@b@.b@@.@.c@@c", "`a@a.`.`@b.b@`.`.c@c`"),
+ new Triple<>(Arrays.asList("a@a.", "@b.b@", ".c@c"), "a@a./@b.b@/.c@c", "`a@a.`.`@b.b@`.`.c@c`"),
// with canonical form escape and separator characters repeated
- new Triple<>(Arrays.asList("a@@a..", "@@b..b@@", "..c@@c"), "a@@@@a@.@..@@@@b@.@.b@@@@.@.@.c@@@@c",
+ new Triple<>(Arrays.asList("a@@a..", "@@b..b@@", "..c@@c"), "a@@a../@@b..b@@/..c@@c",
"`a@@a..`.`@@b..b@@`.`..c@@c`"),
// with display form escape character
- new Triple<>(Arrays.asList("a\\b", "c\\d"), "a\\b.c\\d", "`a\\\\b`.`c\\\\d`"));
+ new Triple<>(Arrays.asList("a\\b", "c\\d"), "a\\b/c\\d", "`a\\\\b`.`c\\\\d`"));
@Test
public void testBuiltinDataverseName() throws Exception {
@@ -220,7 +218,6 @@
testRuntimeException(() -> DataverseName.createBuiltinDataverseName(null), NullPointerException.class);
testRuntimeException(() -> DataverseName.createFromCanonicalForm(null), NullPointerException.class);
testRuntimeException(() -> DataverseName.create(Collections.singletonList(null)), NullPointerException.class);
- testRuntimeException(() -> DataverseName.create(Arrays.asList(null, null)), NullPointerException.class);
// 3. IndexOutOfBoundsException
testRuntimeException(() -> DataverseName.create(Collections.emptyList(), 0, 1),
IndexOutOfBoundsException.class);