Merge "Merge branch 'gerrit/mad-hatter'" into cheshire-cat
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/metadata/MetadataManagerTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/metadata/MetadataManagerTest.java
index f8d6aeb..946d116 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/metadata/MetadataManagerTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/metadata/MetadataManagerTest.java
@@ -18,6 +18,7 @@
  */
 package org.apache.asterix.test.metadata;
 
+import java.io.File;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
@@ -161,4 +162,29 @@
             }
         }
     }
+
+    @Test
+    public void testInvalidCharacters() throws Exception {
+        TestCaseContext.OutputFormat cleanJson = TestCaseContext.OutputFormat.CLEAN_JSON;
+
+        List<DataverseName> dvNameBadCharsList = Arrays.asList(
+                // #1. nul characters
+                DataverseName.createSinglePartName("abc\u0000def"),
+                // #2. leading whitespace
+                DataverseName.createSinglePartName(" abcdef"),
+                // #3. file separator
+                DataverseName.createSinglePartName("abc" + File.separatorChar + "def"));
+
+        ErrorCode invalidNameErrCode = ErrorCode.INVALID_DATABASE_OBJECT_NAME;
+        for (DataverseName dvNameBad : dvNameBadCharsList) {
+            String sql = String.format("create dataverse %s;", dvNameBad);
+            try {
+                testExecutor.executeSqlppUpdateOrDdl(sql, cleanJson);
+                Assert.fail("Expected failure: " + invalidNameErrCode);
+            } catch (Exception e) {
+                Assert.assertTrue("Unexpected error message: " + e.getMessage(),
+                        e.getMessage().contains(invalidNameErrCode.errorCode()));
+            }
+        }
+    }
 }
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/metadata/MetadataManagerWindowsOsTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/metadata/MetadataManagerWindowsOsTest.java
new file mode 100644
index 0000000..b9f25e1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/metadata/MetadataManagerWindowsOsTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.test.metadata;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.asterix.api.common.AsterixHyracksIntegrationUtil;
+import org.apache.asterix.common.config.GlobalConfig;
+import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.common.metadata.DataverseName;
+import org.apache.asterix.test.common.TestExecutor;
+import org.apache.asterix.testframework.context.TestCaseContext;
+import org.apache.commons.lang3.SystemUtils;
+import org.jetbrains.annotations.NotNull;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class MetadataManagerWindowsOsTest {
+
+    static {
+        System.setProperty("os.name", "Windows"); // pretend to be Windows; setUp() checks SystemUtils.IS_OS_WINDOWS
+    }
+
+    protected static final String TEST_CONFIG_FILE_NAME = "src/main/resources/cc.conf";
+    private static final TestExecutor testExecutor = new TestExecutor();
+    private static final AsterixHyracksIntegrationUtil integrationUtil = new AsterixHyracksIntegrationUtil();
+
+    @Before
+    public void setUp() throws Exception {
+        System.setProperty(GlobalConfig.CONFIG_FILE_PROPERTY, TEST_CONFIG_FILE_NAME);
+        integrationUtil.init(true, TEST_CONFIG_FILE_NAME);
+        Assert.assertTrue("wrong os reported", SystemUtils.IS_OS_WINDOWS);
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        integrationUtil.deinit(true);
+    }
+
+    @Test
+    public void testInvalidCharacters() throws Exception {
+        TestCaseContext.OutputFormat cleanJson = TestCaseContext.OutputFormat.CLEAN_JSON;
+
+        List<DataverseName> dvNameBadCharsList = new ArrayList<>();
+
+        for (char c = 0; c <= 0x1F; c++) {
+            dvNameBadCharsList.add(badCharName(c));
+        }
+        dvNameBadCharsList.add(badCharName('\u007f'));
+        dvNameBadCharsList.add(badCharName('\\'));
+        dvNameBadCharsList.add(badCharName('/'));
+        dvNameBadCharsList.add(badCharName('>'));
+        dvNameBadCharsList.add(badCharName('\n')); // redundant: 0x0A is already added by the loop above
+        dvNameBadCharsList.add(badCharName('|'));
+
+        ErrorCode invalidNameErrCode = ErrorCode.INVALID_DATABASE_OBJECT_NAME;
+        for (DataverseName dvNameBad : dvNameBadCharsList) {
+            String sql = String.format("create dataverse %s;", dvNameBad);
+            try {
+                testExecutor.executeSqlppUpdateOrDdl(sql, cleanJson);
+                Assert.fail("Expected failure: " + invalidNameErrCode);
+            } catch (Exception e) {
+                Assert.assertTrue("Unexpected error message: " + e.getMessage(),
+                        e.getMessage().contains(invalidNameErrCode.errorCode()));
+            }
+        }
+    }
+
+    @NotNull
+    protected DataverseName badCharName(char c) {
+        return DataverseName.createSinglePartName("abc" + c + "def");
+    }
+}
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java
index bf72c19..e520271 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java
@@ -71,8 +71,8 @@
     }
 
     public boolean isMatch(ResourceReference resourceReference) {
-        return resourceReference.getDataverse().equals(dataverse.getCanonicalForm())
-                && resourceReference.getDataset().equals(dataset) && resourceReference.getRebalance().equals(rebalance);
+        return resourceReference.getDataverse().equals(dataverse) && resourceReference.getDataset().equals(dataset)
+                && resourceReference.getRebalance().equals(rebalance);
     }
 
     @Override
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java
index 7791926..0e78152 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java
@@ -21,24 +21,34 @@
 import java.io.File;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 
+import org.apache.asterix.common.metadata.DataverseName;
 import org.apache.asterix.common.utils.StorageConstants;
+import org.apache.asterix.common.utils.StoragePathUtil;
+import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hyracks.storage.am.lsm.common.impls.IndexComponentFileReference;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 
 public class ResourceReference {
 
+    private static final Logger LOGGER = LogManager.getLogger();
     protected final String root;
     protected final String partition;
-    protected final String dataverse; // == DataverseName.getCanonicalForm()
+    protected final DataverseName dataverse;
     protected final String dataset;
     protected final String rebalance;
     protected final String index;
     protected final String name;
-    private volatile Path relativePath;
+    private final Path relativePath;
 
     protected ResourceReference(String path) {
         // format: root/partition/dataverse/dataset/rebalanceCount/index/fileName
+        // format: root/partition/dataverse_p1[/^dataverse_p2[/^dataverse_p3...]]/dataset/rebalanceCount/index/fileName
         final String[] tokens = StringUtils.split(path, File.separatorChar);
         if (tokens.length < 6) {
             throw new IllegalStateException("Unrecognized path structure: " + path);
@@ -48,9 +58,40 @@
         index = tokens[--offset];
         rebalance = tokens[--offset];
         dataset = tokens[--offset];
-        dataverse = tokens[--offset]; //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
-        partition = tokens[--offset];
-        root = tokens[--offset];
+        List<String> dvParts = new ArrayList<>();
+        String dvPart = tokens[--offset];
+        while (dvPart.charAt(0) == StoragePathUtil.DATAVERSE_CONTINUATION_MARKER) {
+            dvParts.add(dvPart.substring(1));
+            dvPart = tokens[--offset];
+        }
+        String probablyPartition = tokens[--offset];
+        if (dvParts.isEmpty()) {
+            // root/partition/dataverse/dataset/rebalanceCount/index/fileName
+            dataverse = DataverseName.createSinglePartName(dvPart);
+            partition = probablyPartition;
+            root = tokens[--offset];
+        } else if (probablyPartition.startsWith(StorageConstants.PARTITION_DIR_PREFIX)) {
+            // root/partition/dataverse_p1/^dataverse_p2/.../^dataverse_pn/dataset/rebalanceCount/index/fileName
+            dvParts.add(dvPart);
+            Collections.reverse(dvParts);
+            dataverse = DataverseName.create(dvParts);
+            partition = probablyPartition;
+            root = tokens[--offset];
+        } else if (dvPart.startsWith(StorageConstants.PARTITION_DIR_PREFIX)) {
+            // root/partition/dataverse/dataset/rebalanceCount/index/fileName (where dataverse starts with ^)
+            if (dvParts.size() != 1) {
+                throw new IllegalArgumentException("unable to parse path: '" + path + "'!");
+            }
+            dataverse =
+                    DataverseName.createSinglePartName(StoragePathUtil.DATAVERSE_CONTINUATION_MARKER + dvParts.get(0));
+            LOGGER.info("legacy dataverse starting with ^ found: '{}'; this is not supported for new dataverses",
+                    dataverse);
+            partition = dvPart;
+            root = probablyPartition;
+        } else {
+            throw new IllegalArgumentException("unable to parse path: '" + path + "'!");
+        }
+        relativePath = Paths.get(root, ArrayUtils.subarray(tokens, offset + 1, tokens.length - 1));
     }
 
     public static ResourceReference ofIndex(String indexPath) {
@@ -65,7 +106,7 @@
         return partition;
     }
 
-    public String getDataverse() { //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
+    public DataverseName getDataverse() {
         return dataverse;
     }
 
@@ -86,19 +127,15 @@
     }
 
     public Path getRelativePath() {
-        if (relativePath == null) {
-            relativePath = Paths.get(root, partition, dataverse, dataset, rebalance, index);
-        }
         return relativePath;
     }
 
     public ResourceReference getDatasetReference() {
-        return ResourceReference
-                .ofIndex(Paths.get(root, partition, dataverse, dataset, rebalance, dataset).toFile().getPath());
+        return ResourceReference.ofIndex(relativePath.getParent().resolve(dataset).toFile().getPath());
     }
 
     public Path getFileRelativePath() {
-        return Paths.get(root, partition, dataverse, dataset, rebalance, index, name);
+        return relativePath.resolve(name);
     }
 
     public int getPartitionNum() {
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java
index 587b8b3..32a226e 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java
@@ -21,6 +21,7 @@
 import java.io.File;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.Iterator;
 
 import org.apache.asterix.common.cluster.ClusterPartition;
 import org.apache.asterix.common.metadata.DataverseName;
@@ -40,6 +41,7 @@
 public class StoragePathUtil {
 
     private static final Logger LOGGER = LogManager.getLogger();
+    public static final char DATAVERSE_CONTINUATION_MARKER = '^';
 
     private StoragePathUtil() {
     }
@@ -66,11 +68,21 @@
 
     public static String prepareDataverseIndexName(DataverseName dataverseName, String datasetName, String idxName,
             long rebalanceCount) {
-        return prepareDataverseIndexName(dataverseName, prepareFullIndexName(datasetName, idxName, rebalanceCount));
+        return prepareDataverseComponentName(dataverseName, prepareFullIndexName(datasetName, idxName, rebalanceCount));
     }
 
-    public static String prepareDataverseIndexName(DataverseName dataverseName, String fullIndexName) {
-        return dataverseName.getCanonicalForm() + File.separator + fullIndexName; //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
+    public static String prepareDataverseName(DataverseName dataverseName) {
+        Iterator<String> dvParts = dataverseName.getParts().iterator();
+        StringBuilder builder = new StringBuilder();
+        builder.append(dvParts.next());
+        while (dvParts.hasNext()) {
+            builder.append(File.separatorChar).append(DATAVERSE_CONTINUATION_MARKER).append(dvParts.next());
+        }
+        return builder.toString();
+    }
+
+    public static String prepareDataverseComponentName(DataverseName dataverseName, String component) {
+        return prepareDataverseName(dataverseName) + File.separatorChar + component;
     }
 
     private static String prepareFullIndexName(String datasetName, String idxName, long rebalanceCount) {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java
index 2110dee..7f3d911 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java
@@ -86,10 +86,10 @@
 
     public static FileSplit splitsForAdapter(DataverseName dataverseName, String feedName, String nodeName,
             ClusterPartition partition) {
-        String relPathFile = dataverseName.getCanonicalForm() + File.separator + feedName; //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
+        String relPathFile = StoragePathUtil.prepareDataverseComponentName(dataverseName, feedName);
         String storagePartitionPath = StoragePathUtil.prepareStoragePartitionPath(partition.getPartitionId());
         // Note: feed adapter instances in a single node share the feed logger
-        // format: 'storage dir name'/partition_#/dataverse/feed/node
+        // format: 'storage dir name'/partition_#/dataverse_part1[/^dataverse_part2[...]]/feed/node
         File f = new File(storagePartitionPath + File.separator + relPathFile + File.separator + nodeName);
         return StoragePathUtil.getFileSplitForClusterPartition(partition, f.getPath());
     }
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/MetadataProvider.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/MetadataProvider.java
index 9b85601..a534954 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/MetadataProvider.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/MetadataProvider.java
@@ -18,6 +18,8 @@
  */
 package org.apache.asterix.metadata.declared;
 
+import static org.apache.asterix.metadata.utils.MetadataConstants.METADATA_OBJECT_NAME_INVALID_CHARS;
+
 import java.io.File;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
@@ -1768,7 +1770,7 @@
         if (name == null || name.isEmpty()) {
             throw new AsterixException(ErrorCode.INVALID_DATABASE_OBJECT_NAME, sourceLoc, "<empty>");
         }
-        if (Character.isWhitespace(name.codePointAt(0))) {
+        if (Character.isWhitespace(name.codePointAt(0)) || METADATA_OBJECT_NAME_INVALID_CHARS.matcher(name).find()) {
             throw new AsterixException(ErrorCode.INVALID_DATABASE_OBJECT_NAME, sourceLoc, name);
         }
         int lengthUTF8 = name.getBytes(StandardCharsets.UTF_8).length;
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/MetadataConstants.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/MetadataConstants.java
index 6bd52fa..f1412db 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/MetadataConstants.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/MetadataConstants.java
@@ -19,15 +19,20 @@
 
 package org.apache.asterix.metadata.utils;
 
+import java.util.regex.Pattern;
+
 import org.apache.asterix.common.metadata.DataverseName;
+import org.apache.commons.lang3.SystemUtils;
 
 /**
  * Contains metadata constants
  */
 public class MetadataConstants {
 
-    public static final int METADATA_OBJECT_NAME_LENGTH_LIMIT_UTF8 = 255;
-    public static final int DATAVERSE_NAME_TOTAL_LENGTH_LIMIT_UTF8 = 1023;
+    public static final int METADATA_OBJECT_NAME_LENGTH_LIMIT_UTF8 = 251;
+    public static final int DATAVERSE_NAME_TOTAL_LENGTH_LIMIT_UTF8 = METADATA_OBJECT_NAME_LENGTH_LIMIT_UTF8 * 4;
+    public static final Pattern METADATA_OBJECT_NAME_INVALID_CHARS =
+            Pattern.compile(SystemUtils.IS_OS_WINDOWS ? "[\u0000-\u001F\u007F\"*/:<>\\\\|+,.;=\\[\\]\n]" : "[\u0000/]");
 
     // Name of the dataverse the metadata lives in.
     public static final DataverseName METADATA_DATAVERSE_NAME = DataverseName.createBuiltinDataverseName("Metadata");
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java
index b93674c..c85f661 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java
@@ -48,11 +48,10 @@
             DataverseName dataverseName) {
         List<FileSplit> splits = new ArrayList<>();
         // get all partitions
-        ClusterPartition[] clusterPartition = clusterStateManager.getClusterPartitons();
-        for (int j = 0; j < clusterPartition.length; j++) {
-            File f = new File(StoragePathUtil.prepareStoragePartitionPath(clusterPartition[j].getPartitionId()),
-                    dataverseName.getCanonicalForm()); //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
-            splits.add(StoragePathUtil.getFileSplitForClusterPartition(clusterPartition[j], f.getPath()));
+        for (ClusterPartition clusterPartition : clusterStateManager.getClusterPartitons()) {
+            File f = new File(StoragePathUtil.prepareStoragePartitionPath(clusterPartition.getPartitionId()),
+                    StoragePathUtil.prepareDataverseName(dataverseName));
+            splits.add(StoragePathUtil.getFileSplitForClusterPartition(clusterPartition, f.getPath()));
         }
         return splits.toArray(new FileSplit[] {});
     }