[ASTERIXDB-3196][*DB] Support S3 objects with special chars
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- Encode/decode S3 URIs to support objects with special chars.
- Add test case.
Change-Id: I75879b117c8b5bb761e138e6657ae1261a08f8de
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17605
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
Reviewed-by: Wail Alkowaileet <wael.y.k@gmail.com>
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java
index 505e239..f3fd805 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/cloud_storage/CloudStorageTest.java
@@ -26,7 +26,7 @@
import org.apache.asterix.test.runtime.LangExecutionUtil;
import org.apache.asterix.testframework.context.TestCaseContext;
import org.junit.AfterClass;
-import org.junit.Before;
+import org.junit.BeforeClass;
import org.junit.FixMethodOrder;
import org.junit.Test;
import org.junit.runner.RunWith;
@@ -51,8 +51,8 @@
this.tcCtx = tcCtx;
}
- @Before
- public void setUp() throws Exception {
+ @BeforeClass
+ public static void setUp() throws Exception {
CloudUtils.startS3CloudEnvironment();
LangExecutionUtil.setUp(CONFIG_FILE_NAME, testExecutor);
System.setProperty(GlobalConfig.CONFIG_FILE_PROPERTY, CONFIG_FILE_NAME);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.000.ddl.sqlpp
new file mode 100644
index 0000000..18a98e3
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.000.ddl.sqlpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE `part1`.`p%r t2` IF EXISTS;
+CREATE DATAVERSE `part1`.`p%r t2`;
+
+USE `part1`.`p%r t2`;
+CREATE COLLECTION `some@dataset` PRIMARY KEY (id: int);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.001.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.001.update.sqlpp
new file mode 100644
index 0000000..3629d7f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.001.update.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE `part1`.`p%r t2`;
+
+UPSERT INTO `some@dataset` {"id": 1};
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.002.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.002.query.sqlpp
new file mode 100644
index 0000000..ff2ab8f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.002.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE `part1`.`p%r t2`;
+
+SELECT VALUE COUNT(*)
+FROM `some@dataset`;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.999.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.999.ddl.sqlpp
new file mode 100644
index 0000000..3580ae9
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/cloud_storage/special-chars/test.999.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE `part1`.`p%r t2` IF EXISTS;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/cloud_storage/special-chars/result.002.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/cloud_storage/special-chars/result.002.adm
new file mode 100644
index 0000000..56a6051
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/cloud_storage/special-chars/result.002.adm
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml
index 5360f43..9c1d924 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_cloud_storage.xml
@@ -24,5 +24,10 @@
<output-dir compare="Text">query</output-dir>
</compilation-unit>
</test-case>
+ <test-case FilePath="cloud_storage">
+ <compilation-unit name="special-chars">
+ <output-dir compare="Text">special-chars</output-dir>
+ </compilation-unit>
+ </test-case>
</test-group>
</test-suite>
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
index 97b164f..6d65f69 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
@@ -223,7 +223,7 @@
private Set<String> filterAndGet(List<S3Object> contents, FilenameFilter filter) {
Set<String> files = new HashSet<>();
for (S3Object s3Object : contents) {
- String path = s3Object.key();
+ String path = S3Utils.decodeURI(s3Object.key());
if (filter.accept(null, IoUtil.getFileNameFromPath(path))) {
files.add(path);
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java
index 996b22d..e3e9e98 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3Utils.java
@@ -18,6 +18,10 @@
*/
package org.apache.asterix.cloud.clients.aws.s3;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URLDecoder;
+import java.nio.charset.Charset;
import java.util.List;
import software.amazon.awssdk.services.s3.S3Client;
@@ -33,11 +37,9 @@
public static List<S3Object> listS3Objects(S3Client s3Client, String bucket, String path) {
String newMarker = null;
-
ListObjectsV2Response listObjectsResponse;
ListObjectsV2Request.Builder listObjectsBuilder = ListObjectsV2Request.builder().bucket(bucket);
- listObjectsBuilder.prefix(path);
-
+ listObjectsBuilder.prefix(encodeURI(path));
while (true) {
// List the objects from the start, or from the last marker in case of truncated result
if (newMarker == null) {
@@ -55,4 +57,19 @@
}
return listObjectsResponse.contents();
}
+
+ public static String encodeURI(String path) {
+ if (path.isEmpty()) {
+ return path;
+ }
+ try {
+ return new URI("s3", "//", path).getRawFragment();
+ } catch (URISyntaxException e) {
+ throw new IllegalArgumentException(e);
+ }
+ }
+
+ public static String decodeURI(String path) {
+ return URLDecoder.decode(path, Charset.defaultCharset());
+ }
}