[ASTERIXDB-3393][STO] Refactor Cloud writer
- user model changes: no
- storage format changes: no
- interface changes: yes
Details:
Introduce ICloudWriter, which abstracts the write
operations, in preparation for other cloud providers.
Change-Id: I52124696b50d6dcc1f3c65b7e0fe251df1579ac5
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18259
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
Tested-by: Ian Maxon <imaxon@uci.edu>
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
index 368be26..7ab8a5a 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
@@ -34,6 +34,7 @@
import org.apache.asterix.cloud.bulk.NoOpDeleteBulkCallBack;
import org.apache.asterix.cloud.clients.CloudClientProvider;
import org.apache.asterix.cloud.clients.ICloudClient;
+import org.apache.asterix.cloud.clients.ICloudWriter;
import org.apache.asterix.cloud.util.CloudFileUtil;
import org.apache.asterix.common.api.INamespacePathResolver;
import org.apache.asterix.common.cloud.IPartitionBootstrapper;
@@ -71,7 +72,7 @@
this.bucket = cloudProperties.getStorageBucket();
cloudClient = CloudClientProvider.getClient(cloudProperties);
int numOfThreads = getIODevices().size() * getIOParallelism();
- writeBufferProvider = new WriteBufferProvider(numOfThreads);
+ writeBufferProvider = new WriteBufferProvider(numOfThreads, cloudClient.getWriteBufferSize());
partitions = new HashSet<>();
partitionPaths = new ArrayList<>();
this.localIoManager = ioManager;
@@ -165,8 +166,9 @@
@Override
public final IFileHandle open(FileReference fileRef, FileReadWriteMode rwMode, FileSyncMode syncMode)
throws HyracksDataException {
- CloudFileHandle fHandle = new CloudFileHandle(cloudClient, bucket, fileRef, writeBufferProvider);
- onOpen(fHandle, rwMode, syncMode);
+ ICloudWriter cloudWriter = cloudClient.createdWriter(bucket, fileRef.getRelativePath(), writeBufferProvider);
+ CloudFileHandle fHandle = new CloudFileHandle(fileRef, cloudWriter);
+ onOpen(fHandle);
try {
fHandle.open(rwMode, syncMode);
} catch (IOException e) {
@@ -180,18 +182,17 @@
*
* @param fileHandle file to open
*/
- protected abstract void onOpen(CloudFileHandle fileHandle, FileReadWriteMode rwMode, FileSyncMode syncMode)
- throws HyracksDataException;
+ protected abstract void onOpen(CloudFileHandle fileHandle) throws HyracksDataException;
@Override
public final long doSyncWrite(IFileHandle fHandle, long offset, ByteBuffer[] dataArray)
throws HyracksDataException {
long writtenBytes = localIoManager.doSyncWrite(fHandle, offset, dataArray);
- CloudResettableInputStream inputStream = ((CloudFileHandle) fHandle).getInputStream();
+ ICloudWriter cloudWriter = ((CloudFileHandle) fHandle).getCloudWriter();
try {
- inputStream.write(dataArray[0], dataArray[1]);
+ cloudWriter.write(dataArray[0], dataArray[1]);
} catch (HyracksDataException e) {
- inputStream.abort();
+ cloudWriter.abort();
throw e;
}
return writtenBytes;
@@ -200,11 +201,11 @@
@Override
public final int doSyncWrite(IFileHandle fHandle, long offset, ByteBuffer dataArray) throws HyracksDataException {
int writtenBytes = localIoManager.doSyncWrite(fHandle, offset, dataArray);
- CloudResettableInputStream inputStream = ((CloudFileHandle) fHandle).getInputStream();
+ ICloudWriter cloudWriter = ((CloudFileHandle) fHandle).getCloudWriter();
try {
- inputStream.write(dataArray);
+ cloudWriter.write(dataArray);
} catch (HyracksDataException e) {
- inputStream.abort();
+ cloudWriter.abort();
throw e;
}
return writtenBytes;
@@ -231,16 +232,16 @@
if (metadata) {
// only finish writing if metadata == true to prevent write limiter from finishing the stream and
// completing the upload.
- CloudResettableInputStream stream = ((CloudFileHandle) fileHandle).getInputStream();
+ ICloudWriter cloudWriter = ((CloudFileHandle) fileHandle).getCloudWriter();
try {
- stream.finish();
+ cloudWriter.finish();
} catch (HyracksDataException e) {
savedEx = e;
}
if (savedEx != null) {
try {
- stream.abort();
+ cloudWriter.abort();
} catch (HyracksDataException e) {
savedEx.addSuppressed(e);
}
@@ -286,7 +287,7 @@
/**
* Writes the bytes to the specified key in the bucket
*
- * @param key the key where the bytes will be written
+ * @param key the key where the bytes will be written
* @param bytes the bytes to write
*/
public final void put(String key, byte[] bytes) {
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/CloudFileHandle.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/CloudFileHandle.java
index 14c44ad..0ae93cf 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/CloudFileHandle.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/CloudFileHandle.java
@@ -20,20 +20,17 @@
import java.io.IOException;
-import org.apache.asterix.cloud.clients.ICloudBufferedWriter;
-import org.apache.asterix.cloud.clients.ICloudClient;
+import org.apache.asterix.cloud.clients.ICloudWriter;
import org.apache.hyracks.api.io.FileReference;
import org.apache.hyracks.api.io.IIOManager;
import org.apache.hyracks.control.nc.io.FileHandle;
public class CloudFileHandle extends FileHandle {
- private final CloudResettableInputStream inputStream;
+ private final ICloudWriter cloudWriter;
- public CloudFileHandle(ICloudClient cloudClient, String bucket, FileReference fileRef,
- IWriteBufferProvider bufferProvider) {
+ public CloudFileHandle(FileReference fileRef, ICloudWriter cloudWriter) {
super(fileRef);
- ICloudBufferedWriter bufferedWriter = cloudClient.createBufferedWriter(bucket, fileRef.getRelativePath());
- inputStream = new CloudResettableInputStream(bufferedWriter, bufferProvider);
+ this.cloudWriter = cloudWriter;
}
@Override
@@ -43,13 +40,7 @@
}
}
- @Override
- public synchronized void close() throws IOException {
- inputStream.close();
- super.close();
- }
-
- public CloudResettableInputStream getInputStream() {
- return inputStream;
+ public ICloudWriter getCloudWriter() {
+ return cloudWriter;
}
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/CloudOutputStream.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/CloudOutputStream.java
index 349b1b1..bea91fb 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/CloudOutputStream.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/CloudOutputStream.java
@@ -21,29 +21,31 @@
import java.io.IOException;
import java.io.OutputStream;
-public final class CloudOutputStream extends OutputStream {
- private final CloudResettableInputStream inputStream;
+import org.apache.asterix.cloud.clients.ICloudWriter;
- public CloudOutputStream(CloudResettableInputStream inputStream) {
- this.inputStream = inputStream;
+public final class CloudOutputStream extends OutputStream {
+ private final ICloudWriter cloudWriter;
+
+ public CloudOutputStream(ICloudWriter cloudWriter) {
+ this.cloudWriter = cloudWriter;
}
@Override
public void write(byte[] b, int off, int len) throws IOException {
- inputStream.write(b, off, len);
+ cloudWriter.write(b, off, len);
}
@Override
public void write(int b) throws IOException {
- inputStream.write(b);
+ cloudWriter.write(b);
}
@Override
public void close() throws IOException {
- inputStream.finish();
+ cloudWriter.finish();
}
public void abort() throws IOException {
- inputStream.abort();
+ cloudWriter.abort();
}
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/CloudResettableInputStream.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/CloudResettableInputStream.java
index 0533184..885d612 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/CloudResettableInputStream.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/CloudResettableInputStream.java
@@ -23,14 +23,13 @@
import java.nio.ByteBuffer;
import org.apache.asterix.cloud.clients.ICloudBufferedWriter;
+import org.apache.asterix.cloud.clients.ICloudWriter;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
-public class CloudResettableInputStream extends InputStream {
+public class CloudResettableInputStream extends InputStream implements ICloudWriter {
private static final Logger LOGGER = LogManager.getLogger();
- // TODO: make configurable
- public static final int MIN_BUFFER_SIZE = 5 * 1024 * 1024;
private final IWriteBufferProvider bufferProvider;
private ByteBuffer writeBuffer;
@@ -41,12 +40,10 @@
this.bufferProvider = bufferProvider;
}
- private void open() {
- if (writeBuffer == null) {
- writeBuffer = bufferProvider.getBuffer();
- writeBuffer.clear();
- }
- }
+ /* ************************************************************
+ * InputStream methods
+ * ************************************************************
+ */
@Override
public void reset() {
@@ -63,16 +60,23 @@
writeBuffer.mark();
}
- public void write(ByteBuffer header, ByteBuffer page) throws HyracksDataException {
- write(header);
- write(page);
+ /* ************************************************************
+ * ICloudWriter methods
+ * ************************************************************
+ */
+
+ @Override
+ public int write(ByteBuffer header, ByteBuffer page) throws HyracksDataException {
+ return write(header) + write(page);
}
+ @Override
public int write(ByteBuffer page) throws HyracksDataException {
open();
return write(page.array(), 0, page.limit());
}
+ @Override
public void write(int b) throws HyracksDataException {
if (writeBuffer.remaining() == 0) {
uploadAndWait();
@@ -80,6 +84,7 @@
writeBuffer.put((byte) b);
}
+ @Override
public int write(byte[] b, int off, int len) throws HyracksDataException {
open();
@@ -108,6 +113,23 @@
return len;
}
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ if (writeBuffer.remaining() == 0) {
+ return -1;
+ }
+
+ int length = Math.min(len, writeBuffer.remaining());
+ writeBuffer.get(b, off, length);
+ return length;
+ }
+
+ @Override
+ public int read() throws IOException {
+ return writeBuffer.get();
+ }
+
+ @Override
public void finish() throws HyracksDataException {
open();
try {
@@ -124,14 +146,32 @@
} finally {
returnBuffer();
}
+ doClose();
}
+ @Override
public void abort() throws HyracksDataException {
try {
bufferedWriter.abort();
} finally {
returnBuffer();
}
+ doClose();
+ }
+
+ private void open() {
+ if (writeBuffer == null) {
+ writeBuffer = bufferProvider.getBuffer();
+ writeBuffer.clear();
+ }
+ }
+
+ private void doClose() throws HyracksDataException {
+ try {
+ close();
+ } catch (IOException e) {
+ throw HyracksDataException.create(e);
+ }
}
private void uploadAndWait() throws HyracksDataException {
@@ -146,22 +186,6 @@
writeBuffer.clear();
}
- @Override
- public int read(byte[] b, int off, int len) throws IOException {
- if (writeBuffer.remaining() == 0) {
- return -1;
- }
-
- int length = Math.min(len, writeBuffer.remaining());
- writeBuffer.get(b, off, length);
- return length;
- }
-
- @Override
- public int read() throws IOException {
- return writeBuffer.get();
- }
-
private void returnBuffer() {
if (writeBuffer != null) {
bufferProvider.recycle(writeBuffer);
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
index d0b982c..0b4200c 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
@@ -65,7 +65,7 @@
}
@Override
- protected void onOpen(CloudFileHandle fileHandle, FileReadWriteMode rwMode, FileSyncMode syncMode) {
+ protected void onOpen(CloudFileHandle fileHandle) {
// NoOp
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
index 6ecd201..cb47d00 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
@@ -136,9 +136,8 @@
}
@Override
- protected void onOpen(CloudFileHandle fileHandle, FileReadWriteMode rwMode, FileSyncMode syncMode)
- throws HyracksDataException {
- accessor.doOnOpen(fileHandle, rwMode, syncMode);
+ protected void onOpen(CloudFileHandle fileHandle) throws HyracksDataException {
+ accessor.doOnOpen(fileHandle);
}
/*
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/WriteBufferProvider.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/WriteBufferProvider.java
index ee17400..d1eb1ee 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/WriteBufferProvider.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/WriteBufferProvider.java
@@ -18,8 +18,6 @@
*/
package org.apache.asterix.cloud;
-import static org.apache.asterix.cloud.CloudResettableInputStream.MIN_BUFFER_SIZE;
-
import java.nio.ByteBuffer;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
@@ -27,11 +25,13 @@
import org.apache.hyracks.util.annotations.ThreadSafe;
@ThreadSafe
-public class WriteBufferProvider implements IWriteBufferProvider {
+public final class WriteBufferProvider implements IWriteBufferProvider {
+ private final int bufferSize;
private final BlockingQueue<ByteBuffer> writeBuffers;
- public WriteBufferProvider(int ioParallelism) {
- writeBuffers = new ArrayBlockingQueue<>(ioParallelism);
+ public WriteBufferProvider(int numberOfBuffers, int bufferSize) {
+ this.bufferSize = bufferSize;
+ writeBuffers = new ArrayBlockingQueue<>(numberOfBuffers);
}
@Override
@@ -43,7 +43,7 @@
public ByteBuffer getBuffer() {
ByteBuffer writeBuffer = writeBuffers.poll();
if (writeBuffer == null) {
- return ByteBuffer.allocate(MIN_BUFFER_SIZE);
+ return ByteBuffer.allocate(bufferSize);
}
return writeBuffer;
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/WriterSingleBufferProvider.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/WriterSingleBufferProvider.java
index 287900d..fab1cc2 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/WriterSingleBufferProvider.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/WriterSingleBufferProvider.java
@@ -18,19 +18,16 @@
*/
package org.apache.asterix.cloud;
-import static org.apache.asterix.cloud.CloudResettableInputStream.MIN_BUFFER_SIZE;
-
import java.nio.ByteBuffer;
import org.apache.hyracks.util.annotations.NotThreadSafe;
@NotThreadSafe
-public class WriterSingleBufferProvider implements IWriteBufferProvider {
-
+public final class WriterSingleBufferProvider implements IWriteBufferProvider {
private final ByteBuffer buffer;
- public WriterSingleBufferProvider() {
- buffer = ByteBuffer.allocate(MIN_BUFFER_SIZE);
+ public WriterSingleBufferProvider(int size) {
+ buffer = ByteBuffer.allocate(size);
}
@Override
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/CloudClientProvider.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/CloudClientProvider.java
index cc511c7..35ab467 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/CloudClientProvider.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/CloudClientProvider.java
@@ -26,6 +26,8 @@
import org.apache.hyracks.api.exceptions.HyracksDataException;
public class CloudClientProvider {
+ private static final String S3 = "s3";
+ private static final String GCS = "gcs";
private CloudClientProvider() {
throw new AssertionError("do not instantiate");
@@ -33,10 +35,10 @@
public static ICloudClient getClient(CloudProperties cloudProperties) throws HyracksDataException {
String storageScheme = cloudProperties.getStorageScheme();
- if ("s3".equalsIgnoreCase(storageScheme)) {
+ if (S3.equalsIgnoreCase(storageScheme)) {
S3ClientConfig config = S3ClientConfig.of(cloudProperties);
return new S3CloudClient(config);
- } else if ("gcs".equalsIgnoreCase(storageScheme)) {
+ } else if (GCS.equalsIgnoreCase(storageScheme)) {
GCSClientConfig config = GCSClientConfig.of(cloudProperties);
return new GCSCloudClient(config);
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/ICloudClient.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/ICloudClient.java
index 2bd0802..0307846 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/ICloudClient.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/ICloudClient.java
@@ -24,6 +24,7 @@
import java.util.Collection;
import java.util.Set;
+import org.apache.asterix.cloud.IWriteBufferProvider;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.io.FileReference;
import org.apache.hyracks.control.nc.io.IOManager;
@@ -35,15 +36,20 @@
* Interface containing methods to perform IO operation on the Cloud Storage
*/
public interface ICloudClient {
+ /**
+ * @return write buffer size
+ */
+ int getWriteBufferSize();
/**
* Creates a cloud buffered writer
*
- * @param bucket bucket to write to
- * @param path path to write to
- * @return buffered writer
+ * @param bucket bucket to write to
+ * @param path path to write to
+ * @param bufferProvider buffer provider
+ * @return cloud writer
*/
- ICloudBufferedWriter createBufferedWriter(String bucket, String path);
+ ICloudWriter createdWriter(String bucket, String path, IWriteBufferProvider bufferProvider);
/**
* Lists objects at the specified bucket and path, and applies the file name filter on the returned objects
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/ICloudWriter.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/ICloudWriter.java
new file mode 100644
index 0000000..15822c4
--- /dev/null
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/ICloudWriter.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.cloud.clients;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+/**
+ * A cloud-based writer that write bytes sequentially in a cloud blob storage
+ */
+public interface ICloudWriter {
+ /**
+ * Write a header and a page
+ *
+ * @param header to write
+ * @param page to write
+ * @return written bytes
+ */
+ int write(ByteBuffer header, ByteBuffer page) throws HyracksDataException;
+
+ /**
+ * Write a page
+ *
+ * @param page to write
+ * @return written bytes
+ */
+ int write(ByteBuffer page) throws HyracksDataException;
+
+ /**
+ * Write a byte
+ *
+ * @param b to write
+ */
+ void write(int b) throws HyracksDataException;
+
+ /**
+ * Write a byte array
+ *
+ * @param b bytes to write
+ * @param off starting offset
+ * @param len length to write
+ * @return written bytes
+ */
+ int write(byte[] b, int off, int len) throws HyracksDataException;
+
+ /**
+ * Finish the write operation
+ * Note: this should be called upon successful write
+ */
+ void finish() throws HyracksDataException;
+
+ /**
+ * Abort the write operation
+ * Note: should be called instead of {@link #finish()} when the write operation encountered an error
+ */
+ void abort() throws HyracksDataException;
+}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java
index bc13078..161fb37 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java
@@ -22,12 +22,14 @@
import org.apache.asterix.common.config.CloudProperties;
import org.apache.asterix.external.util.aws.s3.S3Constants;
+import org.apache.hyracks.util.StorageUtil;
import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
public final class S3ClientConfig {
+ static final int WRITE_BUFFER_SIZE = StorageUtil.getIntSizeInBytes(5, StorageUtil.StorageUnit.MEGABYTE);
// The maximum number of file that can be deleted (AWS restriction)
static final int DELETE_BATCH_SIZE = 1000;
private final String region;
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
index 5cdf971..9c31e17 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
@@ -35,8 +35,11 @@
import java.util.List;
import java.util.Set;
+import org.apache.asterix.cloud.CloudResettableInputStream;
+import org.apache.asterix.cloud.IWriteBufferProvider;
import org.apache.asterix.cloud.clients.ICloudBufferedWriter;
import org.apache.asterix.cloud.clients.ICloudClient;
+import org.apache.asterix.cloud.clients.ICloudWriter;
import org.apache.asterix.cloud.clients.IParallelDownloader;
import org.apache.asterix.cloud.clients.profiler.CountRequestProfiler;
import org.apache.asterix.cloud.clients.profiler.IRequestProfiler;
@@ -69,7 +72,7 @@
import software.amazon.awssdk.services.s3.model.S3Object;
@ThreadSafe
-public class S3CloudClient implements ICloudClient {
+public final class S3CloudClient implements ICloudClient {
private final S3ClientConfig config;
private final S3Client s3Client;
private final IRequestProfiler profiler;
@@ -90,8 +93,14 @@
}
@Override
- public ICloudBufferedWriter createBufferedWriter(String bucket, String path) {
- return new S3BufferedWriter(s3Client, profiler, bucket, path);
+ public int getWriteBufferSize() {
+ return S3ClientConfig.WRITE_BUFFER_SIZE;
+ }
+
+ @Override
+ public ICloudWriter createdWriter(String bucket, String path, IWriteBufferProvider bufferProvider) {
+ ICloudBufferedWriter bufferedWriter = new S3BufferedWriter(s3Client, profiler, bucket, path);
+ return new CloudResettableInputStream(bufferedWriter, bufferProvider);
}
@Override
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSClientConfig.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSClientConfig.java
index e8e4480..4edb7a7 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSClientConfig.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSClientConfig.java
@@ -25,12 +25,14 @@
import org.apache.asterix.common.config.CloudProperties;
import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.StorageUtil;
import com.google.auth.oauth2.GoogleCredentials;
import com.google.auth.oauth2.OAuth2Credentials;
import com.google.cloud.NoCredentials;
public class GCSClientConfig {
+ public static final int WRITE_BUFFER_SIZE = StorageUtil.getIntSizeInBytes(1, StorageUtil.StorageUnit.MEGABYTE);
// The maximum number of files that can be deleted (GCS restriction): https://cloud.google.com/storage/quotas#json-requests
static final int DELETE_BATCH_SIZE = 100;
private final String region;
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSCloudClient.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSCloudClient.java
index 2b7303d..c725ca5 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSCloudClient.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSCloudClient.java
@@ -32,8 +32,9 @@
import java.util.List;
import java.util.Set;
-import org.apache.asterix.cloud.clients.ICloudBufferedWriter;
+import org.apache.asterix.cloud.IWriteBufferProvider;
import org.apache.asterix.cloud.clients.ICloudClient;
+import org.apache.asterix.cloud.clients.ICloudWriter;
import org.apache.asterix.cloud.clients.IParallelDownloader;
import org.apache.asterix.cloud.clients.profiler.CountRequestProfiler;
import org.apache.asterix.cloud.clients.profiler.IRequestProfiler;
@@ -60,7 +61,6 @@
import com.google.cloud.storage.StorageOptions;
public class GCSCloudClient implements ICloudClient {
-
private final Storage gcsClient;
private final GCSClientConfig config;
private final IRequestProfiler profiler;
@@ -80,18 +80,14 @@
this(config, buildClient(config));
}
- private static Storage buildClient(GCSClientConfig config) throws HyracksDataException {
- StorageOptions.Builder builder = StorageOptions.newBuilder().setCredentials(config.createCredentialsProvider());
-
- if (config.getEndpoint() != null && !config.getEndpoint().isEmpty()) {
- builder.setHost(config.getEndpoint());
- }
- return builder.build().getService();
+ @Override
+ public int getWriteBufferSize() {
+ return GCSClientConfig.WRITE_BUFFER_SIZE;
}
@Override
- public ICloudBufferedWriter createBufferedWriter(String bucket, String path) {
- return new GCSBufferedWriter(bucket, path, gcsClient, profiler);
+ public ICloudWriter createdWriter(String bucket, String path, IWriteBufferProvider bufferProvider) {
+ return new GCSWriter(bucket, path, gcsClient, profiler);
}
@Override
@@ -115,12 +111,10 @@
BlobId blobId = BlobId.of(bucket, path);
long readTo = offset + buffer.remaining();
int totalRead = 0;
- int read = 0;
try (ReadChannel from = gcsClient.reader(blobId).limit(readTo)) {
while (buffer.remaining() > 0) {
from.seek(offset + totalRead);
- read = from.read(buffer);
- totalRead += read;
+ totalRead += from.read(buffer);
}
} catch (IOException | StorageException ex) {
throw HyracksDataException.create(ex);
@@ -248,4 +242,13 @@
throw HyracksDataException.create(ex);
}
}
+
+ private static Storage buildClient(GCSClientConfig config) throws HyracksDataException {
+ StorageOptions.Builder builder = StorageOptions.newBuilder().setCredentials(config.createCredentialsProvider());
+
+ if (config.getEndpoint() != null && !config.getEndpoint().isEmpty()) {
+ builder.setHost(config.getEndpoint());
+ }
+ return builder.build().getService();
+ }
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSBufferedWriter.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSWriter.java
similarity index 74%
rename from asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSBufferedWriter.java
rename to asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSWriter.java
index 4f9d437..cccd9ec 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSBufferedWriter.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/google/gcs/GCSWriter.java
@@ -18,13 +18,12 @@
*/
package org.apache.asterix.cloud.clients.google.gcs;
-import static org.apache.asterix.cloud.CloudResettableInputStream.MIN_BUFFER_SIZE;
+import static org.apache.asterix.cloud.clients.google.gcs.GCSClientConfig.WRITE_BUFFER_SIZE;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.ByteBuffer;
-import org.apache.asterix.cloud.clients.ICloudBufferedWriter;
+import org.apache.asterix.cloud.clients.ICloudWriter;
import org.apache.asterix.cloud.clients.profiler.IRequestProfiler;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.logging.log4j.LogManager;
@@ -35,17 +34,16 @@
import com.google.cloud.storage.BlobInfo;
import com.google.cloud.storage.Storage;
-public class GCSBufferedWriter implements ICloudBufferedWriter {
+public class GCSWriter implements ICloudWriter {
private static final Logger LOGGER = LogManager.getLogger();
private final String bucket;
private final String path;
private final IRequestProfiler profiler;
private final Storage gcsClient;
private boolean uploadStarted = false;
- private int partNumber;
private WriteChannel writer = null;
- public GCSBufferedWriter(String bucket, String path, Storage gcsClient, IRequestProfiler profiler) {
+ public GCSWriter(String bucket, String path, Storage gcsClient, IRequestProfiler profiler) {
this.bucket = bucket;
this.path = path;
this.profiler = profiler;
@@ -53,30 +51,39 @@
}
@Override
- public int upload(InputStream stream, int length) throws HyracksDataException {
+ public int write(ByteBuffer header, ByteBuffer page) throws HyracksDataException {
+ return write(header) + write(page);
+ }
+
+ @Override
+ public int write(ByteBuffer page) throws HyracksDataException {
profiler.objectMultipartUpload();
setUploadId();
+ int written = 0;
try {
- ByteBuffer buffer = ByteBuffer.wrap(stream.readNBytes(length));
- while (buffer.hasRemaining()) {
- writer.write(buffer);
+ while (page.hasRemaining()) {
+ written += writer.write(page);
}
} catch (IOException e) {
throw HyracksDataException.create(e);
}
- return partNumber++;
+
+ return written;
}
@Override
- public boolean isEmpty() {
- return !uploadStarted;
+ public int write(byte[] b, int off, int len) throws HyracksDataException {
+ return write(ByteBuffer.wrap(b, off, len));
+ }
+
+ @Override
+ public void write(int b) throws HyracksDataException {
+ write(ByteBuffer.wrap(new byte[] { (byte) b }));
}
@Override
public void finish() throws HyracksDataException {
- if (!uploadStarted) {
- throw new IllegalStateException("Cannot finish without writing any bytes");
- }
+ setUploadId();
profiler.objectMultipartUpload();
try {
writer.close();
@@ -99,9 +106,8 @@
private void setUploadId() {
if (!uploadStarted) {
uploadStarted = true;
- partNumber = 1;
writer = gcsClient.writer(BlobInfo.newBuilder(BlobId.of(bucket, path)).build());
- writer.setChunkSize(MIN_BUFFER_SIZE);
+ writer.setChunkSize(WRITE_BUFFER_SIZE);
log("STARTED");
}
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
index 8f803a0..534ff5d 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
@@ -25,15 +25,13 @@
import org.apache.asterix.cloud.bulk.IBulkOperationCallBack;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.io.FileReference;
-import org.apache.hyracks.api.io.IIOManager;
public interface ILazyAccessor {
boolean isLocalAccessor();
IBulkOperationCallBack getBulkOperationCallBack();
- void doOnOpen(CloudFileHandle fileHandle, IIOManager.FileReadWriteMode rwMode, IIOManager.FileSyncMode syncMode)
- throws HyracksDataException;
+ void doOnOpen(CloudFileHandle fileHandle) throws HyracksDataException;
Set<FileReference> doList(FileReference dir, FilenameFilter filter) throws HyracksDataException;
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
index 378cf03..ae32402 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
@@ -27,7 +27,6 @@
import org.apache.asterix.cloud.clients.ICloudClient;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.io.FileReference;
-import org.apache.hyracks.api.io.IIOManager;
import org.apache.hyracks.control.nc.io.IOManager;
/**
@@ -50,8 +49,7 @@
}
@Override
- public void doOnOpen(CloudFileHandle fileHandle, IIOManager.FileReadWriteMode rwMode,
- IIOManager.FileSyncMode syncMode) throws HyracksDataException {
+ public void doOnOpen(CloudFileHandle fileHandle) throws HyracksDataException {
// NoOp
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
index 277d425..e4e168e 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
@@ -30,7 +30,6 @@
import org.apache.asterix.common.utils.StoragePathUtil;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.io.FileReference;
-import org.apache.hyracks.api.io.IIOManager;
import org.apache.hyracks.control.nc.io.IOManager;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -70,8 +69,7 @@
}
@Override
- public void doOnOpen(CloudFileHandle fileHandle, IIOManager.FileReadWriteMode rwMode,
- IIOManager.FileSyncMode syncMode) throws HyracksDataException {
+ public void doOnOpen(CloudFileHandle fileHandle) throws HyracksDataException {
FileReference fileRef = fileHandle.getFileReference();
if (!localIoManager.exists(fileRef) && cloudClient.exists(bucket, fileRef.getRelativePath())) {
if (cacher.downloadData(fileRef)) {
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/AbstractCloudExternalFileWriter.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/AbstractCloudExternalFileWriter.java
index bbae29a..4277800 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/AbstractCloudExternalFileWriter.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/AbstractCloudExternalFileWriter.java
@@ -21,11 +21,10 @@
import static org.apache.hyracks.api.util.ExceptionUtils.getMessageOrToString;
import org.apache.asterix.cloud.CloudOutputStream;
-import org.apache.asterix.cloud.CloudResettableInputStream;
import org.apache.asterix.cloud.IWriteBufferProvider;
import org.apache.asterix.cloud.WriterSingleBufferProvider;
-import org.apache.asterix.cloud.clients.ICloudBufferedWriter;
import org.apache.asterix.cloud.clients.ICloudClient;
+import org.apache.asterix.cloud.clients.ICloudWriter;
import org.apache.asterix.common.exceptions.ErrorCode;
import org.apache.asterix.common.exceptions.RuntimeDataException;
import org.apache.asterix.runtime.writer.IExternalFileWriter;
@@ -46,7 +45,7 @@
private final IWarningCollector warningCollector;
private final SourceLocation pathSourceLocation;
private final IWriteBufferProvider bufferProvider;
- private ICloudBufferedWriter bufferedWriter;
+ private ICloudWriter cloudWriter;
AbstractCloudExternalFileWriter(IExternalPrinter printer, ICloudClient cloudClient, String bucket,
boolean partitionedPath, IWarningCollector warningCollector, SourceLocation pathSourceLocation) {
@@ -56,7 +55,7 @@
this.partitionedPath = partitionedPath;
this.warningCollector = warningCollector;
this.pathSourceLocation = pathSourceLocation;
- bufferProvider = new WriterSingleBufferProvider();
+ bufferProvider = new WriterSingleBufferProvider(cloudClient.getWriteBufferSize());
}
@Override
@@ -82,10 +81,8 @@
return false;
}
- bufferedWriter = cloudClient.createBufferedWriter(bucket, fullPath);
- CloudResettableInputStream inputStream = new CloudResettableInputStream(bufferedWriter, bufferProvider);
-
- CloudOutputStream outputStream = new CloudOutputStream(inputStream);
+ cloudWriter = cloudClient.createdWriter(bucket, fullPath, bufferProvider);
+ CloudOutputStream outputStream = new CloudOutputStream(cloudWriter);
printer.newStream(outputStream);
return true;
@@ -108,8 +105,8 @@
@Override
public final void abort() throws HyracksDataException {
try {
- if (bufferedWriter != null) {
- bufferedWriter.abort();
+ if (cloudWriter != null) {
+ cloudWriter.abort();
}
printer.close();
} catch (HyracksDataException e) {
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/AbstractCloudExternalFileWriterFactory.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/AbstractCloudExternalFileWriterFactory.java
new file mode 100644
index 0000000..75c4ec5
--- /dev/null
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/AbstractCloudExternalFileWriterFactory.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.cloud.writer;
+
+import static org.apache.asterix.cloud.writer.AbstractCloudExternalFileWriter.isExceedingMaxLength;
+import static org.apache.hyracks.api.util.ExceptionUtils.getMessageOrToString;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.asterix.cloud.IWriteBufferProvider;
+import org.apache.asterix.cloud.WriterSingleBufferProvider;
+import org.apache.asterix.cloud.clients.ICloudClient;
+import org.apache.asterix.cloud.clients.ICloudWriter;
+import org.apache.asterix.common.exceptions.CompilationException;
+import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.runtime.writer.ExternalFileWriterConfiguration;
+import org.apache.asterix.runtime.writer.IExternalFileWriterFactory;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.exceptions.SourceLocation;
+import org.apache.hyracks.api.util.ExceptionUtils;
+import org.apache.hyracks.data.std.primitive.LongPointable;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+abstract class AbstractCloudExternalFileWriterFactory implements IExternalFileWriterFactory {
+ private static final long serialVersionUID = -6204498482419719403L;
+ private static final Logger LOGGER = LogManager.getLogger();
+
+ protected final Map<String, String> configuration;
+ protected final SourceLocation pathSourceLocation;
+ protected final String staticPath;
+ protected transient ICloudClient cloudClient;
+
+ AbstractCloudExternalFileWriterFactory(ExternalFileWriterConfiguration externalConfig) {
+ configuration = externalConfig.getConfiguration();
+ pathSourceLocation = externalConfig.getPathSourceLocation();
+ staticPath = externalConfig.getStaticPath();
+ }
+
+ abstract ICloudClient createCloudClient() throws CompilationException;
+
+ abstract boolean isNoContainerFoundException(IOException e);
+
+ abstract boolean isSdkException(Throwable e);
+
+ final void buildClient() throws HyracksDataException {
+ try {
+ synchronized (this) {
+ if (cloudClient == null) {
+ cloudClient = createCloudClient();
+ }
+ }
+ } catch (CompilationException e) {
+ throw HyracksDataException.create(e);
+ }
+ }
+
+ @Override
+ public final void validate() throws AlgebricksException {
+ ICloudClient testClient = createCloudClient();
+ String bucket = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME);
+
+ if (bucket == null || bucket.isEmpty()) {
+ throw new CompilationException(ErrorCode.PARAMETERS_REQUIRED,
+ ExternalDataConstants.CONTAINER_NAME_FIELD_NAME);
+ }
+
+ try {
+ doValidate(testClient, bucket);
+ } catch (IOException e) {
+ if (isNoContainerFoundException(e)) {
+ throw CompilationException.create(ErrorCode.EXTERNAL_SOURCE_CONTAINER_NOT_FOUND, bucket);
+ } else {
+ throw CompilationException.create(ErrorCode.EXTERNAL_SINK_ERROR,
+ ExceptionUtils.getMessageOrToString(e));
+ }
+ } catch (Throwable e) {
+ if (isSdkException(e)) {
+ throw CompilationException.create(ErrorCode.EXTERNAL_SINK_ERROR, e, getMessageOrToString(e));
+ }
+ throw e;
+ }
+ }
+
+ private void doValidate(ICloudClient testClient, String bucket) throws IOException, AlgebricksException {
+ if (staticPath != null) {
+ if (isExceedingMaxLength(staticPath, S3ExternalFileWriter.MAX_LENGTH_IN_BYTES)) {
+ throw new CompilationException(ErrorCode.WRITE_PATH_LENGTH_EXCEEDS_MAX_LENGTH, pathSourceLocation,
+ staticPath, S3ExternalFileWriter.MAX_LENGTH_IN_BYTES,
+ ExternalDataConstants.KEY_ADAPTER_NAME_AWS_S3);
+ }
+
+ if (!testClient.isEmptyPrefix(bucket, staticPath)) {
+ // Ensure that the static path is empty
+ throw new CompilationException(ErrorCode.DIRECTORY_IS_NOT_EMPTY, pathSourceLocation, staticPath);
+ }
+ }
+
+ // do not validate write permissions if specified by the user not to do so
+ String validateWritePermissions = configuration
+ .getOrDefault(ExternalDataConstants.KEY_VALIDATE_WRITE_PERMISSION, Boolean.TRUE.toString());
+ if (!Boolean.parseBoolean(validateWritePermissions)) {
+ return;
+ }
+
+ Random random = new Random();
+ String pathPrefix = "testFile";
+ String path = pathPrefix + random.nextInt();
+ while (testClient.exists(bucket, path)) {
+ path = pathPrefix + random.nextInt();
+ }
+
+ long writeValue = random.nextLong();
+ byte[] data = new byte[Long.BYTES];
+ LongPointable.setLong(data, 0, writeValue);
+ IWriteBufferProvider bufferProvider = new WriterSingleBufferProvider(testClient.getWriteBufferSize());
+ ICloudWriter writer = testClient.createdWriter(bucket, path, bufferProvider);
+ boolean aborted = false;
+ try {
+ writer.write(data, 0, data.length);
+ } catch (HyracksDataException e) {
+ writer.abort();
+ aborted = true;
+ } finally {
+ if (writer != null && !aborted) {
+ writer.finish();
+ }
+ }
+
+ try {
+ long readValue = LongPointable.getLong(testClient.readAllBytes(bucket, path), 0);
+ if (writeValue != readValue) {
+ // This should never happen unless S3 is messed up. But log for sanity check
+ LOGGER.warn(
+ "The writer can write but the written values wasn't successfully read back (wrote: {}, read:{})",
+ writeValue, readValue);
+ }
+ } finally {
+ // Delete the written file
+ testClient.deleteObjects(bucket, Collections.singleton(path));
+ }
+ }
+}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/GCSExternalFileWriterFactory.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/GCSExternalFileWriterFactory.java
index ca93ff6..3245869 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/GCSExternalFileWriterFactory.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/GCSExternalFileWriterFactory.java
@@ -18,22 +18,12 @@
*/
package org.apache.asterix.cloud.writer;
-import static org.apache.asterix.cloud.writer.AbstractCloudExternalFileWriter.isExceedingMaxLength;
-import static org.apache.hyracks.api.util.ExceptionUtils.getMessageOrToString;
-
import java.io.IOException;
-import java.util.Collections;
-import java.util.Map;
-import java.util.Random;
-import org.apache.asterix.cloud.CloudResettableInputStream;
-import org.apache.asterix.cloud.WriterSingleBufferProvider;
-import org.apache.asterix.cloud.clients.ICloudBufferedWriter;
import org.apache.asterix.cloud.clients.ICloudClient;
import org.apache.asterix.cloud.clients.google.gcs.GCSClientConfig;
import org.apache.asterix.cloud.clients.google.gcs.GCSCloudClient;
import org.apache.asterix.common.exceptions.CompilationException;
-import org.apache.asterix.common.exceptions.ErrorCode;
import org.apache.asterix.external.util.ExternalDataConstants;
import org.apache.asterix.external.util.google.gcs.GCSUtils;
import org.apache.asterix.runtime.writer.ExternalFileWriterConfiguration;
@@ -42,22 +32,15 @@
import org.apache.asterix.runtime.writer.IExternalFileWriterFactoryProvider;
import org.apache.asterix.runtime.writer.IExternalPrinter;
import org.apache.asterix.runtime.writer.IExternalPrinterFactory;
-import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.exceptions.IWarningCollector;
-import org.apache.hyracks.api.exceptions.SourceLocation;
-import org.apache.hyracks.api.util.ExceptionUtils;
-import org.apache.hyracks.data.std.primitive.LongPointable;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
import com.google.cloud.BaseServiceException;
import com.google.cloud.storage.StorageException;
-public final class GCSExternalFileWriterFactory implements IExternalFileWriterFactory {
+public final class GCSExternalFileWriterFactory extends AbstractCloudExternalFileWriterFactory {
private static final long serialVersionUID = 1L;
- private static final Logger LOGGER = LogManager.getLogger();
static final char SEPARATOR = '/';
public static final IExternalFileWriterFactoryProvider PROVIDER = new IExternalFileWriterFactoryProvider() {
@Override
@@ -70,19 +53,29 @@
return SEPARATOR;
}
};
- private final Map<String, String> configuration;
- private final SourceLocation pathSourceLocation;
- private final String staticPath;
- private transient GCSCloudClient cloudClient;
private GCSExternalFileWriterFactory(ExternalFileWriterConfiguration externalConfig) {
- configuration = externalConfig.getConfiguration();
- pathSourceLocation = externalConfig.getPathSourceLocation();
- staticPath = externalConfig.getStaticPath();
+ super(externalConfig);
cloudClient = null;
}
@Override
+ ICloudClient createCloudClient() throws CompilationException {
+ GCSClientConfig config = GCSClientConfig.of(configuration);
+ return new GCSCloudClient(config, GCSUtils.buildClient(configuration));
+ }
+
+ @Override
+ boolean isNoContainerFoundException(IOException e) {
+ return e.getCause() instanceof StorageException;
+ }
+
+ @Override
+ boolean isSdkException(Throwable e) {
+ return e instanceof BaseServiceException;
+ }
+
+ @Override
public IExternalFileWriter createWriter(IHyracksTaskContext context, IExternalPrinterFactory printerFactory)
throws HyracksDataException {
buildClient();
@@ -93,103 +86,8 @@
pathSourceLocation);
}
- private void buildClient() throws HyracksDataException {
- try {
- synchronized (this) {
- if (cloudClient == null) {
- GCSClientConfig config = GCSClientConfig.of(configuration);
- cloudClient = new GCSCloudClient(config, GCSUtils.buildClient(configuration));
- }
- }
- } catch (CompilationException e) {
- throw HyracksDataException.create(e);
- }
- }
-
@Override
public char getSeparator() {
return SEPARATOR;
}
-
- @Override
- public void validate() throws AlgebricksException {
- GCSClientConfig config = GCSClientConfig.of(configuration);
- ICloudClient testClient = new GCSCloudClient(config, GCSUtils.buildClient(configuration));
- String bucket = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME);
-
- if (bucket == null || bucket.isEmpty()) {
- throw new CompilationException(ErrorCode.PARAMETERS_REQUIRED,
- ExternalDataConstants.CONTAINER_NAME_FIELD_NAME);
- }
-
- try {
- doValidate(testClient, bucket);
- } catch (IOException e) {
- if (e.getCause() instanceof StorageException) {
- throw CompilationException.create(ErrorCode.EXTERNAL_SOURCE_CONTAINER_NOT_FOUND, bucket);
- } else {
- throw CompilationException.create(ErrorCode.EXTERNAL_SINK_ERROR,
- ExceptionUtils.getMessageOrToString(e));
- }
- } catch (BaseServiceException e) {
- throw CompilationException.create(ErrorCode.EXTERNAL_SINK_ERROR, e, getMessageOrToString(e));
- }
- }
-
- private void doValidate(ICloudClient testClient, String bucket) throws IOException, AlgebricksException {
- if (staticPath != null) {
- if (isExceedingMaxLength(staticPath, GCSExternalFileWriter.MAX_LENGTH_IN_BYTES)) {
- throw new CompilationException(ErrorCode.WRITE_PATH_LENGTH_EXCEEDS_MAX_LENGTH, pathSourceLocation,
- staticPath, GCSExternalFileWriter.MAX_LENGTH_IN_BYTES,
- ExternalDataConstants.KEY_ADAPTER_NAME_GCS);
- }
-
- if (!testClient.isEmptyPrefix(bucket, staticPath)) {
- throw new CompilationException(ErrorCode.DIRECTORY_IS_NOT_EMPTY, pathSourceLocation, staticPath);
- }
- }
-
- String validateWritePermissions = configuration
- .getOrDefault(ExternalDataConstants.KEY_VALIDATE_WRITE_PERMISSION, Boolean.TRUE.toString());
- if (!Boolean.parseBoolean(validateWritePermissions)) {
- return;
- }
-
- Random random = new Random();
- String pathPrefix = "testFile";
- String path = pathPrefix + random.nextInt();
- while (testClient.exists(bucket, path)) {
- path = pathPrefix + random.nextInt();
- }
-
- long writeValue = random.nextLong();
- byte[] data = new byte[Long.BYTES];
- LongPointable.setLong(data, 0, writeValue);
- ICloudBufferedWriter writer = testClient.createBufferedWriter(bucket, path);
- CloudResettableInputStream stream = null;
- boolean aborted = false;
- try {
- stream = new CloudResettableInputStream(writer, new WriterSingleBufferProvider());
- stream.write(data, 0, data.length);
- } catch (HyracksDataException e) {
- stream.abort();
- aborted = true;
- } finally {
- if (stream != null && !aborted) {
- stream.finish();
- stream.close();
- }
- }
-
- try {
- long readValue = LongPointable.getLong(testClient.readAllBytes(bucket, path), 0);
- if (writeValue != readValue) {
- LOGGER.warn(
- "The writer can write but the written values wasn't successfully read back (wrote: {}, read:{})",
- writeValue, readValue);
- }
- } finally {
- testClient.deleteObjects(bucket, Collections.singleton(path));
- }
- }
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/S3ExternalFileWriterFactory.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/S3ExternalFileWriterFactory.java
index cdaa6dc..96aa929 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/S3ExternalFileWriterFactory.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/writer/S3ExternalFileWriterFactory.java
@@ -18,22 +18,12 @@
*/
package org.apache.asterix.cloud.writer;
-import static org.apache.asterix.cloud.writer.AbstractCloudExternalFileWriter.isExceedingMaxLength;
-import static org.apache.hyracks.api.util.ExceptionUtils.getMessageOrToString;
-
import java.io.IOException;
-import java.util.Collections;
-import java.util.Map;
-import java.util.Random;
-import org.apache.asterix.cloud.CloudResettableInputStream;
-import org.apache.asterix.cloud.WriterSingleBufferProvider;
-import org.apache.asterix.cloud.clients.ICloudBufferedWriter;
import org.apache.asterix.cloud.clients.ICloudClient;
import org.apache.asterix.cloud.clients.aws.s3.S3ClientConfig;
import org.apache.asterix.cloud.clients.aws.s3.S3CloudClient;
import org.apache.asterix.common.exceptions.CompilationException;
-import org.apache.asterix.common.exceptions.ErrorCode;
import org.apache.asterix.external.util.ExternalDataConstants;
import org.apache.asterix.external.util.aws.s3.S3Utils;
import org.apache.asterix.runtime.writer.ExternalFileWriterConfiguration;
@@ -42,22 +32,15 @@
import org.apache.asterix.runtime.writer.IExternalFileWriterFactoryProvider;
import org.apache.asterix.runtime.writer.IExternalPrinter;
import org.apache.asterix.runtime.writer.IExternalPrinterFactory;
-import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.exceptions.IWarningCollector;
-import org.apache.hyracks.api.exceptions.SourceLocation;
-import org.apache.hyracks.api.util.ExceptionUtils;
-import org.apache.hyracks.data.std.primitive.LongPointable;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
import software.amazon.awssdk.core.exception.SdkException;
import software.amazon.awssdk.services.s3.model.NoSuchBucketException;
-public final class S3ExternalFileWriterFactory implements IExternalFileWriterFactory {
+public final class S3ExternalFileWriterFactory extends AbstractCloudExternalFileWriterFactory {
private static final long serialVersionUID = 4551318140901866805L;
- private static final Logger LOGGER = LogManager.getLogger();
static final char SEPARATOR = '/';
public static final IExternalFileWriterFactoryProvider PROVIDER = new IExternalFileWriterFactoryProvider() {
@Override
@@ -70,19 +53,29 @@
return SEPARATOR;
}
};
- private final Map<String, String> configuration;
- private final SourceLocation pathSourceLocation;
- private final String staticPath;
- private transient S3CloudClient cloudClient;
private S3ExternalFileWriterFactory(ExternalFileWriterConfiguration externalConfig) {
- configuration = externalConfig.getConfiguration();
- pathSourceLocation = externalConfig.getPathSourceLocation();
- staticPath = externalConfig.getStaticPath();
+ super(externalConfig);
cloudClient = null;
}
@Override
+ ICloudClient createCloudClient() throws CompilationException {
+ S3ClientConfig config = S3ClientConfig.of(configuration);
+ return new S3CloudClient(config, S3Utils.buildAwsS3Client(configuration));
+ }
+
+ @Override
+ boolean isNoContainerFoundException(IOException e) {
+ return e.getCause() instanceof NoSuchBucketException;
+ }
+
+ @Override
+ boolean isSdkException(Throwable e) {
+ return e instanceof SdkException;
+ }
+
+ @Override
public IExternalFileWriter createWriter(IHyracksTaskContext context, IExternalPrinterFactory printerFactory)
throws HyracksDataException {
buildClient();
@@ -93,108 +86,8 @@
pathSourceLocation);
}
- private void buildClient() throws HyracksDataException {
- try {
- synchronized (this) {
- if (cloudClient == null) {
- // only a single client should be built
- S3ClientConfig config = S3ClientConfig.of(configuration);
- cloudClient = new S3CloudClient(config, S3Utils.buildAwsS3Client(configuration));
- }
- }
- } catch (CompilationException e) {
- throw HyracksDataException.create(e);
- }
- }
-
@Override
public char getSeparator() {
return SEPARATOR;
}
-
- @Override
- public void validate() throws AlgebricksException {
- S3ClientConfig config = S3ClientConfig.of(configuration);
- ICloudClient testClient = new S3CloudClient(config, S3Utils.buildAwsS3Client(configuration));
- String bucket = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME);
-
- if (bucket == null || bucket.isEmpty()) {
- throw new CompilationException(ErrorCode.PARAMETERS_REQUIRED,
- ExternalDataConstants.CONTAINER_NAME_FIELD_NAME);
- }
-
- try {
- doValidate(testClient, bucket);
- } catch (IOException e) {
- if (e.getCause() instanceof NoSuchBucketException) {
- throw CompilationException.create(ErrorCode.EXTERNAL_SOURCE_CONTAINER_NOT_FOUND, bucket);
- } else {
- throw CompilationException.create(ErrorCode.EXTERNAL_SINK_ERROR,
- ExceptionUtils.getMessageOrToString(e));
- }
- } catch (SdkException e) {
- throw CompilationException.create(ErrorCode.EXTERNAL_SINK_ERROR, e, getMessageOrToString(e));
- }
- }
-
- private void doValidate(ICloudClient testClient, String bucket) throws IOException, AlgebricksException {
- if (staticPath != null) {
- if (isExceedingMaxLength(staticPath, S3ExternalFileWriter.MAX_LENGTH_IN_BYTES)) {
- throw new CompilationException(ErrorCode.WRITE_PATH_LENGTH_EXCEEDS_MAX_LENGTH, pathSourceLocation,
- staticPath, S3ExternalFileWriter.MAX_LENGTH_IN_BYTES,
- ExternalDataConstants.KEY_ADAPTER_NAME_AWS_S3);
- }
-
- if (!testClient.isEmptyPrefix(bucket, staticPath)) {
- // Ensure that the static path is empty
- throw new CompilationException(ErrorCode.DIRECTORY_IS_NOT_EMPTY, pathSourceLocation, staticPath);
- }
- }
-
- // do not validate write permissions if specified by the user not to do so
- String validateWritePermissions = configuration
- .getOrDefault(ExternalDataConstants.KEY_VALIDATE_WRITE_PERMISSION, Boolean.TRUE.toString());
- if (!Boolean.parseBoolean(validateWritePermissions)) {
- return;
- }
-
- Random random = new Random();
- String pathPrefix = "testFile";
- String path = pathPrefix + random.nextInt();
- while (testClient.exists(bucket, path)) {
- path = pathPrefix + random.nextInt();
- }
-
- long writeValue = random.nextLong();
- byte[] data = new byte[Long.BYTES];
- LongPointable.setLong(data, 0, writeValue);
- ICloudBufferedWriter writer = testClient.createBufferedWriter(bucket, path);
- CloudResettableInputStream stream = null;
- boolean aborted = false;
- try {
- stream = new CloudResettableInputStream(writer, new WriterSingleBufferProvider());
- stream.write(data, 0, data.length);
- } catch (HyracksDataException e) {
- stream.abort();
- aborted = true;
- } finally {
- if (stream != null && !aborted) {
- stream.finish();
- stream.close();
- }
- }
-
- try {
- long readValue = LongPointable.getLong(testClient.readAllBytes(bucket, path), 0);
- if (writeValue != readValue) {
- // This should never happen unless S3 is messed up. But log for sanity check
- LOGGER.warn(
- "The writer can write but the written values wasn't successfully read back (wrote: {}, read:{})",
- writeValue, readValue);
- }
- } finally {
- // Delete the written file
- testClient.deleteObjects(bucket, Collections.singleton(path));
- }
- }
}
diff --git a/asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/LSMTest.java b/asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/AbstractLSMTest.java
similarity index 83%
rename from asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/LSMTest.java
rename to asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/AbstractLSMTest.java
index 92d7f12..484f372 100644
--- a/asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/LSMTest.java
+++ b/asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/AbstractLSMTest.java
@@ -23,8 +23,8 @@
import java.nio.ByteBuffer;
import java.util.Collections;
-import org.apache.asterix.cloud.clients.ICloudBufferedWriter;
import org.apache.asterix.cloud.clients.ICloudClient;
+import org.apache.asterix.cloud.clients.ICloudWriter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.junit.FixMethodOrder;
@@ -32,7 +32,7 @@
import org.junit.runners.MethodSorters;
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
-public abstract class LSMTest {
+public abstract class AbstractLSMTest {
public static final Logger LOGGER = LogManager.getLogger();
public static final String BTREE_SUFFIX = "b";
@@ -53,28 +53,23 @@
@Test
public void a1writeToS3Test() throws IOException {
- CloudResettableInputStream stream = null;
+ IWriteBufferProvider bufferProvider = new WriterSingleBufferProvider(CLOUD_CLIENT.getWriteBufferSize());
+ ICloudWriter cloudWriter =
+ CLOUD_CLIENT.createdWriter(PLAYGROUND_CONTAINER, BUCKET_STORAGE_ROOT + "/0_b", bufferProvider);
try {
- ICloudBufferedWriter s3BufferedWriter =
- CLOUD_CLIENT.createBufferedWriter(PLAYGROUND_CONTAINER, BUCKET_STORAGE_ROOT + "/0_b");
- stream = new CloudResettableInputStream(s3BufferedWriter, new WriteBufferProvider(1));
ByteBuffer content = createContent(BUFFER_SIZE);
int size = 0;
for (int i = 0; i < 10; i++) {
content.clear();
- size += stream.write(content);
+ size += cloudWriter.write(content);
}
- stream.finish();
+ cloudWriter.finish();
System.err.println(size);
} catch (Exception e) {
e.printStackTrace();
- if (stream != null) {
- stream.abort();
- }
- } finally {
- if (stream != null) {
- stream.close();
+ if (cloudWriter != null) {
+ cloudWriter.abort();
}
}
}
diff --git a/asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/gcs/LSMGCSTest.java b/asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/gcs/LSMGCSTest.java
index 87a3e29..86bb5ad 100644
--- a/asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/gcs/LSMGCSTest.java
+++ b/asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/gcs/LSMGCSTest.java
@@ -18,7 +18,7 @@
*/
package org.apache.asterix.cloud.gcs;
-import org.apache.asterix.cloud.LSMTest;
+import org.apache.asterix.cloud.AbstractLSMTest;
import org.apache.asterix.cloud.clients.google.gcs.GCSClientConfig;
import org.apache.asterix.cloud.clients.google.gcs.GCSCloudClient;
import org.junit.AfterClass;
@@ -31,7 +31,7 @@
import com.google.cloud.storage.StorageClass;
import com.google.cloud.storage.StorageOptions;
-public class LSMGCSTest extends LSMTest {
+public class LSMGCSTest extends AbstractLSMTest {
private static Storage client;
private static final int MOCK_SERVER_PORT = 4443;
private static final String MOCK_SERVER_HOSTNAME = "http://127.0.0.1:" + MOCK_SERVER_PORT;
diff --git a/asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/s3/LSMS3Test.java b/asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/s3/LSMS3Test.java
index 0452da0..c785e57 100644
--- a/asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/s3/LSMS3Test.java
+++ b/asterixdb/asterix-cloud/src/test/java/org/apache/asterix/cloud/s3/LSMS3Test.java
@@ -20,7 +20,7 @@
import java.net.URI;
-import org.apache.asterix.cloud.LSMTest;
+import org.apache.asterix.cloud.AbstractLSMTest;
import org.apache.asterix.cloud.clients.aws.s3.S3ClientConfig;
import org.apache.asterix.cloud.clients.aws.s3.S3CloudClient;
import org.junit.AfterClass;
@@ -34,7 +34,7 @@
import software.amazon.awssdk.services.s3.model.CreateBucketRequest;
import software.amazon.awssdk.services.s3.model.DeleteBucketRequest;
-public class LSMS3Test extends LSMTest {
+public class LSMS3Test extends AbstractLSMTest {
private static S3Client client;
private static S3Mock s3MockServer;