[ASTERIXDB-3509]: COPY TO CSV Perf Issue
Details: Instance creation of csv printerFactory was getting created for all the data in the records. Change that to single instance creation per query.
Change-Id: I55b114c81707e57307b52a1828952fa56847b2dc
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19354
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
index 3d3ca3e..101d7c2 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
@@ -19,7 +19,6 @@
package org.apache.asterix.dataflow.data.nontagged.printers.csv;
import java.io.PrintStream;
-import java.util.concurrent.ConcurrentHashMap;
import org.apache.hyracks.algebricks.data.IPrinter;
import org.apache.hyracks.algebricks.data.IPrinterFactory;
@@ -27,8 +26,6 @@
public class ANullPrinterFactory implements IPrinterFactory {
private static final long serialVersionUID = 1L;
private static final String DEFAULT_NULL_STRING = "";
- // Store the information about the instance based on the parameters
- private static final ConcurrentHashMap<String, ANullPrinterFactory> instanceCache = new ConcurrentHashMap<>();
private String nullString;
private ANullPrinterFactory(String nullString) {
@@ -36,8 +33,7 @@
}
public static ANullPrinterFactory createInstance(String nullString) {
- String key = CSVUtils.generateKey(nullString);
- return instanceCache.computeIfAbsent(key, k -> new ANullPrinterFactory(nullString));
+ return new ANullPrinterFactory(nullString);
}
private final IPrinter PRINTER = (byte[] b, int s, int l, PrintStream ps) -> {
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
index 20b1ef0..fdde82c 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
@@ -27,7 +27,6 @@
import java.io.PrintStream;
import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
import org.apache.asterix.om.pointables.ARecordVisitablePointable;
import org.apache.asterix.om.pointables.base.DefaultOpenFieldType;
@@ -43,7 +42,8 @@
public class AObjectPrinterFactory implements IPrinterFactory {
private static final long serialVersionUID = 1L;
- private static final ConcurrentHashMap<String, AObjectPrinterFactory> instanceCache = new ConcurrentHashMap<>();
+ private final IPrinter nullPrinter;
+ private final IPrinter stringPrinter;
private ARecordType itemType;
private Map<String, String> configuration;
private boolean emptyFieldAsNull;
@@ -53,12 +53,15 @@
this.configuration = configuration;
String emptyFieldAsNullStr = configuration.get(KEY_EMPTY_FIELD_AS_NULL);
this.emptyFieldAsNull = emptyFieldAsNullStr != null && Boolean.parseBoolean(emptyFieldAsNullStr);
+ this.nullPrinter = ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter();
+ this.stringPrinter =
+ AStringPrinterFactory.createInstance(configuration.get(KEY_QUOTE), configuration.get(KEY_FORCE_QUOTE),
+ configuration.get(KEY_ESCAPE), configuration.get(KEY_DELIMITER)).createPrinter();
+
}
public static AObjectPrinterFactory createInstance(ARecordType itemType, Map<String, String> configuration) {
- // generate a unique identifier based on the parameters and hash the instance corresponding to it.
- String key = CSVUtils.generateKey(itemType, configuration);
- return instanceCache.computeIfAbsent(key, k -> new AObjectPrinterFactory(itemType, configuration));
+ return new AObjectPrinterFactory(itemType, configuration);
}
public boolean printFlatValue(ATypeTag typeTag, byte[] b, int s, int l, PrintStream ps)
@@ -78,7 +81,7 @@
return true;
case MISSING:
case NULL:
- ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter().print(b, s, l, ps);
+ nullPrinter.print(b, s, l, ps);
return true;
case BOOLEAN:
ABooleanPrinterFactory.PRINTER.print(b, s, l, ps);
@@ -130,12 +133,9 @@
return true;
case STRING:
if (emptyFieldAsNull && CSVUtils.isEmptyString(b, s, l)) {
- ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter().print(b, s, l, ps);
+ nullPrinter.print(b, s, l, ps);
} else {
- AStringPrinterFactory
- .createInstance(configuration.get(KEY_QUOTE), configuration.get(KEY_FORCE_QUOTE),
- configuration.get(KEY_ESCAPE), configuration.get(KEY_DELIMITER))
- .createPrinter().print(b, s, l, ps);
+ stringPrinter.print(b, s, l, ps);
}
return true;
case BINARY:
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
index ae368bd..d17b7cb 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
@@ -25,7 +25,6 @@
import java.io.IOException;
import java.io.PrintStream;
-import java.util.concurrent.ConcurrentHashMap;
import org.apache.asterix.dataflow.data.nontagged.printers.PrintTools;
import org.apache.hyracks.algebricks.data.IPrinter;
@@ -34,7 +33,6 @@
public class AStringPrinterFactory implements IPrinterFactory {
private static final long serialVersionUID = 1L;
- private static final ConcurrentHashMap<String, AStringPrinterFactory> instanceCache = new ConcurrentHashMap<>();
private static final String NONE = "none";
private String quote;
private Boolean forceQuote;
@@ -51,8 +49,7 @@
public static AStringPrinterFactory createInstance(String quote, String forceQuoteStr, String escape,
String delimiter) {
boolean forceQuote = forceQuoteStr == null || Boolean.parseBoolean(forceQuoteStr);
- String key = CSVUtils.generateKey(quote, forceQuoteStr, escape, delimiter);
- return instanceCache.computeIfAbsent(key, k -> new AStringPrinterFactory(quote, forceQuote, escape, delimiter));
+ return new AStringPrinterFactory(quote, forceQuote, escape, delimiter);
}
private final IPrinter PRINTER = (byte[] b, int s, int l, PrintStream ps) -> {
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
index 22c502b..bbe5286 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
@@ -44,6 +44,7 @@
public class APrintVisitor extends AbstractPrintVisitor {
private final ARecordType itemType;
private final Map<String, String> configuration;
+ private AObjectPrinterFactory objectPrinterFactory;
public APrintVisitor(ARecordType itemType, Map<String, String> configuration) {
super();
@@ -67,6 +68,9 @@
@Override
protected boolean printFlatValue(ATypeTag typeTag, byte[] b, int s, int l, PrintStream ps)
throws HyracksDataException {
- return AObjectPrinterFactory.createInstance(itemType, configuration).printFlatValue(typeTag, b, s, l, ps);
+ if (objectPrinterFactory == null) {
+ objectPrinterFactory = AObjectPrinterFactory.createInstance(itemType, configuration);
+ }
+ return objectPrinterFactory.printFlatValue(typeTag, b, s, l, ps);
}
}