Issue 548: Initial implementation of CSV output method.
Can be selected via the HTTP interface by setting the Accept: header to
text/csv.
Displays strings, numerics, booleans, and a couple of duration types. Detects
situations that cannot be represented as CSV (list values, nested records)
and throws an exception.
Introduces "outputRecordType" set option to define a fixed RecordType that
all results will be coerced to, to ensure consistent CSV output.
Added test support for CSV output, with one test case for now.
Change-Id: Ib53da6b3c69e38095bdc684b0e8cd53b9f4b1543
Reviewed-on: http://fulliautomatix.ics.uci.edu:8443/165
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <westmann@gmail.com>
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/api/common/APIFramework.java b/asterix-app/src/main/java/edu/uci/ics/asterix/api/common/APIFramework.java
index 0376e11..1ee9a3e 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/api/common/APIFramework.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/api/common/APIFramework.java
@@ -161,7 +161,8 @@
public enum OutputFormat {
ADM,
HTML,
- JSON
+ JSON,
+ CSV
}
public static Pair<Query, Integer> reWriteQuery(List<FunctionDecl> declaredFunctions,
@@ -362,6 +363,9 @@
case JSON:
builder.setPrinterProvider(format.getJSONPrinterFactoryProvider());
break;
+ case CSV:
+ builder.setPrinterProvider(format.getCSVPrinterFactoryProvider());
+ break;
default:
builder.setPrinterProvider(format.getPrinterFactoryProvider());
break;
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/QueryResultAPIServlet.java b/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/QueryResultAPIServlet.java
index e60ebe1..5ccbfc8 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/QueryResultAPIServlet.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/QueryResultAPIServlet.java
@@ -92,6 +92,9 @@
} else if (accept.contains("text/html")) {
format = APIFramework.OutputFormat.HTML;
response.setContentType("text/html");
+ } else if (accept.contains("text/csv")) {
+ format = APIFramework.OutputFormat.CSV;
+ response.setContentType("text/csv; header=present");
} else {
// JSON output is the default; most generally useful for a
// programmatic HTTP API
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/RESTAPIServlet.java b/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/RESTAPIServlet.java
index 6d66921..e783741 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/RESTAPIServlet.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/api/http/servlet/RESTAPIServlet.java
@@ -85,6 +85,9 @@
} else if (accept.contains("text/html")) {
format = OutputFormat.HTML;
response.setContentType("text/html");
+ } else if (accept.contains("text/csv")) {
+ format = OutputFormat.CSV;
+ response.setContentType("text/csv; header=present");
} else {
// JSON output is the default; most generally useful for a
// programmatic HTTP API
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/aql/translator/AqlTranslator.java b/asterix-app/src/main/java/edu/uci/ics/asterix/aql/translator/AqlTranslator.java
index 5bac705..25dc341 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/aql/translator/AqlTranslator.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/aql/translator/AqlTranslator.java
@@ -496,19 +496,19 @@
dataverseName, mdTxnCtx);
String filterField = ((InternalDetailsDecl) dd.getDatasetDetailsDecl()).getFilterField();
if (compactionPolicy == null) {
- if (filterField != null) {
- // If the dataset has a filter and the user didn't specify a merge policy, then we will pick the
- // correlated-prefix as the default merge policy.
- compactionPolicy = GlobalConfig.DEFAULT_FILTERED_DATASET_COMPACTION_POLICY_NAME;
- compactionPolicyProperties = GlobalConfig.DEFAULT_COMPACTION_POLICY_PROPERTIES;
- } else {
- compactionPolicy = GlobalConfig.DEFAULT_COMPACTION_POLICY_NAME;
- compactionPolicyProperties = GlobalConfig.DEFAULT_COMPACTION_POLICY_PROPERTIES;
- }
- } else {
- validateCompactionPolicy(compactionPolicy,
- compactionPolicyProperties, mdTxnCtx, false);
- }
+ if (filterField != null) {
+ // If the dataset has a filter and the user didn't specify a merge policy, then we will pick the
+ // correlated-prefix as the default merge policy.
+ compactionPolicy = GlobalConfig.DEFAULT_FILTERED_DATASET_COMPACTION_POLICY_NAME;
+ compactionPolicyProperties = GlobalConfig.DEFAULT_COMPACTION_POLICY_PROPERTIES;
+ } else {
+ compactionPolicy = GlobalConfig.DEFAULT_COMPACTION_POLICY_NAME;
+ compactionPolicyProperties = GlobalConfig.DEFAULT_COMPACTION_POLICY_PROPERTIES;
+ }
+ } else {
+ validateCompactionPolicy(compactionPolicy,
+ compactionPolicyProperties, mdTxnCtx, false);
+ }
if (filterField != null) {
aRecordType.validateFilterField(filterField);
}
@@ -2058,6 +2058,9 @@
// In this case (the normal case), we don't use the
// "response" JSONObject - just stream the results
// to the "out" PrintWriter
+ if (pdf == OutputFormat.CSV) {
+ ResultUtils.displayCSVHeader(metadataProvider.findOutputRecordType(), out);
+ }
ResultUtils.displayResults(resultReader, out, pdf);
hcc.waitForCompletion(jobId);
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/result/ResultUtils.java b/asterix-app/src/main/java/edu/uci/ics/asterix/result/ResultUtils.java
index 88d1e6d..1900d50 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/result/ResultUtils.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/result/ResultUtils.java
@@ -34,6 +34,7 @@
import edu.uci.ics.asterix.api.common.APIFramework;
import edu.uci.ics.asterix.api.http.servlet.APIServlet;
+import edu.uci.ics.asterix.om.types.ARecordType;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
@@ -60,21 +61,41 @@
return s;
}
+ public static void displayCSVHeader(ARecordType recordType, PrintWriter out) {
+ String[] fieldNames = recordType.getFieldNames();
+ boolean notfirst = false;
+ for (String name : fieldNames) {
+ if (notfirst) {
+ out.print(',');
+ }
+ notfirst = true;
+ out.print('"');
+ out.print(name.replace("\"", "\"\""));
+ out.print('"');
+ }
+ out.print("\r\n");
+ }
+
public static void displayResults(ResultReader resultReader, PrintWriter out, APIFramework.OutputFormat pdf)
throws HyracksDataException {
IFrameTupleAccessor fta = resultReader.getFrameTupleAccessor();
ByteBuffer buffer = ByteBuffer.allocate(ResultReader.FRAME_SIZE);
buffer.clear();
-
int bytesRead = resultReader.read(buffer);
ByteBufferInputStream bbis = new ByteBufferInputStream();
+
+ // Whether we need to separate top-level ADM instances with commas
boolean need_commas = true;
+ // Whether this is the first instance being output
boolean notfirst = false;
+
switch (pdf) {
case HTML:
out.println("<h4>Results:</h4>");
out.println("<pre>");
+ // Fall through
+ case CSV:
need_commas = false;
break;
case JSON:
@@ -85,6 +106,7 @@
out.print("[ ");
break;
}
+
if (bytesRead > 0) {
do {
try {
@@ -96,13 +118,21 @@
int length = fta.getTupleEndOffset(tIndex) - start;
bbis.setByteBuffer(buffer, start);
byte[] recordBytes = new byte[length];
- bbis.read(recordBytes, 0, length);
- result = new String(recordBytes, 0, length, UTF_8);
+ int numread = bbis.read(recordBytes, 0, length);
+ if (pdf == APIFramework.OutputFormat.CSV) {
+ if ( (numread > 0) && (recordBytes[numread-1] == '\n') ) {
+ numread--;
+ }
+ }
+ result = new String(recordBytes, 0, numread, UTF_8);
if (need_commas && notfirst) {
out.print(", ");
}
notfirst = true;
out.print(result);
+ if (pdf == APIFramework.OutputFormat.CSV) {
+ out.print("\r\n");
+ }
}
buffer.clear();
} finally {
@@ -125,6 +155,9 @@
case ADM:
out.println(" ]");
break;
+ case CSV:
+ // Nothing to do
+ break;
}
}
diff --git a/asterix-app/src/test/resources/runtimets/queries/csv/basic-types/basic-types.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/csv/basic-types/basic-types.1.ddl.aql
new file mode 100644
index 0000000..fafef26
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/csv/basic-types/basic-types.1.ddl.aql
@@ -0,0 +1,10 @@
+drop dataverse test if exists;
+create dataverse test;
+
+use dataverse test;
+
+create type "foo" as {
+ "id": int32,
+ "name": string,
+ "money": float
+};
diff --git a/asterix-app/src/test/resources/runtimets/queries/csv/basic-types/basic-types.2.query.aql b/asterix-app/src/test/resources/runtimets/queries/csv/basic-types/basic-types.2.query.aql
new file mode 100644
index 0000000..6ef20f2
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/csv/basic-types/basic-types.2.query.aql
@@ -0,0 +1,3 @@
+use dataverse "test";
+set outputRecordType "foo";
+{ "money": float("18.25"), "id": 12345, "name": "Chris"}
diff --git a/asterix-app/src/test/resources/runtimets/results/csv/basic-types/basic-types.1.csv b/asterix-app/src/test/resources/runtimets/results/csv/basic-types/basic-types.1.csv
new file mode 100644
index 0000000..941639e
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/csv/basic-types/basic-types.1.csv
@@ -0,0 +1,2 @@
+"id","name","money"
+12345,Chris,18.25
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index 6fdef2a..c3b097a 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -5523,6 +5523,13 @@
</compilation-unit>
</test-case>
</test-group>
+ <test-group name="csv">
+ <test-case FilePath="csv">
+ <compilation-unit name="basic-types">
+ <output-dir compare="CSV">basic-types</output-dir>
+ </compilation-unit>
+ </test-case>
+ </test-group>
<test-group name="binary">
<test-case FilePath="binary">
<compilation-unit name="parse">