[NO ISSUE][HTTP] Character encoding fixes
- Support alternate charset for application/x-www-form-urlencoded requests
- Use a random charset in TestExecutor for each query request (statements longer than 64K characters are always sent as UTF-8)
Change-Id: I4a982f7c6c34bb32652c1bdd9b546780a2d967d0
Reviewed-on: https://asterix-gerrit.ics.uci.edu/3265
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/common/TestExecutor.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/common/TestExecutor.java
index 4129b5a..4d9ceeb 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/common/TestExecutor.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/common/TestExecutor.java
@@ -39,16 +39,18 @@
import java.net.URISyntaxException;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
+import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Optional;
+import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
@@ -143,6 +145,7 @@
public static final int TRUNCATE_THRESHOLD = 16384;
public static final Set<String> NON_CANCELLABLE =
Collections.unmodifiableSet(new HashSet<>(Arrays.asList("store", "validate")));
+ private static final int MAX_NON_UTF_8_STATEMENT_SIZE = 64 * 1024;
private final IPollTask plainExecutor = this::executeTestFile;
@@ -156,7 +159,8 @@
private static Map<String, InetSocketAddress> ncEndPoints;
private static Map<String, InetSocketAddress> replicationAddress;
- private static final List<Charset> charsetsRemaining = new ArrayList<>();
+ private final List<Charset> allCharsets;
+ private final Queue<Charset> charsetsRemaining = new ArrayDeque<>();
/*
* Instance members
@@ -181,6 +185,10 @@
public TestExecutor(List<InetSocketAddress> endpoints) {
this.endpoints = endpoints;
+ this.allCharsets = Stream
+ .of("UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE", "x-UTF-32BE-BOM",
+ "x-UTF-32LE-BOM", "x-UTF-16LE-BOM")
+ .filter(Charset::isSupported).map(Charset::forName).collect(Collectors.toList());
}
public void setLibrarian(IExternalUDFLibrarian librarian) {
@@ -612,33 +620,20 @@
return response.getEntity().getContent();
}
- private Charset selectCharset(File result) throws IOException {
- // choose an encoding that works for this input
- return selectCharset(FileUtils.readFileToString(result, UTF_8));
+ public synchronized void setAvailableCharsets(Charset... charsets) {
+ allCharsets.clear();
+ allCharsets.addAll(Arrays.asList(charsets));
+ charsetsRemaining.clear();
}
- private Charset selectCharset(String payload) {
- // choose an encoding that works for this input
- return nextCharset(charset -> canEncodeDecode(charset, payload));
- }
-
- public static Charset nextCharset(Predicate<Charset> test) {
- synchronized (charsetsRemaining) {
- while (true) {
- for (Iterator<Charset> iter = charsetsRemaining.iterator(); iter.hasNext();) {
- Charset next = iter.next();
- if (test.test(next)) {
- iter.remove();
- return next;
- }
- }
- List<Charset> allCharsets = Stream
- .of("UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE",
- "x-UTF-32BE-BOM", "x-UTF-32LE-BOM", "x-UTF-16LE-BOM")
- .filter(Charset::isSupported).map(Charset::forName).collect(Collectors.toList());
- Collections.shuffle(allCharsets);
- charsetsRemaining.addAll(allCharsets);
+ private synchronized Charset nextCharset() {
+ while (true) {
+ Charset nextCharset = charsetsRemaining.poll();
+ if (nextCharset != null) {
+ return nextCharset;
}
+ Collections.shuffle(allCharsets);
+ charsetsRemaining.addAll(allCharsets);
}
}
@@ -739,12 +734,12 @@
for (Parameter param : upsertParam(otherParams, stmtParam, ParameterTypeEnum.STRING, statement)) {
builder.addParameter(param.getName(), param.getValue());
}
- builder.addParameter(stmtParam, statement);
+ builder.setCharset(statement.length() > MAX_NON_UTF_8_STATEMENT_SIZE ? UTF_8 : nextCharset());
} else {
// this seems pretty bad - we should probably fix the API and not the client
- builder.setEntity(new StringEntity(statement, UTF_8));
+ builder.setEntity(new StringEntity(statement,
+ statement.length() > MAX_NON_UTF_8_STATEMENT_SIZE ? UTF_8 : nextCharset()));
}
- builder.setCharset(UTF_8);
return builder.build();
}
@@ -775,11 +770,12 @@
}
}
try {
- builder.setEntity(new StringEntity(om.writeValueAsString(content), ContentType.APPLICATION_JSON));
+ builder.setEntity(new StringEntity(om.writeValueAsString(content),
+ ContentType.create(ContentType.APPLICATION_JSON.getMimeType(),
+ statement.length() > MAX_NON_UTF_8_STATEMENT_SIZE ? UTF_8 : nextCharset())));
} catch (JsonProcessingException e) {
e.printStackTrace();
}
- builder.setCharset(UTF_8);
return builder.build();
}
@@ -1253,7 +1249,7 @@
URI uri = testFile.getName().endsWith("aql") ? getEndpoint(Servlets.QUERY_AQL)
: getEndpoint(Servlets.QUERY_SERVICE);
boolean isJsonEncoded = isJsonEncoded(extractHttpRequestType(statement));
- Charset responseCharset = expectedResultFile == null ? UTF_8 : selectCharset(expectedResultFile);
+ Charset responseCharset = expectedResultFile == null ? UTF_8 : nextCharset();
InputStream resultStream;
if (DELIVERY_IMMEDIATE.equals(delivery)) {
resultStream = executeQueryService(statement, fmt, uri, params, isJsonEncoded, responseCharset, null,
diff --git a/hyracks-fullstack/hyracks/hyracks-http/pom.xml b/hyracks-fullstack/hyracks/hyracks-http/pom.xml
index 46e2004..9bfbfc2 100644
--- a/hyracks-fullstack/hyracks/hyracks-http/pom.xml
+++ b/hyracks-fullstack/hyracks/hyracks-http/pom.xml
@@ -54,12 +54,10 @@
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
- <scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
- <scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
diff --git a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/FormUrlEncodedRequest.java b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/FormUrlEncodedRequest.java
index 4609967..05a7e5e 100644
--- a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/FormUrlEncodedRequest.java
+++ b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/FormUrlEncodedRequest.java
@@ -18,82 +18,32 @@
*/
package org.apache.hyracks.http.server;
-import java.io.IOException;
+import java.nio.charset.Charset;
import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import java.util.Set;
+import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.hyracks.http.api.IServletRequest;
import org.apache.hyracks.http.server.utils.HttpUtil;
import io.netty.handler.codec.http.FullHttpRequest;
import io.netty.handler.codec.http.QueryStringDecoder;
-import io.netty.handler.codec.http.multipart.Attribute;
-import io.netty.handler.codec.http.multipart.HttpPostRequestDecoder;
-import io.netty.handler.codec.http.multipart.InterfaceHttpData;
-import io.netty.handler.codec.http.multipart.MixedAttribute;
public class FormUrlEncodedRequest extends BaseRequest implements IServletRequest {
- private final List<String> names;
- private final List<String> values;
-
- public static IServletRequest create(FullHttpRequest request) throws IOException {
- List<String> names = new ArrayList<>();
- List<String> values = new ArrayList<>();
- HttpPostRequestDecoder decoder = new HttpPostRequestDecoder(request);
- try {
- List<InterfaceHttpData> bodyHttpDatas = decoder.getBodyHttpDatas();
- for (InterfaceHttpData data : bodyHttpDatas) {
- if (data.getHttpDataType().equals(InterfaceHttpData.HttpDataType.Attribute)) {
- Attribute attr = (MixedAttribute) data;
- names.add(data.getName());
- values.add(attr.getValue());
- }
- }
- } finally {
- decoder.destroy();
- }
- return new FormUrlEncodedRequest(request, new QueryStringDecoder(request.uri()).parameters(), names, values);
+ public static IServletRequest create(FullHttpRequest request) {
+ Charset charset = HttpUtil.getRequestCharset(request);
+ Map<String, List<String>> parameters = new LinkedHashMap<>();
+ URLEncodedUtils.parse(request.content().toString(charset), charset).forEach(
+ pair -> parameters.computeIfAbsent(pair.getName(), a -> new ArrayList<>()).add(pair.getValue()));
+ new QueryStringDecoder(request.uri()).parameters()
+ .forEach((name, value) -> parameters.computeIfAbsent(name, a -> new ArrayList<>()).addAll(value));
+ return new FormUrlEncodedRequest(request, parameters);
}
- protected FormUrlEncodedRequest(FullHttpRequest request, Map<String, List<String>> parameters, List<String> names,
- List<String> values) {
+ private FormUrlEncodedRequest(FullHttpRequest request, Map<String, List<String>> parameters) {
super(request, parameters);
- this.names = names;
- this.values = values;
- }
-
- @Override
- public String getParameter(CharSequence name) {
- for (int i = 0; i < names.size(); i++) {
- if (name.equals(names.get(i))) {
- return values.get(i);
- }
- }
- return HttpUtil.getParameter(parameters, name);
- }
-
- @Override
- public Set<String> getParameterNames() {
- HashSet<String> paramNames = new HashSet<>();
- paramNames.addAll(parameters.keySet());
- paramNames.addAll(names);
- return Collections.unmodifiableSet(paramNames);
- }
-
- @Override
- public Map<String, String> getParameters() {
- HashMap<String, String> paramMap = new HashMap<>();
- paramMap.putAll(super.getParameters());
- for (int i = 0; i < names.size(); i++) {
- paramMap.put(names.get(i), values.get(i));
- }
-
- return Collections.unmodifiableMap(paramMap);
}
}
diff --git a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/utils/HttpUtil.java b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/utils/HttpUtil.java
index 6e4a273..5326019 100644
--- a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/utils/HttpUtil.java
+++ b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/utils/HttpUtil.java
@@ -73,17 +73,7 @@
public static String getParameter(Map<String, List<String>> parameters, CharSequence name) {
List<String> parameter = parameters.get(String.valueOf(name));
- if (parameter == null) {
- return null;
- } else if (parameter.size() == 1) {
- return parameter.get(0);
- } else {
- StringBuilder aString = new StringBuilder(parameter.get(0));
- for (int i = 1; i < parameter.size(); i++) {
- aString.append(",").append(parameter.get(i));
- }
- return aString.toString();
- }
+ return parameter == null ? null : String.join(",", parameter);
}
public static IServletRequest toServletRequest(FullHttpRequest request) throws IOException {