[NO ISSUE][HYR][HTTP] Include 'Permanent' response header on unknown servlet 404

- also, fix processing order of specific over wildcard for
  non-primary servlet paths

Change-Id: I9b838abc3b8af327b3597bf75039550147f6ee61
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/5143
Contrib: Michael Blow <mblow@apache.org>
Reviewed-by: Michael Blow <mblow@apache.org>
Reviewed-by: Till Westmann <tillw@apache.org>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/HttpServer.java b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/HttpServer.java
index 2a7b47e..4cae09f 100644
--- a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/HttpServer.java
+++ b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/HttpServer.java
@@ -19,9 +19,7 @@
 package org.apache.hyracks.http.server;
 
 import java.net.InetSocketAddress;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
+import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.LinkedBlockingQueue;
@@ -71,7 +69,7 @@
     private final AtomicInteger threadId = new AtomicInteger();
     private final ConcurrentMap<String, Object> ctx;
     private final LinkedBlockingQueue<Runnable> workQueue;
-    private final List<IServlet> servlets;
+    private final ServletRegistry servlets;
     private final EventLoopGroup bossGroup;
     private final EventLoopGroup workerGroup;
     private final InetSocketAddress address;
@@ -100,7 +98,7 @@
         this.closedHandler = closeHandler;
         this.config = config;
         ctx = new ConcurrentHashMap<>();
-        servlets = new ArrayList<>();
+        servlets = new ServletRegistry();
         workQueue = new LinkedBlockingQueue<>(config.getRequestQueueSize());
         int numExecutorThreads = config.getThreadCount();
         executor = new ThreadPoolExecutor(numExecutorThreads, numExecutorThreads, 0L, TimeUnit.MILLISECONDS, workQueue,
@@ -219,21 +217,14 @@
     }
 
     public void addServlet(IServlet let) {
-        servlets.add(let);
+        servlets.register(let);
+    }
+
+    public Set<IServlet> getServlets() {
+        return servlets.getServlets();
     }
 
     protected void doStart() throws InterruptedException {
-        /*
-         * This is a hacky way to ensure that IServlets with more specific paths are checked first.
-         * For example:
-         * "/path/to/resource/"
-         * is checked before
-         * "/path/to/"
-         * which in turn is checked before
-         * "/path/"
-         * Note that it doesn't work for the case where multiple paths map to a single IServlet
-         */
-        Collections.sort(servlets, (l1, l2) -> l2.getPaths()[0].length() - l1.getPaths()[0].length());
         channel = bind();
     }
 
@@ -341,44 +332,7 @@
     }
 
     public IServlet getServlet(FullHttpRequest request) {
-        String uri = request.uri();
-        int i = uri.indexOf('?');
-        if (i >= 0) {
-            uri = uri.substring(0, i);
-        }
-        for (IServlet servlet : servlets) {
-            for (String path : servlet.getPaths()) {
-                if (match(path, uri)) {
-                    return servlet;
-                }
-            }
-        }
-        return null;
-    }
-
-    static boolean match(String pathSpec, String path) {
-        char c = pathSpec.charAt(0);
-        if (c == '/') {
-            if (pathSpec.equals(path) || (pathSpec.length() == 1 && path.isEmpty())) {
-                return true;
-            }
-            if (isPathWildcardMatch(pathSpec, path)) {
-                return true;
-            }
-        } else if (c == '*') {
-            return path.regionMatches(path.length() - pathSpec.length() + 1, pathSpec, 1, pathSpec.length() - 1);
-        }
-        return false;
-    }
-
-    static boolean isPathWildcardMatch(String pathSpec, String path) {
-        final int length = pathSpec.length();
-        if (length < 2) {
-            return false;
-        }
-        final int cpl = length - 2;
-        final boolean b = pathSpec.endsWith("/*") && path.regionMatches(0, pathSpec, 0, cpl);
-        return b && (path.length() == cpl || '/' == path.charAt(cpl));
+        return servlets.getServlet(request.uri());
     }
 
     protected HttpServerHandler<? extends HttpServer> createHttpHandler(int chunkSize) {
diff --git a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/HttpServerHandler.java b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/HttpServerHandler.java
index 4882572..0f4ce8a 100644
--- a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/HttpServerHandler.java
+++ b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/HttpServerHandler.java
@@ -23,6 +23,7 @@
 import java.io.IOException;
 import java.util.concurrent.Future;
 import java.util.concurrent.RejectedExecutionException;
+import java.util.function.Consumer;
 
 import org.apache.hyracks.http.api.IChannelClosedHandler;
 import org.apache.hyracks.http.api.IServlet;
@@ -43,6 +44,7 @@
 import io.netty.handler.codec.http.HttpHeaderNames;
 import io.netty.handler.codec.http.HttpHeaderValues;
 import io.netty.handler.codec.http.HttpRequest;
+import io.netty.handler.codec.http.HttpResponse;
 import io.netty.handler.codec.http.HttpResponseStatus;
 import io.netty.handler.codec.http.HttpScheme;
 
@@ -119,11 +121,19 @@
     }
 
     protected void respond(ChannelHandlerContext ctx, HttpRequest request, HttpResponseStatus status) {
+        respond(ctx, request, status, null);
+    }
+
+    protected void respond(ChannelHandlerContext ctx, HttpRequest request, HttpResponseStatus status,
+            Consumer<HttpResponse> beforeWrite) {
         final DefaultHttpResponse response = new DefaultFullHttpResponse(request.protocolVersion(), status);
         response.headers().setInt(HttpHeaderNames.CONTENT_LENGTH, 0);
         HttpUtil.setConnectionHeader(request, response);
         final ChannelPromise responseCompletionPromise = ctx.newPromise();
         responseCompletionPromise.addListener(this);
+        if (beforeWrite != null) {
+            beforeWrite.accept(response);
+        }
         final ChannelFuture clientChannel = ctx.writeAndFlush(response, responseCompletionPromise);
         if (!io.netty.handler.codec.http.HttpUtil.isKeepAlive(request)) {
             clientChannel.addListener(ChannelFutureListener.CLOSE);
@@ -160,7 +170,8 @@
         if (LOGGER.isDebugEnabled()) {
             LOGGER.debug("No servlet for " + request.uri());
         }
-        respond(ctx, request, HttpResponseStatus.NOT_FOUND);
+        respond(ctx, request, HttpResponseStatus.NOT_FOUND,
+                response -> response.headers().set(HttpUtil.PERMANENT, "true"));
     }
 
     @Override
diff --git a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/ServletRegistry.java b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/ServletRegistry.java
new file mode 100644
index 0000000..083c0ff
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/ServletRegistry.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.http.server;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.apache.hyracks.http.api.IServlet;
+import org.apache.hyracks.http.server.utils.HttpUtil;
+import org.apache.hyracks.util.annotations.NotThreadSafe;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+@NotThreadSafe // thread safe for concurrent reads, but concurrent registrations are not supported
+public class ServletRegistry {
+    private static final Logger LOGGER = LogManager.getLogger();
+
+    private final SortedMap<String, IServlet> servletMap = new TreeMap<>(Comparator.reverseOrder());
+    private final Set<IServlet> servlets = new HashSet<>();
+
+    public void register(IServlet let) {
+        servlets.add(let);
+        for (String path : let.getPaths()) {
+            LOGGER.debug("registering servlet {}[{}] with path {}", let, let.getClass().getName(), path);
+            IServlet prev = servletMap.put(path, let);
+            if (prev != null) {
+                throw new IllegalStateException("duplicate servlet mapping! (path = " + path + ", orig = " + prev + "["
+                        + prev.getClass().getName() + "], dup = " + let + "[" + let.getClass().getName() + "])");
+            }
+        }
+    }
+
+    public Set<IServlet> getServlets() {
+        return Collections.unmodifiableSet(servlets);
+    }
+
+    public IServlet getServlet(String uri) {
+        String baseUri = HttpUtil.trimQuery(uri);
+        return servletMap.entrySet().stream().filter(entry -> match(entry.getKey(), baseUri)).map(Map.Entry::getValue)
+                .findFirst().orElse(null);
+    }
+
+    static boolean match(String pathSpec, String path) {
+        char c = pathSpec.charAt(0);
+        if (c == '/') {
+            if (pathSpec.equals(path) || (pathSpec.length() == 1 && path.isEmpty())) {
+                return true;
+            }
+            return isPathWildcardMatch(pathSpec, path);
+        } else if (c == '*') {
+            return path.regionMatches(path.length() - pathSpec.length() + 1, pathSpec, 1, pathSpec.length() - 1);
+        }
+        return false;
+    }
+
+    static boolean isPathWildcardMatch(String pathSpec, String path) {
+        final int length = pathSpec.length();
+        if (length < 2) {
+            return false;
+        }
+        final int cpl = length - 2;
+        final boolean b = pathSpec.endsWith("/*") && path.regionMatches(0, pathSpec, 0, cpl);
+        return b && (path.length() == cpl || '/' == path.charAt(cpl));
+    }
+
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/utils/HttpUtil.java b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/utils/HttpUtil.java
index 591cf8c..8451898 100644
--- a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/utils/HttpUtil.java
+++ b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/utils/HttpUtil.java
@@ -53,32 +53,11 @@
     private static final Charset DEFAULT_RESPONSE_CHARSET = StandardCharsets.UTF_8;
 
     public static final AsciiString X_FORWARDED_PROTO = AsciiString.cached("x-forwarded-proto");
+    public static final AsciiString PERMANENT = AsciiString.cached("permanent");
 
     private HttpUtil() {
     }
 
-    public static class Encoding {
-        public static final String UTF8 = "utf-8";
-
-        private Encoding() {
-        }
-    }
-
-    public static class ContentType {
-        public static final String APPLICATION_ADM = "application/x-adm";
-        public static final String APPLICATION_JSON = "application/json";
-        public static final String JSON = "json";
-        public static final String APPLICATION_X_WWW_FORM_URLENCODED = "application/x-www-form-urlencoded";
-        public static final String CSV = "csv";
-        public static final String TEXT_CSV = "text/csv";
-        public static final String IMG_PNG = "image/png";
-        public static final String TEXT_HTML = "text/html";
-        public static final String TEXT_PLAIN = "text/plain";
-
-        private ContentType() {
-        }
-    }
-
     public static String getParameter(Map<String, List<String>> parameters, CharSequence name) {
         List<String> parameter = parameters.get(String.valueOf(name));
         return parameter == null ? null : String.join(",", parameter);
@@ -200,6 +179,33 @@
         return preferredCharset.orElse(defaultCharset);
     }
 
+    public static String trimQuery(String uri) {
+        int i = uri.indexOf('?');
+        return i < 0 ? uri : uri.substring(0, i);
+    }
+
+    public static class Encoding {
+        public static final String UTF8 = "utf-8";
+
+        private Encoding() {
+        }
+    }
+
+    public static class ContentType {
+        public static final String APPLICATION_ADM = "application/x-adm";
+        public static final String APPLICATION_JSON = "application/json";
+        public static final String JSON = "json";
+        public static final String APPLICATION_X_WWW_FORM_URLENCODED = "application/x-www-form-urlencoded";
+        public static final String CSV = "csv";
+        public static final String TEXT_CSV = "text/csv";
+        public static final String IMG_PNG = "image/png";
+        public static final String TEXT_HTML = "text/html";
+        public static final String TEXT_PLAIN = "text/plain";
+
+        private ContentType() {
+        }
+    }
+
     private static class WeightedHeaderValue implements Comparable<WeightedHeaderValue> {
 
         final String value;
diff --git a/hyracks-fullstack/hyracks/hyracks-http/src/test/java/org/apache/hyracks/http/server/PathMatchTest.java b/hyracks-fullstack/hyracks/hyracks-http/src/test/java/org/apache/hyracks/http/server/PathMatchTest.java
index 89260c5..9f9e174 100644
--- a/hyracks-fullstack/hyracks/hyracks-http/src/test/java/org/apache/hyracks/http/server/PathMatchTest.java
+++ b/hyracks-fullstack/hyracks/hyracks-http/src/test/java/org/apache/hyracks/http/server/PathMatchTest.java
@@ -24,18 +24,18 @@
 public class PathMatchTest {
     @Test
     public void test() {
-        Assert.assertTrue(HttpServer.match("/", ""));
+        Assert.assertTrue(ServletRegistry.match("/", ""));
 
-        Assert.assertTrue(HttpServer.match("/", "/"));
-        Assert.assertFalse(HttpServer.match("/", "/a"));
+        Assert.assertTrue(ServletRegistry.match("/", "/"));
+        Assert.assertFalse(ServletRegistry.match("/", "/a"));
 
-        Assert.assertFalse(HttpServer.match("/a", "/"));
-        Assert.assertTrue(HttpServer.match("/a", "/a"));
+        Assert.assertFalse(ServletRegistry.match("/a", "/"));
+        Assert.assertTrue(ServletRegistry.match("/a", "/a"));
 
-        Assert.assertTrue(HttpServer.match("/*", "/"));
-        Assert.assertTrue(HttpServer.match("/*", "/a"));
+        Assert.assertTrue(ServletRegistry.match("/*", "/"));
+        Assert.assertTrue(ServletRegistry.match("/*", "/a"));
 
-        Assert.assertFalse(HttpServer.match("/a/*", "/"));
-        Assert.assertTrue(HttpServer.match("/a/*", "/a"));
+        Assert.assertFalse(ServletRegistry.match("/a/*", "/"));
+        Assert.assertTrue(ServletRegistry.match("/a/*", "/a"));
     }
 }