Load parser from classpath in ParserFactoryProvider

Change ParserFactoryProvider so that it discovers parser factories from
the classpath (META-INF/services entries) instead of a hard-coded switch.

Change-Id: I2ac039fe3daaf0636cf004289bd0c8a3229197a9
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1416
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <tillw@apache.org>
diff --git a/asterixdb/asterix-external-data/pom.xml b/asterixdb/asterix-external-data/pom.xml
index 72c7997..3796a0d 100644
--- a/asterixdb/asterix-external-data/pom.xml
+++ b/asterixdb/asterix-external-data/pom.xml
@@ -106,6 +106,7 @@
             <include>**/NOTICE</include>
             <include>**/LICENSE</include>
             <include>**/DEPENDENCIES</include>
+            <include>**/services/**</include>
           </includes>
         </configuration>
         <executions>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
index 1fc97c9..3dd3903 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
@@ -58,4 +58,6 @@
      * @param metaType
      */
     public void setMetaType(ARecordType metaType);
+
+    public String[] getFormats();
 }
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
index efc9574..b0a1db2 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
@@ -30,6 +30,7 @@
 public class ADMDataParserFactory extends AbstractRecordStreamParserFactory<char[]> {
 
     private static final long serialVersionUID = 1L;
+    private static String[] formats = { "adm", "json", "semi-structured" };
 
     @Override
     public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext ctx) {
@@ -56,4 +57,9 @@
     public void setMetaType(ARecordType metaType) {
     }
 
+    @Override
+    public String[] getFormats() {
+        return formats;
+    }
+
 }
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
index 786dac0..0c9fd34 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
@@ -36,6 +36,7 @@
 public class DelimitedDataParserFactory extends AbstractRecordStreamParserFactory<char[]> {
 
     private static final long serialVersionUID = 1L;
+    private static String[] formats = { "csv", "delimited-text" };
 
     @Override
     public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext ctx) throws HyracksDataException {
@@ -100,4 +101,9 @@
     public void setMetaType(ARecordType metaType) {
     }
 
+    @Override
+    public String[] getFormats() {
+        return formats;
+    }
+
 }
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
index 15ecbfd..8914152 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
@@ -35,6 +35,7 @@
 public class HiveDataParserFactory implements IRecordDataParserFactory<Writable> {
 
     private static final long serialVersionUID = 1L;
+    private static String[] formats = { "hive", "hive-parser"};
     private Map<String, String> configuration;
     private ARecordType recordType;
     private String hiveSerdeClassName;
@@ -72,4 +73,8 @@
     public void setMetaType(ARecordType metaType) {
     }
 
+    @Override public String[] getFormats() {
+        return formats;
+    }
+
 }
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
index 7465455..25308f2 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
@@ -31,6 +31,7 @@
 public class RSSParserFactory implements IRecordDataParserFactory<SyndEntryImpl> {
 
     private static final long serialVersionUID = 1L;
+    private static String[] formats = { "rss" };
     private ARecordType recordType;
 
     @Override
@@ -58,4 +59,9 @@
     public void setMetaType(ARecordType metaType) {
     }
 
+    @Override
+    public String[] getFormats() {
+        return formats;
+    }
+
 }
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java
index 484fafb..06de407 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java
@@ -38,6 +38,7 @@
 public class RecordWithMetadataParserFactory<I, O> implements IRecordDataParserFactory<I> {
 
     private static final long serialVersionUID = 1L;
+    private static String[] formats = { "record-with-metadata" };
     private ARecordType metaType;
     private ARecordType recordType;
     private IRecordDataParserFactory<O> recordParserFactory;
@@ -82,6 +83,11 @@
     }
 
     @Override
+    public String[] getFormats() {
+        return formats;
+    }
+
+    @Override
     public Class<?> getRecordClass() {
         return converterFactory.getInputClass();
     }
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
index 3539f6e..771f56a 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
@@ -31,6 +31,7 @@
 public class TweetParserFactory implements IRecordDataParserFactory<String> {
 
     private static final long serialVersionUID = 1L;
+    private static String[] formats = { "twitter-status" };
     private ARecordType recordType;
 
     @Override
@@ -59,4 +60,9 @@
         // do nothing
     }
 
+    @Override
+    public String[] getFormats() {
+        return formats;
+    }
+
 }
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
index ebe3276..76235fa 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
@@ -18,22 +18,27 @@
  */
 package org.apache.asterix.external.provider;
 
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.asterix.common.exceptions.AsterixException;
 import org.apache.asterix.common.library.ILibraryManager;
 import org.apache.asterix.external.api.IDataParserFactory;
-import org.apache.asterix.external.parser.factory.ADMDataParserFactory;
-import org.apache.asterix.external.parser.factory.DelimitedDataParserFactory;
-import org.apache.asterix.external.parser.factory.HiveDataParserFactory;
-import org.apache.asterix.external.parser.factory.RSSParserFactory;
-import org.apache.asterix.external.parser.factory.RecordWithMetadataParserFactory;
-import org.apache.asterix.external.parser.factory.TweetParserFactory;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
+import org.apache.commons.io.IOUtils;
 
 public class ParserFactoryProvider {
 
+    private static final String RESOURCE = "META-INF/services/org.apache.asterix.external.api.IDataParserFactory";
+    private static Map<String, Class> factories = null;
+
     private ParserFactoryProvider() {
     }
 
@@ -54,32 +59,59 @@
         return parserFactory;
     }
 
+    protected static IDataParserFactory getInstance(Class clazz) throws AsterixException {
+        try {
+            return (IDataParserFactory) clazz.newInstance();
+        } catch (IllegalAccessException | InstantiationException | ClassCastException e) {
+            throw new AsterixException("Cannot create: " + clazz.getSimpleName(), e);
+        }
+    }
+
     @SuppressWarnings("rawtypes")
     public static IDataParserFactory getDataParserFactory(String parser) throws AsterixException {
-        switch (parser) {
-            case ExternalDataConstants.FORMAT_ADM:
-            case ExternalDataConstants.FORMAT_JSON:
-            case ExternalDataConstants.FORMAT_SEMISTRUCTURED:
-                return new ADMDataParserFactory();
-            case ExternalDataConstants.FORMAT_DELIMITED_TEXT:
-            case ExternalDataConstants.FORMAT_CSV:
-                return new DelimitedDataParserFactory();
-            case ExternalDataConstants.FORMAT_HIVE:
-            case ExternalDataConstants.PARSER_HIVE:
-                return new HiveDataParserFactory();
-            case ExternalDataConstants.FORMAT_TWEET:
-                return new TweetParserFactory();
-            case ExternalDataConstants.FORMAT_RSS:
-                return new RSSParserFactory();
-            case ExternalDataConstants.FORMAT_RECORD_WITH_METADATA:
-                return new RecordWithMetadataParserFactory();
-            default:
-                try {
-                    return (IDataParserFactory) Class.forName(parser).newInstance();
-                } catch (IllegalAccessException | ClassNotFoundException | InstantiationException
-                        | ClassCastException e) {
-                    throw new AsterixException("Unknown format: " + parser, e);
-                }
+
+        if (factories == null) {
+            factories = initFactories();
         }
+
+        if (factories.containsKey(parser)) {
+            return getInstance(factories.get(parser));
+        }
+
+        try {
+            // ideally, this should not happen
+            return (IDataParserFactory) Class.forName(parser).newInstance();
+        } catch (IllegalAccessException | ClassNotFoundException | InstantiationException | ClassCastException e) {
+            throw new AsterixException("Unknown format: " + parser, e);
+        }
+    }
+
+    protected static Map<String, Class> initFactories() throws AsterixException {
+        Map<String, Class> factories = new HashMap<>();
+        ClassLoader cl = ParserFactoryProvider.class.getClassLoader();
+        final Charset encoding = Charset.forName("UTF-8");
+        try {
+            Enumeration<URL> urls = cl.getResources(RESOURCE);
+            for (URL url : Collections.list(urls)) {
+                InputStream is = url.openStream();
+                String config = IOUtils.toString(is, encoding);
+                is.close();
+                String[] classNames = config.split("\n");
+                for (String className : classNames) {
+                    final Class<?> clazz = Class.forName(className);
+                    String[] formats = ((IDataParserFactory) clazz.newInstance()).getFormats();
+                    for (String format : formats) {
+                        if (factories.containsKey(format)) {
+                            throw new AsterixException("Duplicate format " + format);
+                        }
+                        factories.put(format, clazz);
+                    }
+                }
+            }
+        } catch (IOException | ClassNotFoundException | InstantiationException
+                | IllegalAccessException e) {
+            throw new AsterixException(e);
+        }
+        return factories;
     }
 }
diff --git a/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory b/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory
new file mode 100644
index 0000000..840f619
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory
@@ -0,0 +1,6 @@
+org.apache.asterix.external.parser.factory.ADMDataParserFactory
+org.apache.asterix.external.parser.factory.DelimitedDataParserFactory
+org.apache.asterix.external.parser.factory.HiveDataParserFactory
+org.apache.asterix.external.parser.factory.RecordWithMetadataParserFactory
+org.apache.asterix.external.parser.factory.RSSParserFactory
+org.apache.asterix.external.parser.factory.TweetParserFactory
\ No newline at end of file
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java
index 17b83c2..d8cc3bb 100644
--- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java
@@ -34,6 +34,7 @@
 public class ClassAdParserFactory implements IRecordDataParserFactory<char[]> {
 
     private static final long serialVersionUID = 1L;
+    private static final String[] formats = { "line-separated" };
     public static final String KEY_OLD_FORMAT = "old-format";
     public static final String KEY_EVALUATE = "evaluate";
     public static final String KEY_KEEP_EXPR = "keep-expr";
@@ -121,4 +122,9 @@
     public void setMetaType(ARecordType metaType) {
     }
 
+    @Override
+    public String[] getFormats() {
+        return formats;
+    }
+
 }
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java
index 5b23094..91919d1 100644
--- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java
@@ -18,6 +18,7 @@
  */
 package org.apache.asterix.external.parser.factory;
 
+import java.util.ArrayList;
 import java.util.Map;
 import java.util.TreeMap;
 
@@ -37,6 +38,7 @@
 public class TestRecordWithPKParserFactory<T> implements IRecordDataParserFactory<RecordWithPK<T>> {
 
     private static final long serialVersionUID = 1L;
+    private static final ArrayList<String> formats = new ArrayList<>();
     private ARecordType recordType;
     private IRecordDataParserFactory<char[]> recordParserFactory;
     private String format;
@@ -49,6 +51,7 @@
     public void configure(Map<String, String> configuration) throws AsterixException {
         TreeMap<String, String> parserConf = new TreeMap<String, String>();
         format = configuration.get(ExternalDataConstants.KEY_RECORD_FORMAT);
+        formats.add(format);
         parserConf.put(ExternalDataConstants.KEY_FORMAT, format);
         recordParserFactory =
                 (IRecordDataParserFactory<char[]>) ParserFactoryProvider.getDataParserFactory(null, parserConf);
@@ -75,4 +78,10 @@
     @Override
     public void setMetaType(ARecordType metaType) {
     }
+
+    @Override
+    public String[] getFormats() {
+        return formats.toArray(new String[0]); // typed toArray: casting the Object[] from toArray() throws ClassCastException
+    }
+
 }
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java
new file mode 100644
index 0000000..effb7cd
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser.test;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IDataParserFactory;
+import org.apache.asterix.external.parser.factory.ADMDataParserFactory;
+import org.apache.asterix.external.parser.factory.DelimitedDataParserFactory;
+import org.apache.asterix.external.parser.factory.HiveDataParserFactory;
+import org.apache.asterix.external.parser.factory.RSSParserFactory;
+import org.apache.asterix.external.parser.factory.TweetParserFactory;
+import org.apache.asterix.external.provider.ParserFactoryProvider;
+import org.junit.Assert;
+import org.junit.Test;
+
+
+public class ParserFactoryProviderLoadParserTest {
+
+    IDataParserFactory factory;
+    @Test
+    public void test() throws AsterixException {
+        boolean result = true;
+        factory = ParserFactoryProvider.getDataParserFactory("csv");
+        result = result && factory instanceof DelimitedDataParserFactory;
+        factory = ParserFactoryProvider.getDataParserFactory("adm");
+        result = result && factory instanceof ADMDataParserFactory;
+        factory = ParserFactoryProvider.getDataParserFactory("rss");
+        result = result && factory instanceof RSSParserFactory;
+        factory = ParserFactoryProvider.getDataParserFactory("hive");
+        result = result && factory instanceof HiveDataParserFactory;
+        factory = ParserFactoryProvider.getDataParserFactory("twitter-status");
+        result = result && factory instanceof TweetParserFactory;
+        Assert.assertTrue(result);
+    }
+}