Load parser from classpath in ParserFactoryProvider
Change the behavior of ParserFactoryProvider so that it loads parser
factories from the classpath.
Change-Id: I2ac039fe3daaf0636cf004289bd0c8a3229197a9
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1416
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <tillw@apache.org>
diff --git a/asterixdb/asterix-external-data/pom.xml b/asterixdb/asterix-external-data/pom.xml
index 72c7997..3796a0d 100644
--- a/asterixdb/asterix-external-data/pom.xml
+++ b/asterixdb/asterix-external-data/pom.xml
@@ -106,6 +106,7 @@
<include>**/NOTICE</include>
<include>**/LICENSE</include>
<include>**/DEPENDENCIES</include>
+ <include>**/services/**</include>
</includes>
</configuration>
<executions>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
index 1fc97c9..3dd3903 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
@@ -58,4 +58,6 @@
* @param metaType
*/
public void setMetaType(ARecordType metaType);
+
+ public String[] getFormats();
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
index efc9574..b0a1db2 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
@@ -30,6 +30,7 @@
public class ADMDataParserFactory extends AbstractRecordStreamParserFactory<char[]> {
private static final long serialVersionUID = 1L;
+ private static String[] formats = { "adm", "json", "semi-structured" };
@Override
public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext ctx) {
@@ -56,4 +57,9 @@
public void setMetaType(ARecordType metaType) {
}
+ /**
+ * Returns the format names ("adm", "json", "semi-structured") served by this factory.
+ *
+ * @return a fresh copy of the supported format names
+ */
+ @Override
+ public String[] getFormats() {
+ // Hand out a copy so callers cannot alter the shared static array of format names.
+ return formats.clone();
+ }
+
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
index 786dac0..0c9fd34 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
@@ -36,6 +36,7 @@
public class DelimitedDataParserFactory extends AbstractRecordStreamParserFactory<char[]> {
private static final long serialVersionUID = 1L;
+ private static String[] formats = { "csv", "delimited-text" };
@Override
public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext ctx) throws HyracksDataException {
@@ -100,4 +101,9 @@
public void setMetaType(ARecordType metaType) {
}
+ /**
+ * Returns the format names ("csv", "delimited-text") served by this factory.
+ *
+ * @return a fresh copy of the supported format names
+ */
+ @Override
+ public String[] getFormats() {
+ // Hand out a copy so callers cannot alter the shared static array of format names.
+ return formats.clone();
+ }
+
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
index 15ecbfd..8914152 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
@@ -35,6 +35,7 @@
public class HiveDataParserFactory implements IRecordDataParserFactory<Writable> {
private static final long serialVersionUID = 1L;
+ private static String[] formats = { "hive", "hive-parser"};
private Map<String, String> configuration;
private ARecordType recordType;
private String hiveSerdeClassName;
@@ -72,4 +73,8 @@
public void setMetaType(ARecordType metaType) {
}
+ /**
+ * Returns the format names ("hive", "hive-parser") served by this factory.
+ *
+ * @return a fresh copy of the supported format names
+ */
+ @Override
+ public String[] getFormats() {
+ // Hand out a copy so callers cannot alter the shared static array of format names.
+ return formats.clone();
+ }
+
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
index 7465455..25308f2 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
@@ -31,6 +31,7 @@
public class RSSParserFactory implements IRecordDataParserFactory<SyndEntryImpl> {
private static final long serialVersionUID = 1L;
+ private static String[] formats = { "rss" };
private ARecordType recordType;
@Override
@@ -58,4 +59,9 @@
public void setMetaType(ARecordType metaType) {
}
+ /**
+ * Returns the format name ("rss") served by this factory.
+ *
+ * @return a fresh copy of the supported format names
+ */
+ @Override
+ public String[] getFormats() {
+ // Hand out a copy so callers cannot alter the shared static array of format names.
+ return formats.clone();
+ }
+
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java
index 484fafb..06de407 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java
@@ -38,6 +38,7 @@
public class RecordWithMetadataParserFactory<I, O> implements IRecordDataParserFactory<I> {
private static final long serialVersionUID = 1L;
+ private static String[] formats = { "record-with-metadata" };
private ARecordType metaType;
private ARecordType recordType;
private IRecordDataParserFactory<O> recordParserFactory;
@@ -82,6 +83,11 @@
}
@Override
+ public String[] getFormats() {
+ // Returns the format name ("record-with-metadata") served by this factory. A copy is
+ // handed out so callers cannot alter the shared static array of format names.
+ return formats.clone();
+ }
+
+ @Override
public Class<?> getRecordClass() {
return converterFactory.getInputClass();
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
index 3539f6e..771f56a 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
@@ -31,6 +31,7 @@
public class TweetParserFactory implements IRecordDataParserFactory<String> {
private static final long serialVersionUID = 1L;
+ private static String[] formats = { "twitter-status" };
private ARecordType recordType;
@Override
@@ -59,4 +60,9 @@
// do nothing
}
+ /**
+ * Returns the format name ("twitter-status") served by this factory.
+ *
+ * @return a fresh copy of the supported format names
+ */
+ @Override
+ public String[] getFormats() {
+ // Hand out a copy so callers cannot alter the shared static array of format names.
+ return formats.clone();
+ }
+
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
index ebe3276..76235fa 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
@@ -18,22 +18,27 @@
*/
package org.apache.asterix.external.provider;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
import java.util.Map;
import org.apache.asterix.common.exceptions.AsterixException;
import org.apache.asterix.common.library.ILibraryManager;
import org.apache.asterix.external.api.IDataParserFactory;
-import org.apache.asterix.external.parser.factory.ADMDataParserFactory;
-import org.apache.asterix.external.parser.factory.DelimitedDataParserFactory;
-import org.apache.asterix.external.parser.factory.HiveDataParserFactory;
-import org.apache.asterix.external.parser.factory.RSSParserFactory;
-import org.apache.asterix.external.parser.factory.RecordWithMetadataParserFactory;
-import org.apache.asterix.external.parser.factory.TweetParserFactory;
import org.apache.asterix.external.util.ExternalDataConstants;
import org.apache.asterix.external.util.ExternalDataUtils;
+import org.apache.commons.io.IOUtils;
public class ParserFactoryProvider {
+ private static final String RESOURCE = "META-INF/services/org.apache.asterix.external.api.IDataParserFactory";
+ private static Map<String, Class> factories = null;
+
private ParserFactoryProvider() {
}
@@ -54,32 +59,59 @@
return parserFactory;
}
+ /**
+ * Instantiates a parser factory through its no-argument constructor.
+ *
+ * @param clazz the factory class to instantiate
+ * @return a new instance of {@code clazz}
+ * @throws AsterixException if the class cannot be instantiated or accessed, or if it is
+ * not an {@link IDataParserFactory}
+ */
+ protected static IDataParserFactory getInstance(Class<?> clazz) throws AsterixException {
+ try {
+ return (IDataParserFactory) clazz.newInstance();
+ } catch (IllegalAccessException | InstantiationException | ClassCastException e) {
+ throw new AsterixException("Cannot create: " + clazz.getSimpleName(), e);
+ }
+ }
+
@SuppressWarnings("rawtypes")
public static IDataParserFactory getDataParserFactory(String parser) throws AsterixException {
- switch (parser) {
- case ExternalDataConstants.FORMAT_ADM:
- case ExternalDataConstants.FORMAT_JSON:
- case ExternalDataConstants.FORMAT_SEMISTRUCTURED:
- return new ADMDataParserFactory();
- case ExternalDataConstants.FORMAT_DELIMITED_TEXT:
- case ExternalDataConstants.FORMAT_CSV:
- return new DelimitedDataParserFactory();
- case ExternalDataConstants.FORMAT_HIVE:
- case ExternalDataConstants.PARSER_HIVE:
- return new HiveDataParserFactory();
- case ExternalDataConstants.FORMAT_TWEET:
- return new TweetParserFactory();
- case ExternalDataConstants.FORMAT_RSS:
- return new RSSParserFactory();
- case ExternalDataConstants.FORMAT_RECORD_WITH_METADATA:
- return new RecordWithMetadataParserFactory();
- default:
- try {
- return (IDataParserFactory) Class.forName(parser).newInstance();
- } catch (IllegalAccessException | ClassNotFoundException | InstantiationException
- | ClassCastException e) {
- throw new AsterixException("Unknown format: " + parser, e);
- }
+
+ // Build the format -> factory class registry on first use.
+ if (factories == null) {
+ factories = initFactories();
}
+
+ // Registered format name: create a fresh factory of the mapped class.
+ Class registered = factories.get(parser);
+ if (registered != null) {
+ return getInstance(registered);
+ }
+
+ try {
+ // Not a registered format name: treat the value as a factory class name.
+ // Ideally, this should not happen.
+ return (IDataParserFactory) Class.forName(parser).newInstance();
+ } catch (IllegalAccessException | ClassNotFoundException | InstantiationException | ClassCastException e) {
+ throw new AsterixException("Unknown format: " + parser, e);
+ }
+ }
+
+ /**
+ * Builds the registry that maps each format name to the parser factory class serving it.
+ * Every copy of {@link #RESOURCE} visible to this class's class loader is read; each
+ * non-blank line names a factory class, which is instantiated once to ask for its formats.
+ *
+ * @return a new map from format name to factory class
+ * @throws AsterixException if a resource cannot be read, a listed class cannot be loaded
+ * or instantiated or is not an {@link IDataParserFactory}, or two
+ * factories declare the same format
+ */
+ protected static Map<String, Class> initFactories() throws AsterixException {
+ Map<String, Class> factories = new HashMap<>();
+ ClassLoader cl = ParserFactoryProvider.class.getClassLoader();
+ final Charset encoding = Charset.forName("UTF-8");
+ try {
+ Enumeration<URL> urls = cl.getResources(RESOURCE);
+ for (URL url : Collections.list(urls)) {
+ String config;
+ // try-with-resources closes the stream even when reading it fails.
+ try (InputStream is = url.openStream()) {
+ config = IOUtils.toString(is, encoding);
+ }
+ for (String line : config.split("\n")) {
+ // Follow the provider-configuration file conventions: '#' starts a comment,
+ // surrounding whitespace (including the '\r' of CRLF files) is ignored,
+ // and blank lines are skipped.
+ int commentStart = line.indexOf('#');
+ String className = (commentStart >= 0 ? line.substring(0, commentStart) : line).trim();
+ if (className.isEmpty()) {
+ continue;
+ }
+ final Class<?> clazz = Class.forName(className);
+ String[] formats = ((IDataParserFactory) clazz.newInstance()).getFormats();
+ for (String format : formats) {
+ if (factories.containsKey(format)) {
+ throw new AsterixException("Duplicate format " + format);
+ }
+ factories.put(format, clazz);
+ }
+ }
+ }
+ } catch (IOException | ClassNotFoundException | InstantiationException
+ | IllegalAccessException | ClassCastException e) {
+ // ClassCastException: a listed class does not implement IDataParserFactory.
+ throw new AsterixException(e);
+ }
+ return factories;
}
}
diff --git a/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory b/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory
new file mode 100644
index 0000000..840f619
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory
@@ -0,0 +1,6 @@
+org.apache.asterix.external.parser.factory.ADMDataParserFactory
+org.apache.asterix.external.parser.factory.DelimitedDataParserFactory
+org.apache.asterix.external.parser.factory.HiveDataParserFactory
+org.apache.asterix.external.parser.factory.RecordWithMetadataParserFactory
+org.apache.asterix.external.parser.factory.RSSParserFactory
+org.apache.asterix.external.parser.factory.TweetParserFactory
\ No newline at end of file
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java
index 17b83c2..d8cc3bb 100644
--- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java
@@ -34,6 +34,7 @@
public class ClassAdParserFactory implements IRecordDataParserFactory<char[]> {
private static final long serialVersionUID = 1L;
+ private static final String[] formats = { "line-separated" };
public static final String KEY_OLD_FORMAT = "old-format";
public static final String KEY_EVALUATE = "evaluate";
public static final String KEY_KEEP_EXPR = "keep-expr";
@@ -121,4 +122,9 @@
public void setMetaType(ARecordType metaType) {
}
+ /**
+ * Returns the format name ("line-separated") served by this factory.
+ *
+ * @return a fresh copy of the supported format names
+ */
+ @Override
+ public String[] getFormats() {
+ // Hand out a copy so callers cannot alter the shared static array of format names.
+ return formats.clone();
+ }
+
}
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java
index 5b23094..91919d1 100644
--- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java
@@ -18,6 +18,7 @@
*/
package org.apache.asterix.external.parser.factory;
+import java.util.ArrayList;
import java.util.Map;
import java.util.TreeMap;
@@ -37,6 +38,7 @@
public class TestRecordWithPKParserFactory<T> implements IRecordDataParserFactory<RecordWithPK<T>> {
private static final long serialVersionUID = 1L;
+ private static final ArrayList<String> formats = new ArrayList<>();
private ARecordType recordType;
private IRecordDataParserFactory<char[]> recordParserFactory;
private String format;
@@ -49,6 +51,7 @@
public void configure(Map<String, String> configuration) throws AsterixException {
TreeMap<String, String> parserConf = new TreeMap<String, String>();
format = configuration.get(ExternalDataConstants.KEY_RECORD_FORMAT);
+ formats.add(format);
parserConf.put(ExternalDataConstants.KEY_FORMAT, format);
recordParserFactory =
(IRecordDataParserFactory<char[]>) ParserFactoryProvider.getDataParserFactory(null, parserConf);
@@ -75,4 +78,10 @@
@Override
public void setMetaType(ARecordType metaType) {
}
+
+ /**
+ * Returns the record formats collected so far in the static {@code formats} list.
+ *
+ * @return a new array holding the collected format names
+ */
+ @Override
+ public String[] getFormats() {
+ // toArray() without an argument returns Object[], and casting that to String[]
+ // throws ClassCastException; ask for a String[] explicitly instead.
+ return formats.toArray(new String[0]);
+ }
+
}
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java
new file mode 100644
index 0000000..effb7cd
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser.test;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IDataParserFactory;
+import org.apache.asterix.external.parser.factory.ADMDataParserFactory;
+import org.apache.asterix.external.parser.factory.DelimitedDataParserFactory;
+import org.apache.asterix.external.parser.factory.HiveDataParserFactory;
+import org.apache.asterix.external.parser.factory.RSSParserFactory;
+import org.apache.asterix.external.parser.factory.TweetParserFactory;
+import org.apache.asterix.external.provider.ParserFactoryProvider;
+import org.junit.Assert;
+import org.junit.Test;
+
+
+/**
+ * Verifies that {@link ParserFactoryProvider} resolves built-in format names to the
+ * expected parser factory classes.
+ */
+public class ParserFactoryProviderLoadParserTest {
+
+ IDataParserFactory factory;
+
+ /**
+ * Looks up several built-in format names and checks the type of each returned factory.
+ */
+ @Test
+ public void test() throws AsterixException {
+ assertResolvesTo("csv", DelimitedDataParserFactory.class);
+ assertResolvesTo("adm", ADMDataParserFactory.class);
+ assertResolvesTo("rss", RSSParserFactory.class);
+ assertResolvesTo("hive", HiveDataParserFactory.class);
+ assertResolvesTo("twitter-status", TweetParserFactory.class);
+ }
+
+ // Asserting per format, with a message, shows which lookup failed and what it returned,
+ // instead of one combined boolean that only reports "false".
+ private void assertResolvesTo(String format, Class<?> expected) throws AsterixException {
+ factory = ParserFactoryProvider.getDataParserFactory(format);
+ Assert.assertTrue("Format '" + format + "' resolved to " + factory.getClass().getName()
+ + ", expected " + expected.getName(), expected.isInstance(factory));
+ }
+}