[ASTERIXDB-2855] Allow additions to Python UDF env
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- Add a new configuration parameter to allow
appending new environment variables to the
Python interpreter process used in Python
UDFs.
- Add test to check that it works.
- Skip adding empty args, path or env during
setup of Python UDF commandline
Change-Id: Ib6e1ee7debc9c2e07d24163542b1f98886792161
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/10644
Reviewed-by: Ian Maxon <imaxon@uci.edu>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
Reviewed-by: Till Westmann <tillw@apache.org>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-app/src/test/resources/TweetSent/roundtrip.py b/asterixdb/asterix-app/src/test/resources/TweetSent/roundtrip.py
index 8b8fced..9058a01 100644
--- a/asterixdb/asterix-app/src/test/resources/TweetSent/roundtrip.py
+++ b/asterixdb/asterix-app/src/test/resources/TweetSent/roundtrip.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
import math
+import os
def sqrt(num):
return math.sqrt(num)
@@ -26,3 +27,6 @@
def warning(self):
raise ArithmeticError("oof")
+
+ def env_test(self, key):
+ return os.environ[key]
diff --git a/asterixdb/asterix-app/src/test/resources/cc.conf b/asterixdb/asterix-app/src/test/resources/cc.conf
index e2cd5b9..953284964 100644
--- a/asterixdb/asterix-app/src/test/resources/cc.conf
+++ b/asterixdb/asterix-app/src/test/resources/cc.conf
@@ -34,6 +34,7 @@
[nc]
credential.file=src/test/resources/security/passwd
python.cmd.autolocate=true
+python.env=FOO=BAR=BAZ,BAR=BAZ
address=127.0.0.1
command=asterixnc
app.class=org.apache.asterix.hyracks.bootstrap.NCApplication
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp
index 0ad9fb3..74fe03f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.2.ddl.sqlpp
@@ -23,4 +23,7 @@
as "roundtrip", "Tests.warning" at testlib;
create function roundtrip(s)
- as "roundtrip", "Tests.roundtrip" at testlib;
\ No newline at end of file
+ as "roundtrip", "Tests.roundtrip" at testlib;
+
+create function env_test(k)
+ as "roundtrip", "Tests.env_test" at testlib;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.5.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.5.query.sqlpp
new file mode 100644
index 0000000..7334470
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.5.query.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Access a records nested records at each level.
+* Expected Res : Success
+* Date : 04 Jun 2015
+*/
+// param max-warnings:json=0
+
+use test;
+
+env_test("FOO");
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.6.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.6.query.sqlpp
new file mode 100644
index 0000000..092af06
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/py_function_error/py_function_error.6.query.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Access a records nested records at each level.
+* Expected Res : Success
+* Date : 04 Jun 2015
+*/
+// param max-warnings:json=0
+
+use test;
+
+env_test("BAR");
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/py_function_error/py_function_error.3.json b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/py_function_error/py_function_error.3.json
new file mode 100644
index 0000000..58de662
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/py_function_error/py_function_error.3.json
@@ -0,0 +1 @@
+"BAR=BAZ"
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/py_function_error/py_function_error.4.json b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/py_function_error/py_function_error.4.json
new file mode 100644
index 0000000..f73df98
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/py_function_error/py_function_error.4.json
@@ -0,0 +1 @@
+"BAZ"
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_it_python.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_it_python.xml
index 4e1d5b2..35bec85 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_it_python.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_it_python.xml
@@ -57,7 +57,7 @@
result[0].append(self.next_tuple(*arg, key=self.mid))
File "entrypoint.py", line 99, in next_tuple
return self.wrapped_fns[key](*args)
- File "site-packages/roundtrip.py", line 28, in warning
+ File "site-packages/roundtrip.py", line 29, in warning
raise ArithmeticError("oof")
ArithmeticError: oof
(in line 28, at column 1)</expected-warn>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluator.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluator.java
index e2229ee..457b86a 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluator.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluator.java
@@ -27,6 +27,7 @@
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.asterix.common.exceptions.AsterixException;
@@ -65,18 +66,21 @@
private IPCSystem ipcSys;
private String sitePkgs;
private List<String> pythonArgs;
+ private Map<String, String> pythonEnv;
private TaskAttemptId task;
private IWarningCollector warningCollector;
private SourceLocation sourceLoc;
public PythonLibraryEvaluator(JobId jobId, PythonLibraryEvaluatorId evaluatorId, ILibraryManager libMgr,
- File pythonHome, String sitePkgs, List<String> pythonArgs, ExternalFunctionResultRouter router,
- IPCSystem ipcSys, TaskAttemptId task, IWarningCollector warningCollector, SourceLocation sourceLoc) {
+ File pythonHome, String sitePkgs, List<String> pythonArgs, Map<String, String> pythonEnv,
+ ExternalFunctionResultRouter router, IPCSystem ipcSys, TaskAttemptId task,
+ IWarningCollector warningCollector, SourceLocation sourceLoc) {
super(jobId, evaluatorId);
this.libMgr = libMgr;
this.pythonHome = pythonHome;
this.sitePkgs = sitePkgs;
this.pythonArgs = pythonArgs;
+ this.pythonEnv = pythonEnv;
this.router = router;
this.task = task;
this.ipcSys = ipcSys;
@@ -98,8 +102,8 @@
args.add(InetAddress.getLoopbackAddress().getHostAddress());
args.add(Integer.toString(port));
args.add(sitePkgs);
-
ProcessBuilder pb = new ProcessBuilder(args.toArray(new String[0]));
+ pb.environment().putAll(pythonEnv);
pb.directory(new File(wd));
p = pb.start();
proto = new PythonIPCProto(p.getOutputStream(), router, p);
@@ -199,14 +203,15 @@
public static PythonLibraryEvaluator getInstance(IExternalFunctionInfo finfo, ILibraryManager libMgr,
ExternalFunctionResultRouter router, IPCSystem ipcSys, File pythonHome, IHyracksTaskContext ctx,
- String sitePkgs, List<String> pythonArgs, IWarningCollector warningCollector, SourceLocation sourceLoc)
- throws IOException, AsterixException {
+ String sitePkgs, List<String> pythonArgs, Map<String, String> pythonEnv, IWarningCollector warningCollector,
+ SourceLocation sourceLoc) throws IOException, AsterixException {
PythonLibraryEvaluatorId evaluatorId = new PythonLibraryEvaluatorId(finfo.getLibraryDataverseName(),
finfo.getLibraryName(), Thread.currentThread());
PythonLibraryEvaluator evaluator = (PythonLibraryEvaluator) ctx.getStateObject(evaluatorId);
if (evaluator == null) {
evaluator = new PythonLibraryEvaluator(ctx.getJobletContext().getJobId(), evaluatorId, libMgr, pythonHome,
- sitePkgs, pythonArgs, router, ipcSys, ctx.getTaskAttemptId(), warningCollector, sourceLoc);
+ sitePkgs, pythonArgs, pythonEnv, router, ipcSys, ctx.getTaskAttemptId(), warningCollector,
+ sourceLoc);
ctx.getJobletContext().registerDeallocatable(evaluator);
evaluator.initialize();
ctx.setStateObject(evaluator);
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluatorFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluatorFactory.java
index 86d51de..06c9bc9 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluatorFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/PythonLibraryEvaluatorFactory.java
@@ -23,8 +23,9 @@
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import org.apache.asterix.common.api.INcApplicationContext;
import org.apache.asterix.common.exceptions.AsterixException;
@@ -46,6 +47,7 @@
private final ExternalFunctionResultRouter router;
private final String sitePackagesPath;
private final List<String> pythonArgs;
+ private final Map<String, String> pythonEnv;
public PythonLibraryEvaluatorFactory(IHyracksTaskContext ctx) throws AsterixException {
this.ctx = ctx;
@@ -67,17 +69,41 @@
+ NCConfig.Option.PYTHON_CMD_AUTOLOCATE.ini() + " is false");
}
}
+ pythonEnv = new HashMap<>();
+ String[] envRaw = appCfg.getStringArray((NCConfig.Option.PYTHON_ENV));
+ if (envRaw != null) {
+ for (String rawEnvArg : envRaw) {
+ //TODO: i think equals is shared among all unixes and windows. but it needs verification
+ if (rawEnvArg.length() < 1) {
+ continue;
+ }
+ String[] rawArgSplit = rawEnvArg.split("(?<!\\\\)=", 2);
+ if (rawArgSplit.length < 2) {
+ throw AsterixException.create(ErrorCode.EXTERNAL_UDF_EXCEPTION,
+ "Invalid environment variable format detected.");
+ }
+ pythonEnv.put(rawArgSplit[0], rawArgSplit[1]);
+ }
+ }
pythonPath = new File(pythonPathCmd);
List<String> sitePkgs = new ArrayList<>();
sitePkgs.add(SITE_PACKAGES);
String[] addlSitePackages = appCfg.getStringArray((NCConfig.Option.PYTHON_ADDITIONAL_PACKAGES));
- sitePkgs.addAll(Arrays.asList(addlSitePackages));
+ for (String sitePkg : addlSitePackages) {
+ if (sitePkg.length() > 0) {
+ sitePkgs.add(sitePkg);
+ }
+ }
if (appCfg.getBoolean(NCConfig.Option.PYTHON_USE_BUNDLED_MSGPACK)) {
sitePkgs.add("ipc" + File.separator + SITE_PACKAGES + File.separator);
}
String[] pythonArgsRaw = appCfg.getStringArray(NCConfig.Option.PYTHON_ARGS);
if (pythonArgsRaw != null) {
- pythonArgs.addAll(Arrays.asList(pythonArgsRaw));
+ for (String arg : pythonArgsRaw) {
+ if (arg.length() > 0) {
+ pythonArgs.add(arg);
+ }
+ }
}
StringBuilder sitePackagesPathBuilder = new StringBuilder();
for (int i = 0; i < sitePkgs.size() - 1; i++) {
@@ -91,6 +117,6 @@
public PythonLibraryEvaluator getEvaluator(IExternalFunctionInfo fnInfo, SourceLocation sourceLoc)
throws IOException, AsterixException {
return PythonLibraryEvaluator.getInstance(fnInfo, libraryManager, router, ipcSys, pythonPath, ctx,
- sitePackagesPath, pythonArgs, ctx.getWarningCollector(), sourceLoc);
+ sitePackagesPath, pythonArgs, pythonEnv, ctx.getWarningCollector(), sourceLoc);
}
}
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
index 22b240a..01cb9bf 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
@@ -98,6 +98,7 @@
PYTHON_ADDITIONAL_PACKAGES(STRING_ARRAY, new String[0]),
PYTHON_USE_BUNDLED_MSGPACK(BOOLEAN, true),
PYTHON_ARGS(STRING_ARRAY, (String[]) null),
+ PYTHON_ENV(STRING_ARRAY, (String[]) null),
CREDENTIAL_FILE(
OptionTypes.STRING,
(Function<IApplicationConfig, String>) appConfig -> FileUtil
@@ -245,6 +246,8 @@
return "Python args to pass to Python interpreter";
case PYTHON_CMD_AUTOLOCATE:
return "Whether or not to attempt to automatically set PYTHON_CMD to a usable interpreter";
+ case PYTHON_ENV:
+ return "List of environment variables to set when invoking the Python interpreter for Python UDFs. E.g. FOO=1";
case CREDENTIAL_FILE:
return "Path to HTTP basic credentials";
default:
@@ -621,4 +624,5 @@
public String getCredentialFilePath() {
return getAppConfig().getString(Option.CREDENTIAL_FILE);
}
+
}