<Hermetic> Implement CPython2 Launcher in C++.
The launcher is a wrapper that statically links the Python interpreter
into the .par file. It is used to bootstrap the embedded
interpreter.
The next step is to change/integrate with Soong to make this hermetic .par
generation process more automatic.
Bug: b/62380596
Test: The launcher has been tested using real files:
zip -r hermetic.zip entry_point.txt Stdlib/ runfiles/
cat launcher | cat - hermetic.zip > executable && chmod u+x executable
Change-Id: I293cae2fe74d46766044f3e3c4b654a54d319b67
diff --git a/Launcher/launcher_internal.cpp b/Launcher/launcher_internal.cpp
new file mode 100644
index 0000000..be4d48e
--- /dev/null
+++ b/Launcher/launcher_internal.cpp
@@ -0,0 +1,216 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "launcher_internal.h"
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+extern "C" {
+// CPython built-in C functions, declared here so the launcher can call them.
+/*
+ read_directory(archive) -> files dict (new reference)
+
+ Given a path to a Zip archive, build a dict, mapping file names (local to the
+ archive, using SEP as a separator) to toc entries. Callers check for NULL.
+*/
+PyObject *read_directory(const char *archive);
+
+/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
+ data as a new reference. Callers in this file treat NULL as failure. */
+PyObject *get_data(const char *archive, PyObject *toc_entry);
+}
+
+namespace android {
+namespace cpython2 {
+namespace python_launcher {
+namespace internal{
+
+// Runs "module" as __main__ by calling runpy._run_module_as_main(module, set_argv0).
+// Returns 0 on success and -1 on failure; failures are reported on stderr.
+int RunModule(const char *module, int set_argv0) {
+  PyObject *runpy_module = PyImport_ImportModule("runpy");
+  if (!runpy_module) {
+    fprintf(stderr, "Could not import runpy module\n");
+    return -1;
+  }
+  PyObject *run_as_main = PyObject_GetAttrString(runpy_module, "_run_module_as_main");
+  if (!run_as_main) {
+    fprintf(stderr, "Could not access runpy._run_module_as_main\n");
+    Py_DECREF(runpy_module);
+    return -1;
+  }
+  PyObject *call_args = Py_BuildValue("(si)", module, set_argv0);
+  if (!call_args) {
+    fprintf(stderr,
+            "Could not create arguments for runpy._run_module_as_main\n");
+    Py_DECREF(runpy_module);
+    Py_DECREF(run_as_main);
+    return -1;
+  }
+  PyObject *outcome = PyObject_Call(run_as_main, call_args, NULL);
+  // Report the Python exception before the references are released.
+  if (!outcome) {
+    PyErr_Print();
+  }
+  Py_DECREF(runpy_module);
+  Py_DECREF(run_as_main);
+  Py_DECREF(call_args);
+  const int status = outcome ? 0 : -1;
+  Py_XDECREF(outcome);
+  return status;
+}
+
+// Reads ENTRYPOINT_FILE from the archive at "launcher_path" and returns the
+// entry point it names, with trailing whitespace and the mandatory ".py"
+// extension removed. Returns an empty string if the archive directory, the
+// entry or its data cannot be obtained, or if the content does not end in
+// ".py" with a non-empty stem (a Python ValueError is set in that last case).
+std::string GetEntryPointFilePath(const char *launcher_path) {
+  PyObject *files = read_directory(launcher_path);
+  if (files == NULL) {
+    return std::string();
+  }
+  // Return value: Borrowed reference, only used while "files" is still held.
+  PyObject *toc_entry = PyDict_GetItemString(files, ENTRYPOINT_FILE);
+  if (toc_entry == NULL) {
+    Py_DECREF(files);
+    return std::string();
+  }
+  PyObject *py_data = get_data(launcher_path, toc_entry);
+  Py_DECREF(files);
+  if (py_data == NULL) {
+    return std::string();
+  }
+  // PyString_AsString returns a NUL-terminated view of "py_data" that must be
+  // neither modified nor deallocated, so take a deep copy before releasing the
+  // object. A std::string owns the copy, so no return path can leak it.
+  const char *data = PyString_AsString(py_data);
+  if (data == NULL) {
+    Py_DECREF(py_data);
+    return std::string();
+  }
+  std::string entry_point(data);
+  Py_DECREF(py_data);
+
+  // Strip newline and other trailing whitespace (the C-locale isspace() set).
+  // npos + 1 wraps to 0, so an all-whitespace string is emptied entirely.
+  entry_point.erase(entry_point.find_last_not_of(" \t\n\v\f\r") + 1);
+
+  // Check for the file extension.
+  static const char kExtension[] = ".py";
+  const size_t ext_len = sizeof(kExtension) - 1;
+  if (entry_point.size() <= ext_len ||
+      entry_point.compare(entry_point.size() - ext_len, ext_len, kExtension) != 0) {
+    PyErr_Format(PyExc_ValueError, "Invalid entrypoint in %s: %s",
+                 ENTRYPOINT_FILE, entry_point.c_str());
+    return std::string();
+  }
+  entry_point.erase(entry_point.size() - ext_len);
+  return entry_point;
+}
+
+// Runs the Python module named by "entrypoint", a '/'-separated path with the
+// ".py" extension already removed.
+// "launcher_path" and then "launcher_path/RUNFILES" are each inserted at the
+// front of sys.path before the module is run.
+// Returns -1 if "entrypoint" is empty or sys.path cannot be updated;
+// otherwise returns the result of RunModule().
+int RunModuleNameFromEntryPoint(const char *launcher_path, std::string entrypoint) {
+  if (entrypoint.empty()) {
+    return -1;
+  }
+  // Replace file system path separator with Python package/module separator.
+  // "entrypoint" is passed by value, so it can be edited in place without a
+  // separately allocated (and separately freed) copy.
+  for (char &ch : entrypoint) {
+    if (ch == '/') {
+      ch = '.';
+    }
+  }
+
+  if (AddPathToPythonSysPath(launcher_path) < 0) {
+    return -1;
+  }
+  // Build "<launcher_path>/<RUNFILES>" in a std::string rather than in a
+  // variable-length array, which is not standard C++.
+  const std::string runfiles_path = std::string(launcher_path) + "/" + RUNFILES;
+  if (AddPathToPythonSysPath(runfiles_path.c_str()) < 0) {
+    return -1;
+  }
+  return RunModule(entrypoint.c_str(), 0);
+}
+
+// Inserts "path" at the front of Python's sys.path.
+// Returns 0 on success. Returns -1 if "path" is NULL, the Python string
+// cannot be created, sys.path is unavailable or the insertion fails.
+int AddPathToPythonSysPath(const char *path) {
+  if (path == NULL) {
+    return -1;
+  }
+  PyObject *py_path = PyString_FromString(path);
+  if (py_path == NULL) {
+    return -1;
+  }
+  // Return value: Borrowed reference.
+  PyObject *sys_path = PySys_GetObject(const_cast<char *>("path"));
+  // PyList_Insert reports failure with -1 (e.g. sys.path is not a list), so
+  // propagate its result instead of claiming success unconditionally.
+  const int status =
+      (sys_path != NULL && PyList_Insert(sys_path, 0, py_path) == 0) ? 0 : -1;
+  Py_DECREF(py_path);
+  return status;
+}
+
+// Runs the "__main__" module. When PyImport_GetImporter() yields a real
+// importer for "launcher_path" (neither None nor a NullImporter), the path is
+// inserted at the front of sys.path first. Returns -1 if the importer lookup
+// or the sys.path update fails; otherwise returns the result of RunModule().
+int RunMainFromImporter(const char *launcher_path) {
+  PyObject *path_obj = PyString_FromString(launcher_path);
+  if (!path_obj) {
+    return -1;
+  }
+  PyObject *path_importer = PyImport_GetImporter(path_obj);
+  if (!path_importer) {
+    Py_DECREF(path_obj);
+    return -1;
+  }
+  // Launcher path is usable as an import source only when a real importer
+  // claimed it; in that case it goes to sys.path[0] before importing __main__.
+  const bool usable_as_source =
+      path_importer != Py_None && path_importer->ob_type != &PyNullImporter_Type;
+  const bool ready = !usable_as_source || AddPathToPythonSysPath(launcher_path) >= 0;
+  // Both temporaries are released on the success and the failure path alike.
+  Py_DECREF(path_importer);
+  Py_DECREF(path_obj);
+  return ready ? RunModule("__main__", 0) : -1;
+}
+} // namespace internal
+
+// Runs the ENTRYPOINT_FILE entry point, or the __main__ module as fallback.
+int RunEntryPointOrMainModule(const char *launcher_path) {
+  const std::string entrypoint = internal::GetEntryPointFilePath(launcher_path);
+  if (!entrypoint.empty()) {
+    return internal::RunModuleNameFromEntryPoint(launcher_path, entrypoint);
+  }
+  if (PyErr_Occurred()) PyErr_Print();  // Don't run the fallback with a stale error set.
+  fprintf(stderr, "Cannot find valid entry point to execute par file!\n");
+  fprintf(stdout, "Start trying to run __main__ module within par file.\n");
+  return internal::RunMainFromImporter(launcher_path);
+}
+} // namespace python_launcher
+} // namespace cpython2
+} // namespace android
diff --git a/Launcher/launcher_internal.h b/Launcher/launcher_internal.h
new file mode 100644
index 0000000..285176a
--- /dev/null
+++ b/Launcher/launcher_internal.h
@@ -0,0 +1,62 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ANDROID_CPYTHON2_PYTHON_LAUNCHER_INTERNAL_H
+#define ANDROID_CPYTHON2_PYTHON_LAUNCHER_INTERNAL_H
+
+#include <Python.h>
+
+#include <string>
+
+namespace android {
+namespace cpython2 {
+namespace python_launcher {
+
+namespace internal{
+#define ENTRYPOINT_FILE "entry_point.txt"  // Archive member naming the entry point script.
+#define RUNFILES "runfiles"  // Sub-directory of the launcher path added to sys.path.
+
+// Use the "runpy" module to locate and run a Python module by its name in the
+// module namespace rather than by filesystem path. Return 0 as success, -1 otherwise.
+// The caller owns "module" pointer, which cannot be NULL.
+int RunModule(const char *module, int set_argv0);
+
+// Read ENTRYPOINT_FILE and return its path with the ".py" extension removed.
+// The caller owns "launcher_path" pointer, which cannot be NULL.
+// Return non-empty string as success. Otherwise, return empty string.
+std::string GetEntryPointFilePath(const char *launcher_path);
+
+// Run the module named by "entrypoint" ('/' is mapped to '.'). Return 0 as success,
+// -1 otherwise. The caller owns "launcher_path" pointer, which cannot be NULL.
+int RunModuleNameFromEntryPoint(const char *launcher_path, std::string entrypoint);
+
+// Insert path at the front of Python sys.path list. Return 0 as success.
+// The caller owns "path" pointer; a NULL "path" is rejected with -1.
+int AddPathToPythonSysPath(const char *path);
+
+// Run __main__ module within the hermetic .par file.
+// The caller owns "launcher_path" pointer, which cannot be NULL.
+// Return 0 as success. Otherwise, return -1.
+int RunMainFromImporter(const char *launcher_path);
+} // namespace internal
+
+// Try to run the Python script named in ENTRYPOINT_FILE. Otherwise,
+// run __main__ module as fallback. Return 0 as success, -1 otherwise.
+// The caller owns "launcher_path" pointer, which cannot be NULL.
+int RunEntryPointOrMainModule(const char *launcher_path);
+} // namespace python_launcher
+} // namespace cpython2
+} // namespace android
+
+#endif // ANDROID_CPYTHON2_PYTHON_LAUNCHER_INTERNAL_H
diff --git a/Launcher/launcher_main.cpp b/Launcher/launcher_main.cpp
new file mode 100644
index 0000000..a49998c
--- /dev/null
+++ b/Launcher/launcher_main.cpp
@@ -0,0 +1,124 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "launcher_internal.h"
+
+#include <Python.h>
+#include <android-base/file.h>
+#include <osdefs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+
+// Entry point of the hermetic launcher: scrubs Python-related environment
+// variables, initializes the embedded interpreter with the real executable
+// path as argv[0], then runs the packaged program. The process exit status is
+// abs(result): 0 on success, non-zero on failure.
+int main(int argc, char *argv[]) {
+  int result = 0 /* Used to mark if current program runs with success/failure. */;
+
+  // Clear PYTHONPATH and PYTHONHOME so Python doesn't attempt to check the local
+  // disk for Python modules to load. The value of PYTHONHOME will replace "prefix"
+  // and "exe_prefix" based on the description in getpath.c.
+  // Please don't use PYTHONPATH and PYTHONHOME within user program.
+  unsetenv(const_cast<char *>("PYTHONPATH"));
+  // TODO(nanzhang): figure out if Py_SetPythonHome() is better.
+  unsetenv(const_cast<char *>("PYTHONHOME"));
+  // PYTHONEXECUTABLE is only used on Mac OS X, when the Python interpreter is
+  // embedded in an application bundle. It is not clear that we have this use
+  // case for Android hermetic Python, so unset this environment variable for
+  // now to make our self-contained environment more strict.
+  // A user (.py) program can access the hermetic .par file path through
+  // sys.argv[0].
+  unsetenv(const_cast<char *>("PYTHONEXECUTABLE"));
+
+  // Resolving absolute path based on argv[0] is not reliable since it may
+  // include something unusable, too bad.
+  // android::base::GetExecutablePath() also handles for Darwin/Windows.
+  std::string executable_path = android::base::GetExecutablePath();
+
+  argv[0] = strdup(executable_path.c_str());
+  // argv[0] is used for setting internal path, and Python sys.argv[0]. It
+  // should not exceed MAXPATHLEN defined for CPython.
+  if (!argv[0] || strlen(argv[0]) > MAXPATHLEN) {
+    // Never pass a null pointer for "%s"; print a placeholder if strdup() failed.
+    fprintf(stderr, "The executable path %s is NULL or of invalid length.\n",
+            argv[0] ? argv[0] : "(null)");
+    return 1;
+  }
+
+  // For debugging/logging purpose, set stdin/stdout/stderr unbuffered through
+  // environment variable.
+  // TODO(nanzhang): Set Py_VerboseFlag if more debugging requests needed.
+  const char *unbuffered_env = getenv("PYTHONUNBUFFERED");
+  if (unbuffered_env && unbuffered_env[0]) {
+#if defined(MS_WINDOWS) || defined(__CYGWIN__)
+    _setmode(fileno(stdin), O_BINARY);
+    _setmode(fileno(stdout), O_BINARY);
+#endif
+#ifdef HAVE_SETVBUF
+    setvbuf(stdin, (char *)NULL, _IONBF, BUFSIZ);
+    setvbuf(stdout, (char *)NULL, _IONBF, BUFSIZ);
+    setvbuf(stderr, (char *)NULL, _IONBF, BUFSIZ);
+#else /* !HAVE_SETVBUF */
+    setbuf(stdin, (char *)NULL);
+    setbuf(stdout, (char *)NULL);
+    setbuf(stderr, (char *)NULL);
+#endif /* !HAVE_SETVBUF */
+  }
+  // For debugging/logging purpose, warning control.
+  // Python's warning machinery by default prints warning messages to sys.stderr.
+  // The full form of an option is: action:message:category:module:line
+  const char *warnings_env = getenv("PYTHONWARNINGS");
+  if (warnings_env && warnings_env[0]) {
+    // strtok() writes into the buffer it splits, so tokenize a private copy of
+    // the comma-separated list. The std::string frees the copy on scope exit.
+    std::string warnings_buf(warnings_env);
+    for (char *warning = strtok(&warnings_buf[0], ",");
+         warning != NULL;
+         warning = strtok(NULL, ",")) {
+      PySys_AddWarnOption(warning);
+    }
+  }
+
+  // Always enable Python "-s" option. We don't need user-site directories,
+  // everything's supposed to be hermetic.
+  Py_NoUserSiteDirectory = 1;
+
+  Py_SetProgramName(argv[0]);
+  Py_Initialize();
+  PySys_SetArgvEx(argc, argv, 0);
+
+  // Set sys.executable to None. The real executable is available as
+  // sys.argv[0], and too many things assume sys.executable is a regular Python
+  // binary, which isn't available. By setting it to None we get clear errors
+  // when people try to use it.
+  if (PySys_SetObject(const_cast<char *>("executable"), Py_None) < 0) {
+    PyErr_Print();
+    result = 1;
+    goto error;
+  }
+
+  result = android::cpython2::python_launcher::RunEntryPointOrMainModule(argv[0]);
+  if (result < 0) {
+    PyErr_Print();
+  }
+
+error:
+  // Shared shutdown path, reached on success as well as on failure.
+  Py_Finalize();
+
+  free(argv[0]);
+  exit(abs(result));
+}