# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
from __future__ import absolute_import, print_function
import os
import pkgutil
import sys
from distutils import sysconfig
from site import USER_SITE
import pkg_resources
from pkg_resources import EntryPoint, WorkingSet, find_distributions
from .common import die
from .compatibility import exec_function
from .environment import PEXEnvironment
from .executor import Executor
from .finders import get_entry_point_from_console_script, get_script_from_distributions
from .interpreter import PythonInterpreter
from .orderedset import OrderedSet
from .pex_info import PexInfo
from .tracer import TRACER
from .util import iter_pth_paths, merge_split, named_temporary_file
from .variables import ENV
class DevNull(object):
    """A write-only sink that silently discards everything sent to it."""

    def write(self, *args, **kw):
        """Accept any arguments and ignore them."""
        return None

    def flush(self):
        """Do nothing; nothing is ever buffered."""
        return None
class PEX(object):  # noqa: T000
    """PEX, n. A self-contained python environment."""

    class Error(Exception):
        """Base class for the exceptions declared on PEX."""

    class NotFound(Error):
        """Raised when a requested script cannot be found in the pex."""

    class InvalidEntryPoint(Error):
        """Raised when an entry point cannot be parsed or fails verification."""
@classmethod
def clean_environment(cls):
try:
del os.environ['MACOSX_DEPLOYMENT_TARGET']
except KeyError:
pass
# Cannot change dictionary size during __iter__
filter_keys = [key for key in os.environ if key.startswith('PEX_')]
for key in filter_keys:
del os.environ[key]
def __init__(self, pex=sys.argv[0], interpreter=None, env=ENV, verify_entry_point=False):
    """Bind to the pex at ``pex`` and load its metadata.

    :param pex: Path of the pex. The default is whatever ``sys.argv[0]`` held when this
      function was defined, not when it is called.
    :param interpreter: Interpreter to run with; when falsy, ``PythonInterpreter.get()``
      is used.
    :param env: Source of runtime variables and of the ``PexInfo`` overrides.
    :param verify_entry_point: When true, verify the configured entry point immediately.
    """
    if not interpreter:
        interpreter = PythonInterpreter.get()

    self._pex = pex
    self._interpreter = interpreter
    self._pex_info = PexInfo.from_pex(self._pex)
    self._pex_info_overrides = PexInfo.from_env(env=env)
    self._vars = env
    # Filled in by _activate().
    self._envs = []
    self._working_set = None

    if verify_entry_point:
        self._do_entry_point_verification()
def _activate(self):
if not self._working_set:
working_set = WorkingSet([])
# set up the local .pex environment
pex_info = self._pex_info.copy()
pex_info.update(self._pex_info_overrides)
pex_info.merge_pex_path(self._vars.PEX_PATH)
self._envs.append(PEXEnvironment(self._pex, pex_info, interpreter=self._interpreter))
# N.B. by this point, `pex_info.pex_path` will contain a single pex path
# merged from pex_path in `PEX-INFO` and `PEX_PATH` set in the environment.
# `PEX_PATH` entries written into `PEX-INFO` take precedence over those set
# in the environment.
if pex_info.pex_path:
# set up other environments as specified in pex_path
for pex_path in filter(None, pex_info.pex_path.split(os.pathsep)):
pex_info = PexInfo.from_pex(pex_path)
pex_info.update(self._pex_info_overrides)
self._envs.append(PEXEnvironment(pex_path, pex_info, interpreter=self._interpreter))
# activate all of them
for env in self._envs:
for dist in env.activate():
working_set.add(dist)
self._working_set = working_set
return self._working_set
@classmethod
def _extras_paths(cls):
    """Yield extra import paths contributed by the interpreter build and by .pth files.

    First come the non-empty ``EXTRASPATH`` entries of the Makefile reported by distutils'
    sysconfig, each joined onto the standard library directory. Then comes every path
    that ``iter_pth_paths`` produces for each ``.pth`` file in the site-packages
    directories.
    """
    stdlib_dir = sysconfig.get_python_lib(standard_lib=True)

    try:
        makefile_vars = sysconfig.parse_makefile(sysconfig.get_makefile_filename())
    except (AttributeError, IOError):
        # This is not available by default in PyPy's distutils.sysconfig or it simply is
        # no longer available on the system (IOError ENOENT)
        makefile_vars = {}

    for extra in makefile_vars.get('EXTRASPATH', '').split(':'):
        if extra:
            yield os.path.join(stdlib_dir, extra)

    # Handle .pth injected paths as extras.
    for pth_file in cls._scan_pth_files(cls._get_site_packages()):
        TRACER.log('Found .pth file: %s' % pth_file, V=3)
        for injected_path in iter_pth_paths(pth_file):
            yield injected_path
@staticmethod
def _scan_pth_files(dir_paths):
"""Given an iterable of directory paths, yield paths to all .pth files within."""
for dir_path in dir_paths:
if not os.path.exists(dir_path):
continue
pth_filenames = (f for f in os.listdir(dir_path) if f.endswith('.pth'))
for pth_filename in pth_filenames:
yield os.path.join(dir_path, pth_filename)
@staticmethod
def _get_site_packages():
try:
from site import getsitepackages
return set(getsitepackages())
except ImportError:
return set()
@classmethod
def site_libs(cls):
site_libs = cls._get_site_packages()
site_libs.update([sysconfig.get_python_lib(plat_specific=False),
sysconfig.get_python_lib(plat_specific=True)])
# On windows getsitepackages() returns the python stdlib too.
if sys.prefix in site_libs:
site_libs.remove(sys.prefix)
real_site_libs = set(os.path.realpath(path) for path in site_libs)
return site_libs | real_site_libs
@classmethod
def _tainted_path(cls, path, site_libs):
paths = frozenset([path, os.path.realpath(path)])
return any(path.startswith(site_lib) for site_lib in site_libs for path in paths)
[docs] @classmethod
def minimum_sys_modules(cls, site_libs, modules=None):
"""Given a set of site-packages paths, return a "clean" sys.modules.
When importing site, modules within sys.modules have their __path__'s populated with
additional paths as defined by *-nspkg.pth in site-packages, or alternately by distribution
metadata such as *.dist-info/namespace_packages.txt. This can possibly cause namespace
packages to leak into imports despite being scrubbed from sys.path.
NOTE: This method mutates modules' __path__ attributes in sys.module, so this is currently an
irreversible operation.
"""
modules = modules or sys.modules
new_modules = {}
for module_name, module in modules.items():
# builtins can stay
if not hasattr(module, '__path__'):
new_modules[module_name] = module
continue
# Unexpected objects, e.g. namespace packages, should just be dropped:
if not isinstance(module.__path__, list):
TRACER.log('Dropping %s' % (module_name,), V=3)
continue
# Pop off site-impacting __path__ elements in-place.
for k in reversed(range(len(module.__path__))):
if cls._tainted_path(module.__path__[k], site_libs):
TRACER.log('Scrubbing %s.__path__: %s' % (module_name, module.__path__[k]), V=3)
module.__path__.pop(k)
# It still contains path elements not in site packages, so it can stay in sys.modules
if module.__path__:
new_modules[module_name] = module
return new_modules
@classmethod
def minimum_sys_path(cls, site_libs, inherit_path):
    """Compute ``sys.path`` and ``sys.path_importer_cache`` with site entries scrubbed.

    :param site_libs: Site library paths; a ``sys.path`` entry under one of them is
      considered tainted.
    :param inherit_path: Scrubbing happens only when this equals the string ``'false'``;
      for any other value nothing is scrubbed (``sys.path`` is still de-duplicated).
    :returns: A ``(scrubbed_sys_path, scrubbed_importer_cache)`` tuple. Neither
      ``sys.path`` nor ``sys.path_importer_cache`` is modified here.
    """
    scrub_paths = OrderedSet()
    site_distributions = OrderedSet()
    user_site_distributions = OrderedSet()

    def all_distribution_paths(path):
        # site.USER_SITE can be None; there is nothing to look up in that case.
        if path is None:
            return set()
        locations = set(dist.location for dist in find_distributions(path))
        return set([path]) | locations | set(os.path.realpath(loc) for loc in locations)

    for path_element in sys.path:
        if cls._tainted_path(path_element, site_libs):
            TRACER.log('Tainted path element: %s' % path_element)
            site_distributions.update(all_distribution_paths(path_element))
        else:
            TRACER.log('Not a tainted path element: %s' % path_element, V=2)

    user_site_distributions.update(all_distribution_paths(USER_SITE))

    if inherit_path == 'false':
        scrub_paths = site_distributions | user_site_distributions
        for path in user_site_distributions:
            TRACER.log('Scrubbing from user site: %s' % path)
        for path in site_distributions:
            TRACER.log('Scrubbing from site-packages: %s' % path)

    scrubbed_sys_path = list(OrderedSet(sys.path) - scrub_paths)

    # Build a real list: on Python 3 `filter` returns a one-shot iterator, which the
    # membership tests below would exhaust, leaving entries unscrubbed and unlogged.
    scrub_from_importer_cache = [
        key for key in sys.path_importer_cache
        if any(key.startswith(path) for path in scrub_paths)
    ]
    scrubbed_keys = frozenset(scrub_from_importer_cache)
    scrubbed_importer_cache = dict(
        (key, value) for (key, value) in sys.path_importer_cache.items()
        if key not in scrubbed_keys)

    for importer_cache_entry in scrub_from_importer_cache:
        TRACER.log('Scrubbing from path_importer_cache: %s' % importer_cache_entry, V=2)

    return scrubbed_sys_path, scrubbed_importer_cache
@classmethod
def minimum_sys(cls, inherit_path):
    """Return the minimum sys necessary to run this interpreter, a la python -S.

    :param inherit_path: Passed through unchanged to ``minimum_sys_path``.
    :returns: (sys.path, sys.path_importer_cache, sys.modules) tuple of a
      bare python installation.
    """
    discovered = set(cls.site_libs())
    for site_lib in discovered:
        TRACER.log('Found site-library: %s' % site_lib)

    for extras_path in cls._extras_paths():
        TRACER.log('Found site extra: %s' % extras_path)
        discovered.add(extras_path)

    normalized = set(map(os.path.normpath, discovered))

    path, importer_cache = cls.minimum_sys_path(normalized, inherit_path)
    modules = cls.minimum_sys_modules(normalized)
    return path, importer_cache, modules
@classmethod
def patch_pkg_resources(cls, working_set):
    """Patch pkg_resources given a new working set.

    Rebinds the module-level ``working_set`` and the helper functions derived from it
    (``require``, ``iter_entry_points``, ``run_script``, ``run_main`` and
    ``add_activation_listener``) so that they refer to ``working_set``.
    """
    pkg_resources.working_set = working_set
    for helper in ('require', 'iter_entry_points'):
        setattr(pkg_resources, helper, getattr(working_set, helper))

    # run_script and run_main share one and the same callable.
    run_script = working_set.run_script
    pkg_resources.run_script = run_script
    pkg_resources.run_main = run_script

    pkg_resources.add_activation_listener = working_set.subscribe
# Thar be dragons -- when this function exits, the interpreter is potentially in a wonky state
# since the patches here (minimum_sys_modules for example) actually mutate global state.
def patch_sys(self, inherit_path):
    """Patch sys with all site scrubbed.

    :param inherit_path: Passed through to ``minimum_sys``.
    """
    path, importer_cache, modules = self.minimum_sys(inherit_path)
    path.extend(merge_split(self._pex_info.pex_path, self._vars.PEX_PATH))

    # All three objects are updated in place rather than rebound.
    sys.path[:] = path
    for live, replacement in ((sys.path_importer_cache, importer_cache),
                              (sys.modules, modules)):
        live.clear()
        live.update(replacement)
def _wrap_coverage(self, runner, *args):
if not self._vars.PEX_COVERAGE and self._vars.PEX_COVERAGE_FILENAME is None:
return runner(*args)
try:
import coverage
except ImportError:
die('Could not bootstrap coverage module, aborting.')
pex_coverage_filename = self._vars.PEX_COVERAGE_FILENAME
if pex_coverage_filename is not None:
cov = coverage.coverage(data_file=pex_coverage_filename)
else:
cov = coverage.coverage(data_suffix=True)
TRACER.log('Starting coverage.')
cov.start()
try:
return runner(*args)
finally:
TRACER.log('Stopping coverage')
cov.stop()
# TODO(wickman) Post-process coverage to elide $PEX_ROOT and make
# the report more useful/less noisy. #89
if pex_coverage_filename:
cov.save()
else:
cov.report(show_missing=False, ignore_errors=True, file=sys.stdout)
def _wrap_profiling(self, runner, *args):
if not self._vars.PEX_PROFILE and self._vars.PEX_PROFILE_FILENAME is None:
return runner(*args)
pex_profile_filename = self._vars.PEX_PROFILE_FILENAME
pex_profile_sort = self._vars.PEX_PROFILE_SORT
try:
import cProfile as profile
except ImportError:
import profile
profiler = profile.Profile()
try:
return profiler.runcall(runner, *args)
finally:
if pex_profile_filename is not None:
profiler.dump_stats(pex_profile_filename)
else:
profiler.print_stats(sort=pex_profile_sort)
[docs] def path(self):
"""Return the path this PEX was built at."""
return self._pex
def execute(self):
    """Execute the PEX.

    This function makes assumptions that it is the last function called by
    the interpreter: it patches ``sys`` and ``pkg_resources`` and, unless
    ``PEX_TEARDOWN_VERBOSE`` is set, finishes by replacing ``sys.stderr`` and
    ``sys.excepthook`` with do-nothing stand-ins.

    A truthy result from the application is turned into ``sys.exit(result)``.
    """
    teardown_verbosity = self._vars.PEX_TEARDOWN_VERBOSE
    try:
        inherit_path = self._vars.PEX_INHERIT_PATH
        if inherit_path == "false":
            # Fall back to the setting stored in the pex itself.
            inherit_path = self._pex_info.inherit_path
        self.patch_sys(inherit_path)
        self.patch_pkg_resources(self._activate())
        exit_code = self._wrap_coverage(self._wrap_profiling, self._execute)
        if exit_code:
            sys.exit(exit_code)
    except Exception:
        # Allow the current sys.excepthook to handle this app exception before we tear things
        # down in finally, then reraise so that the exit status is reflected correctly.
        sys.excepthook(*sys.exc_info())
        raise
    except SystemExit as exit_request:
        # Print a SystemExit error message, avoiding a traceback in python3.
        # This must happen here, as sys.stderr is about to be torn down
        code = exit_request.code
        if code is not None and not isinstance(code, int):
            print(code, file=sys.stderr)
        raise
    finally:
        # squash all exceptions on interpreter teardown -- the primary type here are
        # atexit handlers failing to run because of things such as:
        #   http://stackoverflow.com/questions/2572172/referencing-other-modules-in-atexit
        if not teardown_verbosity:
            sys.stderr.flush()
            sys.stderr = DevNull()
            sys.excepthook = lambda *a, **kw: None
def _execute(self):
force_interpreter = self._vars.PEX_INTERPRETER
self.clean_environment()
if force_interpreter:
TRACER.log('PEX_INTERPRETER specified, dropping into interpreter')
return self.execute_interpreter()
if self._pex_info_overrides.script and self._pex_info_overrides.entry_point:
die('Cannot specify both script and entry_point for a PEX!')
if self._pex_info.script and self._pex_info.entry_point:
die('Cannot specify both script and entry_point for a PEX!')
if self._pex_info_overrides.script:
return self.execute_script(self._pex_info_overrides.script)
elif self._pex_info_overrides.entry_point:
return self.execute_entry(self._pex_info_overrides.entry_point)
elif self._pex_info.script:
return self.execute_script(self._pex_info.script)
elif self._pex_info.entry_point:
return self.execute_entry(self._pex_info.entry_point)
else:
TRACER.log('No entry point specified, dropping into interpreter')
return self.execute_interpreter()
@classmethod
def demote_bootstrap(cls):
    """Move the bootstrap directory to the end of ``sys.path`` and un-import its modules.

    Every top-level module found in the bootstrap directory, other than this module's own
    root package, is removed from ``sys.modules`` together with its submodules.

    :raises ValueError: If the computed bootstrap directory is not an entry of
      ``sys.path``.
    """
    TRACER.log('Bootstrap complete, performing final sys.path modifications...')

    bootstrap_path = __file__
    module_import_path = __name__.split('.')
    root_package = module_import_path[0]

    # For example, our __file__ might be requests.pex/.bootstrap/_pex/pex.pyc and our import
    # path _pex.pex; so we walk back through all the module components of our import path to
    # find the base sys.path entry where we were found (requests.pex/.bootstrap in this
    # example).
    for _ in module_import_path:
        bootstrap_path = os.path.dirname(bootstrap_path)
    bootstrap_path_index = sys.path.index(bootstrap_path)

    # Move the third party resources pex uses to the end of sys.path for the duration of the
    # run to allow conflicting versions supplied by user dependencies to win during the course
    # of the execution of user code.
    for _, mod, _ in pkgutil.iter_modules([bootstrap_path]):
        if mod == root_package:  # We let _pex stay imported
            continue
        TRACER.log('Un-importing third party bootstrap dependency %s from %s'
                   % (mod, bootstrap_path))
        # iter_modules lists what is importable from the directory, whether or not it was
        # ever imported, so tolerate names that are absent from sys.modules.
        sys.modules.pop(mod, None)
        submod_prefix = mod + '.'
        for submod in [m for m in sys.modules if m.startswith(submod_prefix)]:
            sys.modules.pop(submod, None)

    sys.path.pop(bootstrap_path_index)
    sys.path.append(bootstrap_path)

    TRACER.log('PYTHONPATH contains:')
    for element in sys.path:
        TRACER.log('  %c %s' % (' ' if os.path.exists(element) else '*', element))
    TRACER.log('  * - paths that do not exist or will be imported via zipimport')
def execute_interpreter(self):
    """Behave like a Python interpreter for the arguments in ``sys.argv[1:]``.

    * no arguments: start an interactive console;
    * ``-c <cmd>``: execute the command string;
    * ``-m <module>``: run the module as ``__main__``;
    * ``-``: execute the program read from standard input;
    * anything else: treat the first argument as a script file and execute it.

    ``-c`` or ``-m`` without a following argument, and a script file that cannot be
    opened, are both reported via ``die``.
    """
    args = sys.argv[1:]
    if not args:
        self.demote_bootstrap()
        import code
        code.interact()
        return

    # NB: We take care here to setup sys.argv to match how CPython does it for each case.
    arg = args[0]
    if arg in ('-c', '-m'):
        if len(args) < 2:
            # Without this guard the args[1] lookup below fails with an IndexError.
            die('Argument expected for the %s option' % arg)
        if arg == '-c':
            content = args[1]
            sys.argv = ['-c'] + args[2:]
            self.execute_content('-c <cmd>', content, argv0='-c')
        else:
            module = args[1]
            sys.argv = args[1:]
            self.execute_module(module)
        return

    try:
        if arg == '-':
            content = sys.stdin.read()
        else:
            with open(arg) as fp:
                content = fp.read()
    except IOError as e:
        die("Could not open %s in the environment [%s]: %s" % (arg, sys.argv[0], e))
    sys.argv = args
    self.execute_content(arg, content)
def execute_script(self, script_name):
    """Run the script called ``script_name`` from the activated distributions.

    A matching console-script entry point is executed and the process exits with its
    result. Otherwise a script file of that name is looked up and its content executed.

    :raises PEX.NotFound: If no distribution provides the script.
    """
    distributions = list(self._activate())

    owner, entry_point = get_entry_point_from_console_script(script_name, distributions)
    if entry_point:
        TRACER.log('Found console_script %r in %r' % (entry_point, owner))
        sys.exit(self.execute_entry(entry_point))

    owner, script_path, script_content = get_script_from_distributions(
        script_name, distributions)
    if not owner:
        raise self.NotFound('Could not find script %r in pex!' % script_name)

    TRACER.log('Found script %r in %r' % (script_name, owner))
    return self.execute_content(script_path, script_content, argv0=script_name)
@classmethod
def execute_content(cls, name, content, argv0=None):
    """Compile ``content`` and execute it as ``__main__`` in this module's globals.

    While the code runs, ``__name__`` is ``'__main__'``, ``__file__`` is ``name`` and
    ``sys.argv[0]`` is ``argv0`` (``name`` when ``argv0`` is falsy); all three are put back
    afterwards.

    :param name: File name recorded in the compiled code and used as ``__file__``.
    :param content: Python source to execute.
    :param argv0: Optional replacement for ``sys.argv[0]`` during execution.
    """
    cls.demote_bootstrap()
    argv0 = argv0 or name

    try:
        ast = compile(content, name, 'exec', flags=0, dont_inherit=1)
    except SyntaxError:
        die('Unable to parse %s.  PEX script support only supports Python scripts.' % name)

    namespace = globals()
    restored_keys = ('__name__', '__file__')
    saved = dict((key, namespace.get(key)) for key in restored_keys)

    try:
        old_argv0, sys.argv[0] = sys.argv[0], argv0
        namespace['__name__'] = '__main__'
        namespace['__file__'] = name
        exec_function(ast, namespace)
    finally:
        for key in restored_keys:
            if saved[key]:
                namespace[key] = saved[key]
            else:
                namespace.pop(key)
        sys.argv[0] = old_argv0
@classmethod
def execute_entry(cls, entry_point):
runner = cls.execute_pkg_resources if ':' in entry_point else cls.execute_module
return runner(entry_point)
@classmethod
def execute_module(cls, module_name):
    """Demote the bootstrap, then run ``module_name`` as ``__main__`` via ``runpy``."""
    cls.demote_bootstrap()

    from runpy import run_module
    run_module(module_name, run_name='__main__')
@classmethod
def execute_pkg_resources(cls, spec):
    """Resolve the ``module:callable`` entry point ``spec``, call it, return its result."""
    cls.demote_bootstrap()

    entry = EntryPoint.parse("run = {0}".format(spec))

    # See https://pythonhosted.org/setuptools/history.html#id25 for rationale here.
    # setuptools >= 11.3 offers resolve(); older releases need load(require=False).
    runner = entry.resolve() if hasattr(entry, 'resolve') else entry.load(require=False)
    return runner()
[docs] def cmdline(self, args=()):
"""The commandline to run this environment.
:keyword args: Additional arguments to be passed to the application being invoked by the
environment.
"""
cmds = [self._interpreter.binary]
cmds.append(self._pex)
cmds.extend(args)
return cmds
def run(self, args=(), with_chroot=False, blocking=True, setsid=False, **kwargs):
    """Run the PythonEnvironment in an interpreter in a subprocess.

    Note that the calling process's environment is cleaned (see ``clean_environment``)
    before the subprocess is started.

    :keyword args: Additional arguments to be passed to the application being invoked by the
      environment.
    :keyword with_chroot: Run with cwd set to the environment's working directory.
    :keyword blocking: If true, return the return code of the subprocess.
      If false, return the Popen object of the invoked subprocess.
    :keyword setsid: If true, run the PEX in a separate operating system session.

    Remaining keyword arguments are passed directly to subprocess.Popen.
    """
    self.clean_environment()

    cmdline = self.cmdline(args)
    TRACER.log('PEX.run invoking %s' % ' '.join(cmdline))

    working_dir = self._pex if with_chroot else os.getcwd()
    # The standard streams default to None when the caller does not supply them.
    stdin = kwargs.pop('stdin', None)
    stdout = kwargs.pop('stdout', None)
    stderr = kwargs.pop('stderr', None)

    process = Executor.open_process(cmdline,
                                    cwd=working_dir,
                                    preexec_fn=os.setsid if setsid else None,
                                    stdin=stdin,
                                    stdout=stdout,
                                    stderr=stderr,
                                    **kwargs)
    if blocking:
        return process.wait()
    return process
def _do_entry_point_verification(self):
entry_point = self._pex_info.entry_point
ep_split = entry_point.split(':')
# a.b.c:m ->
# ep_module = 'a.b.c'
# ep_method = 'm'
# Only module is specified
if len(ep_split) == 1:
ep_module = ep_split[0]
import_statement = 'import {}'.format(ep_module)
elif len(ep_split) == 2:
ep_module = ep_split[0]
ep_method = ep_split[1]
import_statement = 'from {} import {}'.format(ep_module, ep_method)
else:
raise self.InvalidEntryPoint("Failed to parse: `{}`".format(entry_point))
with named_temporary_file() as fp:
fp.write(import_statement.encode('utf-8'))
fp.close()
retcode = self.run([fp.name], env={'PEX_INTERPRETER': '1'})
if retcode != 0:
raise self.InvalidEntryPoint('Invalid entry point: `{}`\n'
'Entry point verification failed: `{}`'
.format(entry_point, import_statement))