Collection content loading (#52194)

* basic plugin loading working (with many hacks) * task collections working * play/block-level collection module/action working * implement PEP302 loader * implicit package support (no need for __init__.py in collections) * provides future options for secure loading of content that shouldn't execute inside controller (eg, actively ignore __init__.py on content/module paths) * provide hook for synthetic collection setup (eg ansible.core pseudo-collection for specifying built-in plugins without legacy path, etc) * synthetic package support * ansible.core.plugins mapping works, others don't * synthetic collections working for modules/actions * fix direct-load legacy * change base package name to ansible_collections * note * collection role loading * expand paths from installed content root vars * feature complete? * rename ansible.core to ansible.builtin * and various sanity fixes * sanity tweaks * unittest fixes * less grabby error handler on has_plugin * probably need to replace with a or harden callers * fix win_ping test * disable module test with explicit file extension; might be able to support in some scenarios, but can't see any other tests that verify that behavior... 
* fix unicode conversion issues on py2 * attempt to keep things working-ish on py2.6 * python2.6 test fun round 2 * rename dirs/configs to "collections" * add wrapper dir for content-adjacent * fix pythoncheck to use localhost * unicode tweaks, native/bytes string prefixing * rename COLLECTION_PATHS to COLLECTIONS_PATHS * switch to pathspec * path handling cleanup * change expensive `all` back to or chain * unused import cleanup * quotes tweak * use wrapped iter/len in Jinja proxy * var name expansion * comment seemingly overcomplicated playbook_paths resolution * drop unnecessary conditional nesting * eliminate extraneous local * zap superfluous validation function * use slice for rolespec NS assembly * misc naming/unicode fixes * collection callback loader asks if valid FQ name instead of just '.' * switch collection role resolution behavior to be internally `text` as much as possible * misc fixmes * to_native in exception constructor * (slightly) detangle tuple accumulation mess in module_utils __init__ walker * more misc fixmes * tighten up action dispatch, add unqualified action test * rename Collection mixin to CollectionSearch * (attempt to) avoid potential confusion/conflict with builtin collections, etc * stale fixmes * tighten up pluginloader collections determination * sanity test fixes * ditch regex escape * clarify comment * update default collections paths config entry * use PATH format instead of list * skip integration tests on Python 2.6 ci_complete
2025-07-13 08:30:50 -07:00 · 2019-03-28 10:41:39 -07:00 · 2019-03-28 10:41:39 -07:00 · f86345f777
commit f86345f777
parent 5173548a9f
56 changed files with 1512 additions and 109 deletions
--- a/lib/ansible/executor/module_common.py
+++ b/lib/ansible/executor/module_common.py
@@ -29,6 +29,7 @@ import os
 import shlex
 import zipfile
 import re
+import pkgutil
 from io import BytesIO

 from ansible.release import __version__, __author__
@@ -45,6 +46,18 @@ from ansible.executor import action_write_locks

 from ansible.utils.display import Display

+# HACK: keep Python 2.6 controller tests happy in CI until they're properly split
+try:
+    from importlib import import_module
+except ImportError:
+    import_module = __import__
+
+# if we're on a Python that doesn't have FNFError, redefine it as IOError (since that's what we'll see)
+try:
+    FileNotFoundError
+except NameError:
+    FileNotFoundError = IOError
+
 display = Display()

 REPLACER = b"#<<INCLUDE_ANSIBLE_MODULE_COMMON>>"
@@ -429,10 +442,14 @@ class ModuleDepFinder(ast.NodeVisitor):

    def visit_Import(self, node):
        # import ansible.module_utils.MODLIB[.MODLIBn] [as asname]
-        for alias in (a for a in node.names if a.name.startswith('ansible.module_utils.')):
-            py_mod = alias.name[self.IMPORT_PREFIX_SIZE:]
-            py_mod = tuple(py_mod.split('.'))
-            self.submodules.add(py_mod)
+        for alias in node.names:
+            if alias.name.startswith('ansible.module_utils.'):
+                py_mod = alias.name[self.IMPORT_PREFIX_SIZE:]
+                py_mod = tuple(py_mod.split('.'))
+                self.submodules.add(py_mod)
+            elif alias.name.startswith('ansible_collections.'):
+                # keep 'ansible_collections.' as a sentinel prefix to trigger collection-loaded MU path
+                self.submodules.add(tuple(alias.name.split('.')))
        self.generic_visit(node)

    def visit_ImportFrom(self, node):
@@ -453,6 +470,10 @@ class ModuleDepFinder(ast.NodeVisitor):
                # from ansible.module_utils import MODLIB [,MODLIB2] [as asname]
                for alias in node.names:
                    self.submodules.add((alias.name,))
+
+        elif node.module.startswith('ansible_collections.'):
+            # TODO: finish out the subpackage et al cases
+            self.submodules.add(tuple(node.module.split('.')))
        self.generic_visit(node)


@@ -555,6 +576,20 @@ def recursive_finder(name, data, py_module_names, py_module_cache, zf):
            module_info = imp.find_module('_six', [os.path.join(p, 'six') for p in module_utils_paths])
            py_module_name = ('six', '_six')
            idx = 0
+        elif py_module_name[0] == 'ansible_collections':
+            # FIXME: replicate module name resolution like below for granular imports
+            # this is a collection-hosted MU; look it up with get_data
+            package_name = '.'.join(py_module_name[:-1])
+            resource_name = py_module_name[-1] + '.py'
+            try:
+                # FIXME: need this in py2 for some reason TBD, but we shouldn't (get_data delegates to wrong loader without it)
+                pkg = import_module(package_name)
+                module_info = pkgutil.get_data(package_name, resource_name)
+            except FileNotFoundError:
+                # FIXME: implement package fallback code
+                raise AnsibleError('unable to load collection-hosted module_util {0}.{1}'.format(to_native(package_name),
+                                                                                                 to_native(resource_name)))
+            idx = 0
        else:
            # Check whether either the last or the second to last identifier is
            # a module name
@@ -577,56 +612,78 @@ def recursive_finder(name, data, py_module_names, py_module_cache, zf):
                msg.append(py_module_name[-1])
            raise AnsibleError(' '.join(msg))

-        # Found a byte compiled file rather than source.  We cannot send byte
-        # compiled over the wire as the python version might be different.
-        # imp.find_module seems to prefer to return source packages so we just
-        # error out if imp.find_module returns byte compiled files (This is
-        # fragile as it depends on undocumented imp.find_module behaviour)
-        if module_info[2][2] not in (imp.PY_SOURCE, imp.PKG_DIRECTORY):
-            msg = ['Could not find python source for imported module support code for %s.  Looked for' % name]
+        if isinstance(module_info, bytes):  # collection-hosted, just the code
+            # HACK: maybe surface collection dirs in here and use existing find_module code?
+            normalized_name = py_module_name
+            normalized_data = module_info
+            normalized_path = os.path.join(*py_module_name)
+            py_module_cache[normalized_name] = (normalized_data, normalized_path)
+            normalized_modules.add(normalized_name)
+
+            # HACK: walk back up the package hierarchy to pick up package inits; this won't do the right thing
+            # for actual packages yet...
+            accumulated_pkg_name = []
+            for pkg in py_module_name[:-1]:
+                accumulated_pkg_name.append(pkg)  # we're accumulating this across iterations
+                normalized_name = tuple(accumulated_pkg_name[:] + ['__init__'])  # extra machinations to get a hashable type (list is not)
+                if normalized_name not in py_module_cache:
+                    normalized_path = os.path.join(*accumulated_pkg_name)
+                    # HACK: possibly preserve some of the actual package file contents; problematic for extend_paths and others though?
+                    normalized_data = ''
+                    py_module_cache[normalized_name] = (normalized_data, normalized_path)
+                    normalized_modules.add(normalized_name)
+
+        else:
+            # Found a byte compiled file rather than source.  We cannot send byte
+            # compiled over the wire as the python version might be different.
+            # imp.find_module seems to prefer to return source packages so we just
+            # error out if imp.find_module returns byte compiled files (This is
+            # fragile as it depends on undocumented imp.find_module behaviour)
+            if module_info[2][2] not in (imp.PY_SOURCE, imp.PKG_DIRECTORY):
+                msg = ['Could not find python source for imported module support code for %s.  Looked for' % name]
+                if idx == 2:
+                    msg.append('either %s.py or %s.py' % (py_module_name[-1], py_module_name[-2]))
+                else:
+                    msg.append(py_module_name[-1])
+                raise AnsibleError(' '.join(msg))
+
            if idx == 2:
-                msg.append('either %s.py or %s.py' % (py_module_name[-1], py_module_name[-2]))
-            else:
-                msg.append(py_module_name[-1])
-            raise AnsibleError(' '.join(msg))
+                # We've determined that the last portion was an identifier and
+                # thus, not part of the module name
+                py_module_name = py_module_name[:-1]

-        if idx == 2:
-            # We've determined that the last portion was an identifier and
-            # thus, not part of the module name
-            py_module_name = py_module_name[:-1]
+            # If not already processed then we've got work to do
+            # If not in the cache, then read the file into the cache
+            # We already have a file handle for the module open so it makes
+            # sense to read it now
+            if py_module_name not in py_module_cache:
+                if module_info[2][2] == imp.PKG_DIRECTORY:
+                    # Read the __init__.py instead of the module file as this is
+                    # a python package
+                    normalized_name = py_module_name + ('__init__',)
+                    if normalized_name not in py_module_names:
+                        normalized_path = os.path.join(module_info[1], '__init__.py')
+                        normalized_data = _slurp(normalized_path)
+                        py_module_cache[normalized_name] = (normalized_data, normalized_path)
+                        normalized_modules.add(normalized_name)
+                else:
+                    normalized_name = py_module_name
+                    if normalized_name not in py_module_names:
+                        normalized_path = module_info[1]
+                        normalized_data = module_info[0].read()
+                        module_info[0].close()
+                        py_module_cache[normalized_name] = (normalized_data, normalized_path)
+                        normalized_modules.add(normalized_name)

-        # If not already processed then we've got work to do
-        # If not in the cache, then read the file into the cache
-        # We already have a file handle for the module open so it makes
-        # sense to read it now
-        if py_module_name not in py_module_cache:
-            if module_info[2][2] == imp.PKG_DIRECTORY:
-                # Read the __init__.py instead of the module file as this is
-                # a python package
-                normalized_name = py_module_name + ('__init__',)
-                if normalized_name not in py_module_names:
-                    normalized_path = os.path.join(os.path.join(module_info[1], '__init__.py'))
-                    normalized_data = _slurp(normalized_path)
-                    py_module_cache[normalized_name] = (normalized_data, normalized_path)
-                    normalized_modules.add(normalized_name)
-            else:
-                normalized_name = py_module_name
-                if normalized_name not in py_module_names:
-                    normalized_path = module_info[1]
-                    normalized_data = module_info[0].read()
-                    module_info[0].close()
-                    py_module_cache[normalized_name] = (normalized_data, normalized_path)
-                    normalized_modules.add(normalized_name)
-
-            # Make sure that all the packages that this module is a part of
-            # are also added
-            for i in range(1, len(py_module_name)):
-                py_pkg_name = py_module_name[:-i] + ('__init__',)
-                if py_pkg_name not in py_module_names:
-                    pkg_dir_info = imp.find_module(py_pkg_name[-1],
-                                                   [os.path.join(p, *py_pkg_name[:-1]) for p in module_utils_paths])
-                    normalized_modules.add(py_pkg_name)
-                    py_module_cache[py_pkg_name] = (_slurp(pkg_dir_info[1]), pkg_dir_info[1])
+                # Make sure that all the packages that this module is a part of
+                # are also added
+                for i in range(1, len(py_module_name)):
+                    py_pkg_name = py_module_name[:-i] + ('__init__',)
+                    if py_pkg_name not in py_module_names:
+                        pkg_dir_info = imp.find_module(py_pkg_name[-1],
+                                                       [os.path.join(p, *py_pkg_name[:-1]) for p in module_utils_paths])
+                        normalized_modules.add(py_pkg_name)
+                        py_module_cache[py_pkg_name] = (_slurp(pkg_dir_info[1]), pkg_dir_info[1])

    # FIXME: Currently the AnsiBallZ wrapper monkeypatches module args into a global
    # variable in basic.py.  If a module doesn't import basic.py, then the AnsiBallZ wrapper will
@@ -653,10 +710,16 @@ def recursive_finder(name, data, py_module_names, py_module_cache, zf):
    unprocessed_py_module_names = normalized_modules.difference(py_module_names)

    for py_module_name in unprocessed_py_module_names:
+        # HACK: this seems to work as a way to identify a collections-based import, but a stronger identifier would be better
+        if not py_module_cache[py_module_name][1].startswith('/'):
+            dir_prefix = ''
+        else:
+            dir_prefix = 'ansible/module_utils'
+
        py_module_path = os.path.join(*py_module_name)
        py_module_file_name = '%s.py' % py_module_path

-        zf.writestr(os.path.join("ansible/module_utils",
+        zf.writestr(os.path.join(dir_prefix,
                    py_module_file_name), py_module_cache[py_module_name][0])
        display.vvvvv("Using module_utils file %s" % py_module_cache[py_module_name][1])