""" ``Source`` objects represent a single <Source> tag in
``sources.xml``. Note that a single Source tag can itself describe
multiple repositories (if it uses the "url" attribute instead of
"rawurl"), and so can the ``Source`` object. This can be the source
(har har) of some confusion. See
:func:`Bcfg2.Server.Plugins.Packages.Collection.Collection.sourcelist`
for the proper way to get all repos from a ``Source`` object.
Source objects are aggregated into
:class:`Bcfg2.Server.Plugins.Packages.Collection.Collection`
objects, which are actually called by
:class:`Bcfg2.Server.Plugins.Packages.Packages`. This way a more
advanced subclass can query repositories in aggregate rather than
individually, which may give faster or more accurate results.
The base ``Source`` object must be subclassed to handle each
repository type. How you subclass ``Source`` will depend on how you
subclassed
:class:`Bcfg2.Server.Plugins.Packages.Collection.Collection`; see
:mod:`Bcfg2.Server.Plugins.Packages.Collection` for more details on
different methods for doing that.
If you are using the stock (or a near-stock)
:class:`Bcfg2.Server.Plugins.Packages.Collection.Collection` object,
then you will need to implement the following methods and attributes
in your ``Source`` subclass:
* :func:`Source.urls`
* :func:`Source.read_files`
Additionally, you may want to consider overriding the following
methods and attributes:
* :func:`Source.is_virtual_package`
* :func:`Source.get_group`
* :func:`Source.unknown_filter`
* :func:`Source.load_state`
* :func:`Source.save_state`
For an example of this kind of ``Source`` object, see
:mod:`Bcfg2.Server.Plugins.Packages.Apt`.
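A minimal sketch of such a subclass (illustrative only; the class
name, package type, and metadata format are invented for this
example) might look like::

    from Bcfg2.Server.Plugins.Packages.Source import Source

    class ExampleSource(Source):
        ptype = 'example'

        @property
        def urls(self):
            # one flat metadata file per repository in this source
            return [m['url'] + 'packages.txt' for m in self.url_map]

        def read_files(self):
            # parse the locally cached metadata, populate pkgnames,
            # then hand the parsed data to process_files()
            deps = {'global': dict()}
            provides = {'global': dict()}
            for fname in self.files:
                for line in open(fname):
                    pkgname = line.strip()
                    if not pkgname:
                        continue
                    self.pkgnames.add(pkgname)
                    deps['global'][pkgname] = []
                    provides['global'][pkgname] = []
            self.process_files(deps, provides)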
If you are overriding the ``Collection`` object in more depth, then
you have more leeway in what you might want to override or implement
in your ``Source`` subclass. For an example of this kind of
``Source`` object, see :mod:`Bcfg2.Server.Plugins.Packages.Yum`.
"""
import os
import re
import sys
from Bcfg2.Logger import Debuggable
from Bcfg2.Compat import HTTPError, HTTPBasicAuthHandler, \
HTTPPasswordMgrWithDefaultRealm, install_opener, build_opener, urlopen, \
cPickle, md5
from Bcfg2.Server.Statistics import track_statistics
def fetch_url(url):
""" Return the content of the given URL.
:param url: The URL to fetch content from.
:type url: string
:raises: ValueError - Malformed URL
:raises: URLError - Failure fetching URL
:returns: string - the content of the page at the given URL """
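# Illustrative usage (the hostname and credentials are made up):
# fetch_url("http://user:secret@repo.example.com/dists/Release")
# strips the embedded credentials from the URL, installs a basic-auth
# opener for them, and returns the body of the response as a string.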
if '@' in url:
mobj = re.match(r'(\w+://)([^:]+):([^@]+)@(.*)$', url)
if not mobj:
raise ValueError("Invalid URL")
user = mobj.group(2)
passwd = mobj.group(3)
url = mobj.group(1) + mobj.group(4)
auth = HTTPBasicAuthHandler(HTTPPasswordMgrWithDefaultRealm())
auth.add_password(None, url, user, passwd)
install_opener(build_opener(auth))
return urlopen(url).read()
class SourceInitError(Exception):
""" Raised when a :class:`Source` object fails instantiation. """
pass
#: A regular expression used to determine the base name of a repo from
#: its URL. This is used when generating repo configs and by
#: :func:`Source.get_repo_name`. It handles `Pulp
#: <http://www.pulpproject.org/>`_ and `mrepo
#: <http://dag.wieers.com/home-made/mrepo/>`_ repositories specially,
#: and otherwise grabs the last component of the URL (as delimited by
#: slashes).
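#: For example (illustrative URLs),
#: ``http://example.com/pulp/repos/f20-updates/`` yields ``f20-updates``,
#: ``http://example.com/centos/RPMS.extras/`` yields ``extras``, and
#: ``http://example.com/fedora/updates/`` yields ``updates``.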
REPO_RE = re.compile(r'(?:pulp/repos/|/RPMS\.|/)([^/]+)/?$')
class Source(Debuggable): # pylint: disable=R0902
""" ``Source`` objects represent a single <Source> tag in
``sources.xml``. Note that a single Source tag can itself
describe multiple repositories (if it uses the "url" attribute
instead of "rawurl"), and so can the ``Source`` object.
Note that a number of the attributes of this object may be more or
less specific to one backend (e.g., :attr:`essentialpkgs`,
:attr:`recommended`, :attr:`gpgkeys`), but they are included in the
superclass to make the parsing of sources from XML more
consistent, and to make it trivial for other backends to support
those features.
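For illustration (the host, distribution, and components below are
made up), a Source tag handled by an APT backend might look like::

    <Source type="apt" url="http://archive.example.com/ubuntu"
            version="trusty" debsrc="true">
      <Component>main</Component>
      <Component>universe</Component>
      <Arch>amd64</Arch>
    </Source>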
"""
#: The Package type handled by this Source class. The ``type``
#: attribute of Package entries will be set to the value ``ptype``
#: when they are handled by :mod:`Bcfg2.Server.Plugins.Packages`.
ptype = None
def __init__(self, basepath, xsource): # pylint: disable=R0912
"""
:param basepath: The base filesystem path under which cache
data for this source should be stored
:type basepath: string
:param xsource: The XML tag that describes this source
:type xsource: lxml.etree._Element
:raises: :class:`Bcfg2.Server.Plugins.Packages.Source.SourceInitError`
"""
Debuggable.__init__(self)
#: The base filesystem path under which cache data for this
#: source should be stored
self.basepath = basepath
#: The XML tag that describes this source
self.xsource = xsource
#: A set of package names that are deemed "essential" by this
#: source
self.essentialpkgs = set()
#: A list of the text of all 'Component' attributes of this
#: source from XML
self.components = []
#: A list of the arches supported by this source
self.arches = []
#: A list of the names of packages that are blacklisted
#: from this source
self.blacklist = []
#: A list of the names of packages that are whitelisted in
#: this source
self.whitelist = []
#: Whether or not to include deb-src lines in the generated APT
#: configuration
self.debsrc = False
#: A dict of repository options that will be included in the
#: configuration generated on the server side (if such is
#: applicable; most backends do not generate any sort of
#: repository configuration on the Bcfg2 server)
self.server_options = dict()
#: A dict of repository options that will be included in the
#: configuration generated for the client (if that is
#: supported by the backend)
self.client_options = dict()
#: A list of URLs to GPG keys that apply to this source
self.gpgkeys = []
#: Whether or not to include essential packages from this source
self.essential = True
#: Whether or not to include recommended packages from this source
self.recommended = False
#: The "rawurl" attribute from :attr:`xsource`, if applicable.
#: A trailing slash is automatically appended to this if there
#: wasn't one already present.
self.rawurl = None
#: The "url" attribute from :attr:`xsource`, if applicable. A
#: trailing slash is automatically appended to this if there
#: wasn't one already present.
self.url = None
#: The "version" attribute from :attr:`xsource`
self.version = None
#: The "name" attribute from :attr:`xsource`
self.name = None
#: A list of predicates that are used to determine if this
#: source applies to a given
#: :class:`Bcfg2.Server.Plugins.Metadata.ClientMetadata`
#: object.
self.conditions = []
#: Formerly, :ref:`server-plugins-generators-packages` only
#: supported applying package sources to groups; that is, they
#: could not be assigned by more complicated logic like
#: per-client repositories and group or client negation. This
#: attribute attempts to provide for some limited backwards
#: compat with older code that relies on this.
self.groups = []
#: A set of all package names in this source. This will not
#: necessarily be populated, particularly by backends that
#: reimplement large portions of
#: :class:`Bcfg2.Server.Plugins.Packages.Collection.Collection`
self.pkgnames = set()
#: A dict of ``<package name>`` -> ``<list of dependencies>``.
#: This will not necessarily be populated, particularly by
#: backends that reimplement large portions of
#: :class:`Bcfg2.Server.Plugins.Packages.Collection.Collection`
self.deps = dict()
#: A dict of ``<package name>`` -> ``<list of provided
#: symbols>``. This will not necessarily be populated,
#: particularly by backends that reimplement large portions of
#: :class:`Bcfg2.Server.Plugins.Packages.Collection.Collection`
self.provides = dict()
#: A dict of ``<package name>`` -> ``<list of recommended
#: symbols>``. This will not necessarily be populated.
self.recommends = dict()
self._init_attributes(xsource)
#: The file (or directory) used for this source's cache data
self.cachefile = os.path.join(self.basepath,
"cache-%s" % self.cachekey)
if not self.rawurl:
baseurl = self.url + "%(version)s/%(component)s/%(arch)s/"
else:
baseurl = self.rawurl
#: A list of dicts, each of which describes the URL to one
#: repository contained in this source. Each dict contains
#: the following keys:
#:
#: * ``version``: The version of the repo (``None`` for
#: ``rawurl`` repos)
#: * ``component``: The component used to form this URL
#: (``None`` for ``rawurl`` repos)
#: * ``arch``: The architecture of this repo
#: * ``baseurl``: Either the ``rawurl`` attribute, or the
#: format string built from the ``url`` attribute
#: * ``url``: The actual URL to the repository
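#:
#: As an illustration (made-up URL), a source with
#: ``url="http://example.com/ubuntu/"``, ``version="trusty"``, a
#: "main" Component, and an "amd64" Arch would produce an entry
#: roughly like::
#:
#:     {'version': 'trusty', 'component': 'main', 'arch': 'amd64',
#:      'debsrc': False, 'baseurl': 'http://example.com/ubuntu/',
#:      'url': 'http://example.com/ubuntu/trusty/main/amd64/',
#:      'name': 'main'}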
self.url_map = []
for arch in self.arches:
if self.url:
usettings = [dict(version=self.version, component=comp,
arch=arch, debsrc=self.debsrc)
for comp in self.components]
else: # rawurl given
usettings = [dict(version=self.version, component=None,
arch=arch, debsrc=self.debsrc)]
for setting in usettings:
if not self.rawurl:
setting['baseurl'] = self.url
else:
setting['baseurl'] = self.rawurl
setting['url'] = baseurl % setting
setting['name'] = self.get_repo_name(setting)
self.url_map.extend(usettings)
def _init_attributes(self, xsource):
"""
This function evaluates the Source tag and parses all of its
attributes. Override it in a subclass to parse additional,
backend-specific attributes; do not use ``__init__`` for that,
because ``Source.__init__`` may call other functions that
already depend on these fields. This function is called before
any other method.
:param xsource: The XML tag that describes this source
:type xsource: lxml.etree._Element
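As an illustration, a Source nested inside ``<Group name="ubuntu">``
in ``sources.xml`` picks up a condition roughly equivalent to
``lambda metadata: "ubuntu" in metadata.groups``, so the source only
applies to clients in that group; negated Group or Client ancestors
produce the inverse tests.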
"""
self.components = [item.text for item in xsource.findall('Component')]
self.arches = [item.text for item in xsource.findall('Arch')]
self.blacklist = [item.text for item in xsource.findall('Blacklist')]
self.whitelist = [item.text for item in xsource.findall('Whitelist')]
self.debsrc = xsource.get('debsrc', 'false') == 'true'
opts = xsource.findall("Options")
for el in opts:
repoopts = dict([(k, v)
for k, v in el.attrib.items()
if k != "clientonly" and k != "serveronly"])
if el.get("clientonly", "false").lower() == "false":
self.server_options.update(repoopts)
if el.get("serveronly", "false").lower() == "false":
self.client_options.update(repoopts)
self.gpgkeys = [el.text for el in xsource.findall("GPGKey")]
self.essential = xsource.get('essential', 'true').lower() == 'true'
self.recommended = xsource.get('recommended',
'false').lower() == 'true'
self.rawurl = xsource.get('rawurl', '')
if self.rawurl and not self.rawurl.endswith("/"):
self.rawurl += "/"
self.url = xsource.get('url', '')
if self.url and not self.url.endswith("/"):
self.url += "/"
self.version = xsource.get('version', '')
self.name = xsource.get('name', None)
for el in xsource.iterancestors():
if el.tag == "Group":
if el.get("negate", "false").lower() == "true":
self.conditions.append(lambda m, el=el:
el.get("name") not in m.groups)
else:
self.groups.append(el.get("name"))
self.conditions.append(lambda m, el=el:
el.get("name") in m.groups)
elif el.tag == "Client":
if el.get("negate", "false").lower() == "true":
self.conditions.append(lambda m, el=el:
el.get("name") != m.hostname)
else:
self.conditions.append(lambda m, el=el:
el.get("name") == m.hostname)
@property
def cachekey(self):
""" A unique key for this source that will be used to generate
:attr:`cachefile` and other cache paths """
return md5(cPickle.dumps([self.version, self.components, self.url,
self.rawurl, self.arches])).hexdigest()
def get_relevant_groups(self, metadata):
""" Get all groups that might be relevant to determining which
sources apply to this collection's client.
:return: list of strings - group names
"""
return sorted(list(set([g for g in metadata.groups
if (g in self.groups or
g in self.arches)])))
def load_state(self):
""" Load saved state from :attr:`cachefile`. If caching and
state is handled by the package library, then this function
does not need to be implemented.
:raises: OSError - If the saved data cannot be read
:raises: cPickle.UnpicklingError - If the saved data is corrupt """
data = open(self.cachefile, 'rb')
(self.pkgnames, self.deps, self.provides,
self.essentialpkgs, self.recommends) = cPickle.load(data)
def save_state(self):
""" Save state to :attr:`cachefile`. If caching and
state is handled by the package library, then this function
does not need to be implemented. """
cache = open(self.cachefile, 'wb')
cPickle.dump((self.pkgnames, self.deps, self.provides,
self.essentialpkgs, self.recommends), cache, 2)
cache.close()
@track_statistics()
def setup_data(self, force_update=False):
"""Perform all data fetching and setup tasks.
For most backends, this involves downloading all metadata from
the repository, parsing it, and caching the parsed data
locally. The order of operations is:
#. Call :func:`load_state` to try to load data from the local
cache.
#. If that fails, call :func:`read_files` to read and parse
the locally downloaded metadata files.
#. If that fails, call :func:`update` to fetch the metadata,
then :func:`read_files` to parse it.
Obviously with a backend that leverages repo access libraries
to avoid downloading all metadata, many of the functions
called by ``setup_data`` can be no-ops (or nearly so).
:param force_update: Ignore all locally cached and downloaded
data and fetch the metadata anew from the
upstream repository.
:type force_update: bool
"""
# there are a bunch of wildcard except statements here,
# because the various functions called herein (``load_state``,
# ``read_files``, ``update``) are defined entirely by the
# Packages plugins that implement them.
#
# TODO: we should define an exception subclass that each of
# these functions can raise when an *expected* error condition
# is encountered.
#
# pylint: disable=W0702
if not force_update:
if os.path.exists(self.cachefile):
try:
self.load_state()
except (OSError, cPickle.UnpicklingError):
err = sys.exc_info()[1]
self.logger.error("Packages: Cachefile %s load failed: %s"
% (self.cachefile, err))
self.logger.error("Falling back to file read")
try:
self.read_files()
except:
err = sys.exc_info()[1]
self.logger.error("Packages: File read failed: %s" %
err)
self.logger.error("Falling back to file download")
force_update = True
else:
force_update = True
if force_update:
try:
self.update()
self.read_files()
except:
err = sys.exc_info()[1]
self.logger.error("Packages: Failed to load data for %s: %s" %
(self, err))
self.logger.error("Some Packages will be missing")
# pylint: enable=W0702
def get_repo_name(self, url_map):
""" Try to find a sensible name for a repository. Since
``sources.xml`` doesn't provide for repository names, we have
to try to guess at the names when generating config files or
doing other operations that require repository names. This
function tries several approaches:
#. First, if the source element contains a ``name`` attribute,
use that as the name.
#. If the map contains a ``component`` key, use that as the
name.
#. If not, then try to match the repository URL against
:attr:`Bcfg2.Server.Plugins.Packages.Source.REPO_RE`. If
that succeeds, use the first matched group; additionally,
if the Source tag that describes this repo is contained in
a ``<Group>`` tag, prepend that to the name.
#. If :attr:`Bcfg2.Server.Plugins.Packages.Source.REPO_RE`
does not match the repository, and the Source tag that
describes this repo is contained in a ``<Group>`` tag, use
the name of the group.
#. Failing that, use the full URL to this repository, with the
protocol and trailing slash stripped off if possible.
Once that is done, all characters disallowed in yum source
names are replaced by dashes. See below for the exact regex.
The yum rules are used here because they are so restrictive.
``get_repo_name`` is **not** guaranteed to return a unique
name. If you require a unique name, then you will need to
generate all repo names and make them unique through the
approach of your choice, e.g., appending numbers to non-unique
repository names. See
:func:`Bcfg2.Server.Plugins.Packages.Yum.Source.get_repo_name`
for an example.
:param url_map: A single :attr:`url_map` dict, i.e., any
single element of :attr:`url_map`.
:type url_map: dict
:returns: string - the name of the repository.
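For instance (illustrative values), a ``url_map`` entry with
``component=None`` and
``url="http://example.com/pulp/repos/f20-updates/"`` would yield
``f20-updates``, or ``<group>-f20-updates`` if the Source tag is
nested inside a Group tag.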
"""
if self.name:
return self.name
if url_map['component']:
rname = url_map['component']
else:
match = REPO_RE.search(url_map['url'])
if match:
rname = match.group(1)
if self.groups:
rname = "%s-%s" % (self.groups[0], rname)
elif self.groups:
rname = self.groups[0]
else:
# a global source with no reasonable name. Try to
# strip off the protocol and trailing slash.
match = re.search(r'^[A-Za-z]+://(.*?)/?$', url_map['url'])
if match:
rname = match.group(1)
else:
# what kind of crazy url is this? I give up!
# just use the full url and let the regex below
# make it even uglier.
rname = url_map['url']
# see yum/__init__.py in the yum source, lines 441-449, for
# the source of this regex. yum doesn't like anything but
# string.ascii_letters, string.digits, and [-_.:]. There
# doesn't seem to be a reason for this, because yum.
return re.sub(r'[^A-Za-z0-9-_.:]', '-', rname)
def __repr__(self):
if self.rawurl:
return "%s at %s" % (self.__class__.__name__, self.rawurl)
elif self.url:
return "%s at %s" % (self.__class__.__name__, self.url)
else:
return self.__class__.__name__
@property
def urls(self):
""" A list of URLs to the base metadata file for each
repository described by this source. """
return []
@property
def files(self):
""" A list of files stored in the local cache by this backend.
"""
return [self.escape_url(url) for url in self.urls]
def get_vpkgs(self, metadata):
""" Get a list of all virtual packages provided by all sources.
:returns: list of strings
"""
agroups = ['global'] + [a for a in self.arches
if a in metadata.groups]
vdict = dict()
for agrp in agroups:
if agrp not in self.provides:
self.logger.warning("%s provides no packages for %s" %
(self, agrp))
continue
for key, value in list(self.provides[agrp].items()):
if key not in vdict:
vdict[key] = set(value)
else:
vdict[key].update(value)
return vdict
def is_virtual_package(self, metadata, package): # pylint: disable=W0613
""" Return True if a name is a virtual package (i.e., is a
symbol provided by a real package), False otherwise.
:param package: The name of the symbol, but see :ref:`pkg-objects`
:type package: string
:returns: bool
"""
return False
def escape_url(self, url):
""" Given a URL to a repository metadata file, return the full
path to a file suitable for storing that file locally. This
is achieved by replacing all forward slashes in the URL with
``@``.
:param url: The URL to escape
:type url: string
:returns: string
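For example (illustrative paths), with
``basepath="/var/lib/bcfg2/Packages"``,
``escape_url("http://example.com/repo/Packages.gz")`` returns
``/var/lib/bcfg2/Packages/http:@@example.com@repo@Packages.gz``.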
"""
return os.path.join(self.basepath, url.replace('/', '@'))
def read_files(self):
""" Read and parse locally downloaded metadata files and
populate
:attr:`Bcfg2.Server.Plugins.Packages.Source.Source.pkgnames`. Should
call
:func:`Bcfg2.Server.Plugins.Packages.Source.Source.process_files`
as its final step."""
pass
def process_files(self, dependencies, # pylint: disable=R0912,W0102
provides, recommends=dict()):
""" Given dicts of depends and provides generated by
:func:`read_files`, this generates :attr:`deps` and
:attr:`provides` and calls :func:`save_state` to save the
cached data to disk.
All arguments are dicts of dicts of lists. Keys are the
arches of packages contained in this source; values are dicts
whose keys are package names and whose values are lists of either
the dependencies of each package or the symbols provided by each
package.
:param dependencies: A dict of dependencies found in the
metadata for this source.
:type dependencies: dict; see above.
:param provides: A dict of symbols provided by packages in
this repository.
:type provides: dict; see above.
:param recommends: A dict of recommended dependencies
found for this source.
:type recommends: dict; see above.
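For example (illustrative package names), a ``dependencies`` dict
might look like::

    {'x86_64': {'foo': ['libbar', 'libbaz']},
     'i686': {'foo': ['libbar', 'libbaz']}}

Since the per-arch dependency lists agree, ``foo`` would end up in
``deps['global']``; if they differed, it would be recorded per arch
instead.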
"""
self.deps['global'] = dict()
self.recommends['global'] = dict()
self.provides['global'] = dict()
for barch in dependencies:
self.deps[barch] = dict()
self.recommends[barch] = dict()
self.provides[barch] = dict()
for pkgname in self.pkgnames:
pset = set()
rset = set()
for barch in dependencies:
if pkgname not in dependencies[barch]:
dependencies[barch][pkgname] = []
pset.add(tuple(dependencies[barch][pkgname]))
if len(pset) == 1:
self.deps['global'][pkgname] = pset.pop()
else:
for barch in dependencies:
self.deps[barch][pkgname] = dependencies[barch][pkgname]
for barch in recommends:
if pkgname not in recommends[barch]:
recommends[barch][pkgname] = []
rset.add(tuple(recommends[barch][pkgname]))
if len(rset) == 1:
self.recommends['global'][pkgname] = rset.pop()
else:
for barch in recommends:
self.recommends[barch][pkgname] = \
recommends[barch][pkgname]
provided = set()
for bprovided in list(provides.values()):
provided.update(set(bprovided))
for prov in provided:
prset = set()
for barch in provides:
if prov not in provides[barch]:
continue
prset.add(tuple(provides[barch].get(prov, ())))
if len(prset) == 1:
self.provides['global'][prov] = prset.pop()
else:
for barch in provides:
self.provides[barch][prov] = provides[barch].get(prov, ())
self.save_state()
def unknown_filter(self, package):
""" A predicate that is used by :func:`filter_unknown` to
filter packages from the results of
:func:`Bcfg2.Server.Plugins.Packages.Collection.Collection.complete`
that should not be shown to the end user (i.e., that are not
truly unknown, but are rather packaging system artifacts). By
default, excludes any package whose name starts with "choice"
:param package: The name of a package that was unknown to the
backend
:type package: string
:returns: bool
"""
return package.startswith("choice")
def filter_unknown(self, unknown):
""" After
:func:`Bcfg2.Server.Plugins.Packages.Collection.Collection.complete`,
filter out packages that appear in the list of unknown
packages but should not be presented to the user.
:attr:`unknown_filter` is called to assess whether or not a
package is expected to be unknown.
:param unknown: A set of unknown packages. The set should be
modified in place.
:type unknown: set of strings
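For example (illustrative names), calling
``filter_unknown(set(["choice-abc123", "somepkg"]))`` would remove
``choice-abc123`` (it matches :func:`unknown_filter`) and leave
``somepkg`` in the set.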
"""
unknown.difference_update(set([u for u in unknown
if self.unknown_filter(u)]))
def update(self):
""" Download metadata from the upstream repository and cache
it locally.
:raises: ValueError - If any URL in :attr:`urls` is malformed
:raises: OSError - If there is an error writing the local
cache
:raises: HTTPError - If there is an error fetching the remote
data
"""
for url in self.urls:
self.logger.info("Packages: Updating %s" % url)
fname = self.escape_url(url)
try:
open(fname, 'wb').write(fetch_url(url))
except ValueError:
self.logger.error("Packages: Bad url string %s" % url)
raise
except OSError:
err = sys.exc_info()[1]
self.logger.error("Packages: Could not write data from %s to "
"local cache at %s: %s" % (url, fname, err))
raise
except HTTPError:
err = sys.exc_info()[1]
self.logger.error("Packages: Failed to fetch url %s. HTTP "
"response code=%s" % (url, err.code))
raise
def applies(self, metadata):
""" Return true if this source applies to the given client,
i.e., the client is in all necessary groups.
:param metadata: The client metadata to check to see if this
source applies
:type metadata: Bcfg2.Server.Plugins.Metadata.ClientMetadata
:returns: bool
"""
# check arch groups
if not self.arch_groups_match(metadata):
return False
# check Group/Client tags from sources.xml
for condition in self.conditions:
if not condition(metadata):
return False
return True
def get_arches(self, metadata):
""" Get a list of architectures that the given client has and
for which this source provides packages. The return value
will always include ``global``.
:param metadata: The client metadata to get matching
architectures for
:type metadata: Bcfg2.Server.Plugins.Metadata.ClientMetadata
:returns: list of strings
"""
return ['global'] + [a for a in self.arches if a in metadata.groups]
def get_deps(self, metadata, package, recommended=None):
""" Get a list of the dependencies of the given package.
:param package: The name of the symbol
:type package: string
:returns: list of strings
"""
recs = []
if ((recommended is None and self.recommended) or
(recommended and recommended.lower() == 'true')):
for arch in self.get_arches(metadata):
if package in self.recommends[arch]:
recs.extend(self.recommends[arch][package])
for arch in self.get_arches(metadata):
if package in self.deps[arch]:
recs.extend(self.deps[arch][package])
return recs
def get_provides(self, metadata, package):
""" Get a list of all symbols provided by the given package.
:param package: The name of the package
:type package: string
:returns: list of strings
"""
for arch in self.get_arches(metadata):
if package in self.provides[arch]:
return self.provides[arch][package]
return []
def is_package(self, metadata, package): # pylint: disable=W0613
""" Return True if a package is a package, False otherwise.
:param package: The name of the package
:type package: string
:returns: bool
"""
return (package in self.pkgnames and
package not in self.blacklist and
(len(self.whitelist) == 0 or package in self.whitelist))
def get_group(self, metadata, group, ptype=None): # pylint: disable=W0613
""" Get the list of packages of the given type in a package
group.
:param group: The name of the group to query
:type group: string
:param ptype: The type of packages to get, for backends that
support multiple package types in package groups
(e.g., "recommended," "optional," etc.)
:type ptype: string
:returns: list of strings - package names
"""
return []
def arch_groups_match(self, metadata):
""" Returns True if the client is in an arch group that
matches the arch of this source.
:returns: bool
"""
for arch in self.arches:
if arch in metadata.groups:
return True
return False