Source code for astropy.utils.xml.check
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
A collection of functions for checking various XML-related strings for
standards compliance.
"""
import re
import urllib.parse
[docs]
def check_id(ID):
    """
    Returns `True` if *ID* is a valid XML ID.

    An ID is accepted when it consists of an ASCII letter or underscore
    followed by any number of ASCII letters, digits, underscores,
    periods or hyphens.

    Parameters
    ----------
    ID : str
        The candidate identifier.

    Returns
    -------
    valid : bool
        `True` if the whole of *ID* matches the pattern above, `False`
        otherwise (including for the empty string).
    """
    # ``fullmatch`` instead of ``match`` with a trailing ``$``: ``$`` also
    # matches just before a final newline, which let "abc\n" pass as valid.
    return re.fullmatch(r"[A-Za-z_][A-Za-z0-9_\.\-]*", ID) is not None
[docs]
def fix_id(ID):
    """
    Given an arbitrary string, create one that can be used as an xml
    id.  This is rather simplistic at the moment, since it just
    replaces non-valid characters with underscores.

    Parameters
    ----------
    ID : str
        The string to convert.

    Returns
    -------
    fixed : str
        *ID* itself when it already is a valid id, and the empty string
        when *ID* is empty.  Otherwise a copy of *ID* in which an
        underscore is prepended if the first character is not an ASCII
        letter or underscore, and every remaining character other than
        an ASCII letter, digit, underscore, period or hyphen is replaced
        by an underscore.
    """
    # ``fullmatch`` instead of ``match`` with a trailing ``$``: ``$`` also
    # matches just before a final newline, so "abc\n" used to be returned
    # unchanged even though it is not a valid id.
    if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_\.\-]*", ID):
        return ID
    if not ID:
        return ""

    corrected = ID
    # An id must start with a letter or underscore.  The offending first
    # character is kept (it is sanitized below as part of the tail) and an
    # underscore is put in front of it.
    if re.fullmatch(r"[A-Za-z_]", corrected[0]) is None:
        corrected = "_" + corrected

    # The first character is valid at this point; only the tail needs
    # its invalid characters replaced.
    return corrected[0] + re.sub(r"[^A-Za-z0-9_\.\-]", "_", corrected[1:])
# NOTE(review): this pattern is not referenced by any of the code visible
# here.  It also cannot be compiled as written, because ``\l`` is not a
# valid escape for the ``re`` module (presumably ``\n`` was intended --
# TODO confirm before using or removing this constant).
_token_regex = r"(?![\r\l\t ])[^\r\l\t]*(?![\r\l\t ])"
[docs]
def check_token(token):
    """
    Returns `True` if *token* is a valid XML token, as defined by XML
    Schema Part 2.

    A token contains no carriage return, line feed or tab characters,
    has no leading or trailing spaces, and has no internal run of two
    or more spaces.  The empty string is accepted.

    Parameters
    ----------
    token : str
        The candidate token.

    Returns
    -------
    valid : bool
        `True` if *token* satisfies the rules above, `False` otherwise.
    """
    if token == "":
        return True
    # One or more runs of non-whitespace characters separated by exactly
    # one space.  ``fullmatch`` anchors both ends, so neither a leading
    # space nor a trailing newline can slip through.
    return re.fullmatch(r"[^\r\n\t ]+(?: [^\r\n\t ]+)*", token) is not None
[docs]
def check_mime_content_type(content_type):
    """
    Returns `True` if *content_type* is a valid MIME content type
    (syntactically at least), as defined by RFC 2045.

    Only the bare ``type/subtype`` form is accepted; a value carrying
    parameters (``; name=value``) is rejected.

    Parameters
    ----------
    content_type : str
        The candidate content type.

    Returns
    -------
    valid : bool
        `True` if the whole of *content_type* is two tokens separated
        by a single ``/``, `False` otherwise.
    """
    # A token is a run of characters excluding the special characters
    # listed below, the space, the control characters 0x00-0x1f and DEL.
    ctrls = "".join(map(chr, range(0x20)))
    token_regex = f'[^()<>@,;:\\"/[\\]?= {ctrls}\x7f]+'
    # ``fullmatch`` instead of ``match`` with a trailing ``$``: ``$`` also
    # matches just before a final newline, which let "text/plain\n" pass.
    return (
        re.fullmatch(
            rf"(?P<type>{token_regex})/(?P<subtype>{token_regex})", content_type
        )
        is not None
    )
[docs]
def check_anyuri(uri):
    """
    Returns `True` if *uri* is a valid URI as defined in RFC 2396.

    This is a character-level approximation of the RFC 2396 grammar:
    the whole string must be an optional scheme followed by characters
    permitted in a URI, optionally followed by a ``#`` fragment made of
    the same characters.  The string must also be parseable by
    `urllib.parse.urlparse`.  The empty string (an empty URI reference)
    is accepted.

    Parameters
    ----------
    uri : str
        The candidate URI or URI reference.

    Returns
    -------
    valid : bool
        `True` if *uri* passes both checks, `False` otherwise.
    """
    # Characters RFC 2396 allows in a URI: alphanumerics, the reserved
    # set ``;/?:@&=+$,``, the marks ``-_.!~*'()`` and ``%`` for escapes.
    uric = r"[0-9a-zA-Z;/?:@&=+$,\.\-_!~*'()%]"
    pattern = (
        rf"(?:(?:[a-zA-Z][0-9a-zA-Z+\-\.]*:)?/{{0,2}}{uric}+)?"
        rf"(?:#{uric}*)?"
    )
    # Every part of the pattern is optional, so an unanchored ``match``
    # succeeds on any input (with an empty match).  ``fullmatch`` forces
    # the whole string to conform.
    if re.fullmatch(pattern, uri) is None:
        return False
    try:
        urllib.parse.urlparse(uri)
    except ValueError:
        # ``urlparse`` signals malformed input with ``ValueError``.
        return False
    return True