# Source code for astropy.io.fits.header

# Licensed under a 3-clause BSD style license - see PYFITS.rst

import collections
import copy
import itertools
import numbers
import os
import re
import warnings

from astropy.utils import isiterable
from astropy.utils.exceptions import AstropyUserWarning

from ._utils import parse_header
from .card import KEYWORD_LENGTH, UNDEFINED, Card, _pad
from .file import _File
from .util import (
    decode_ascii,
    encode_ascii,
    fileobj_closed,
    fileobj_is_binary,
    path_like,
)

BLOCK_SIZE = 2880  # the FITS block size, in bytes

# This regular expression can match a *valid* END card which just consists of
# the string 'END' followed by all spaces, or an *invalid* end card which
# consists of END, followed by any character that is *not* a valid character
# for a valid FITS keyword (that is, this is not a keyword like 'ENDER' which
# starts with 'END' but is not 'END'), followed by any arbitrary bytes.  An
# invalid end card may also consist of just 'END' with no trailing bytes.
#
# The two cases are exposed as the named groups ``valid`` and ``invalid`` so
# that code using a match can tell which alternative fired.  A valid card is
# 'END' plus 77 spaces (80 characters in total); any further trailing spaces
# are consumed but are not part of the ``valid`` group.
#
# NOTE(review): the pattern is passed through encode_ascii before being
# compiled, so it is presumably a bytes pattern intended for raw header
# bytes -- confirm against encode_ascii.
HEADER_END_RE = re.compile(
    encode_ascii(r"(?:(?P<valid>END {77}) *)|(?P<invalid>END$|END {0,76}[^A-Z0-9_-])")
)


# The FITS standard restricts header records to a subset of ASCII: only the
# code points 0x20 (space) through 0x7E (tilde), inclusive, are permitted.
VALID_HEADER_CHARS = {chr(code) for code in range(0x20, 0x7F)}
# The END card image: the keyword left-justified and space-padded to 80
# characters (i.e. 'END' followed by 77 blanks).
END_CARD = "END".ljust(80)


# Names (relative to this module) whose docstring examples are excluded from
# doctest runs.
# NOTE(review): presumably consumed by the project's doctest tooling; the
# consumer is not visible in this module -- verify before renaming entries.
__doctest_skip__ = [
    "Header",
    "Header.comments",
    "Header.fromtextfile",
    "Header.totextfile",
    "Header.set",
    "Header.update",
]





# Register Header as a virtual subclass of both abstract base classes, so
# that isinstance()/issubclass() checks against MutableSequence and
# MutableMapping report True for Header.  ABC registration does not add any
# methods to Header; it only affects those checks.
collections.abc.MutableSequence.register(Header)
collections.abc.MutableMapping.register(Header)


class _DelayedHeader:
    """
    Descriptor used to create the Header object from the header string that
    was stored in HDU._header_str when parsing the file.
    """

    def __get__(self, obj, owner=None):
        try:
            return obj.__dict__["_header"]
        except KeyError:
            if obj._header_str is not None:
                hdr = Header.fromstring(obj._header_str)
                obj._header_str = None
            else:
                raise AttributeError(
                    "'{}' object has no attribute '_header'".format(
                        obj.__class__.__name__
                    )
                )

            obj.__dict__["_header"] = hdr
            return hdr

    def __set__(self, obj, val):
        obj.__dict__["_header"] = val

    def __delete__(self, obj):
        del obj.__dict__["_header"]


class _BasicHeaderCards:
    """
    This class allows to access cards with the _BasicHeader.cards attribute.

    This is needed because during the HDU class detection, some HDUs uses
    the .cards interface.  Cards cannot be modified here as the _BasicHeader
    object will be deleted once the HDU object is created.

    """

    def __init__(self, header):
        self.header = header

    def __getitem__(self, key):
        # .cards is a list of cards, so key here is an integer.
        # get the keyword name from its index.
        key = self.header._keys[key]
        # then we get the card from the _BasicHeader._cards list, or parse it
        # if needed.
        try:
            return self.header._cards[key]
        except KeyError:
            cardstr = self.header._raw_cards[key]
            card = Card.fromstring(cardstr)
            self.header._cards[key] = card
            return card


class _BasicHeader(collections.abc.Mapping):
    """This class provides a fast header parsing, without all the additional
    features of the Header class. Here only standard keywords are parsed, no
    support for CONTINUE, HIERARCH, COMMENT, HISTORY, or rvkc.

    The raw card images are stored and parsed only if needed. The idea is that
    to create the HDU objects, only a small subset of standard cards is needed.
    Once a card is parsed, which is deferred to the Card class, the Card object
    is kept in a cache. This is useful because a small subset of cards is used
    a lot in the HDU creation process (NAXIS, XTENSION, ...).

    """

    def __init__(self, cards):
        # dict of (keywords, card images)
        self._raw_cards = cards
        self._keys = list(cards.keys())
        # dict of (keyword, Card object) storing the parsed cards
        self._cards = {}
        # the _BasicHeaderCards object allows to access Card objects from
        # keyword indices
        self.cards = _BasicHeaderCards(self)

        self._modified = False

    def __getitem__(self, key):
        if isinstance(key, numbers.Integral):
            key = self._keys[key]

        try:
            return self._cards[key].value
        except KeyError:
            # parse the Card and store it
            cardstr = self._raw_cards[key]
            self._cards[key] = card = Card.fromstring(cardstr)
            return card.value

    def __len__(self):
        return len(self._raw_cards)

    def __iter__(self):
        return iter(self._raw_cards)

    def index(self, keyword):
        return self._keys.index(keyword)

    @property
    def data_size(self):
        """
        Return the size (in bytes) of the data portion following the `Header`.
        """
        return _hdr_data_size(self)

    @property
    def data_size_padded(self):
        """
        Return the size (in bytes) of the data portion following the `Header`
        including padding.
        """
        size = self.data_size
        return size + _pad_length(size)

    @classmethod
    def fromfile(cls, fileobj):
        """The main method to parse a FITS header from a file. The parsing is
        done with the parse_header function implemented in Cython."""

        close_file = False
        if isinstance(fileobj, str):
            fileobj = open(fileobj, "rb")
            close_file = True

        try:
            header_str, cards = parse_header(fileobj)
            _check_padding(header_str, BLOCK_SIZE, False)
            return header_str, cls(cards)
        finally:
            if close_file:
                fileobj.close()


class _CardAccessor:
    """
    This is a generic class for wrapping a Header in such a way that you can
    use the header's slice/filtering capabilities to return a subset of cards
    and do something with them.

    This is sort of the opposite notion of the old CardList class--whereas
    Header used to use CardList to get lists of cards, this uses Header to get
    lists of cards.
    """

    # TODO: Consider giving this dict/list methods like Header itself
    def __init__(self, header):
        self._header = header

    def __repr__(self):
        return "\n".join(repr(c) for c in self._header._cards)

    def __len__(self):
        return len(self._header._cards)

    def __iter__(self):
        return iter(self._header._cards)

    def __eq__(self, other):
        # If the `other` item is a scalar we will still treat it as equal if
        # this _CardAccessor only contains one item
        if not isiterable(other) or isinstance(other, str):
            if len(self) == 1:
                other = [other]
            else:
                return False

        for a, b in itertools.zip_longest(self, other):
            if a != b:
                return False
        else:
            return True

    def __ne__(self, other):
        return not (self == other)

    def __getitem__(self, item):
        if isinstance(item, slice) or self._header._haswildcard(item):
            return self.__class__(self._header[item])

        idx = self._header._cardindex(item)
        return self._header._cards[idx]

    def _setslice(self, item, value):
        """
        Helper for implementing __setitem__ on _CardAccessor subclasses; slices
        should always be handled in this same way.
        """

        if isinstance(item, slice) or self._header._haswildcard(item):
            if isinstance(item, slice):
                indices = range(*item.indices(len(self)))
            else:
                indices = self._header._wildcardmatch(item)
            if isinstance(value, str) or not isiterable(value):
                value = itertools.repeat(value, len(indices))
            for idx, val in zip(indices, value):
                self[idx] = val
            return True
        return False


class _HeaderComments(_CardAccessor):
    """
    Internal helper backing the ``Header.comments`` attribute.

    It can display the comments of every keyword in the Header or look up
    the comment of specific keywords.  All the keyword lookup forms accepted
    by the Header class itself are supported, except that comments are
    returned instead of values.
    """

    def __iter__(self):
        for entry in self._header._cards:
            yield entry.comment

    def __repr__(self):
        """Returns a simple list of all keywords and their comments."""

        # Right-align keywords in a column at least KEYWORD_LENGTH wide,
        # widened to fit the longest keyword present.
        widths = [KEYWORD_LENGTH] + [len(c.keyword) for c in self._header._cards]
        column = max(widths)
        lines = [
            "{:>{len}}  {}".format(c.keyword, c.comment, len=column)
            for c in self._header._cards
        ]
        return "\n".join(lines)

    def __getitem__(self, item):
        """
        A slice or filter string yields a new _HeaderComments wrapping the
        selected cards; any other key yields the comment of that one card.
        """

        found = super().__getitem__(item)
        # The base class returns an accessor for slices/wildcards and a
        # single card otherwise.
        return found if isinstance(found, _HeaderComments) else found.comment

    def __setitem__(self, item, comment):
        """
        Set/update the comment on specified card or cards.

        Slice/filter updates work similarly to how Header.__setitem__ works.
        """

        header = self._header
        if not header._set_slice(item, comment, self):
            # Not a slice/filter: key/index errors must propagate here, so
            # that comments are never set on nonexistent cards.
            position = header._cardindex(item)
            header[position] = (header[position], comment)


class _HeaderCommentaryCards(_CardAccessor):
    """
    List-like sequence over every value stored in the header under one
    commentary keyword (HISTORY, for example).
    """

    def __init__(self, header, keyword=""):
        super().__init__(header)
        self._keyword = keyword
        total = self._header.count(keyword)
        self._count = total
        # (start, stop, step) triple selecting, by default, all occurrences.
        self._indices = slice(total).indices(total)

    # The base class versions of __len__ and __iter__ cannot be reused
    # because slicing is implemented differently in this class.
    def __len__(self):
        selected = range(*self._indices)
        return len(selected)

    def __iter__(self):
        for occurrence in range(*self._indices):
            yield self._header[(self._keyword, occurrence)]

    def __repr__(self):
        return "\n".join(map(str, self))

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            # Same header and keyword, narrower selection of occurrences.
            subset = self.__class__(self._header, self._keyword)
            subset._indices = idx.indices(self._count)
            return subset

        if not isinstance(idx, numbers.Integral):
            raise ValueError(f"{self._keyword} index must be an integer")

        # Map the position within the current selection to the occurrence
        # number of the keyword in the header.
        occurrence = list(range(*self._indices))[idx]
        return self._header[(self._keyword, occurrence)]

    def __setitem__(self, item, value):
        """
        Set the value of a specified commentary card or cards.

        Slice/filter updates work similarly to how Header.__setitem__ works.
        """

        if not self._header._set_slice(item, value, self):
            # Not a slice/filter: key/index errors must propagate here, so
            # that nonexistent cards are never updated.
            self._header[(self._keyword, item)] = value


def _block_size(sep):
    """
    Return the length of a FITS header block when the cards in it are joined
    by the non-blank separator ``sep``.
    """

    cards_per_block = BLOCK_SIZE // Card.length
    # One separator sits between each pair of neighbouring cards.
    separator_total = len(sep) * (cards_per_block - 1)
    return BLOCK_SIZE + separator_total


def _pad_length(stringlen):
    """
    Number of bytes to append so that ``stringlen`` reaches the next multiple
    of the FITS block size (zero when it is already a multiple).
    """

    overhang = stringlen % BLOCK_SIZE
    return (BLOCK_SIZE - overhang) % BLOCK_SIZE


def _check_padding(header_str, block_size, is_eof, check_block_size=True):
    # Strip any zero-padding (see ticket #106)
    if header_str and header_str[-1] == "\0":
        if is_eof and header_str.strip("\0") == "":
            # TODO: Pass this warning to validation framework
            warnings.warn(
                "Unexpected extra padding at the end of the file.  This "
                "padding may not be preserved when saving changes.",
                AstropyUserWarning,
            )
            raise EOFError()
        else:
            # Replace the illegal null bytes with spaces as required by
            # the FITS standard, and issue a nasty warning
            # TODO: Pass this warning to validation framework
            warnings.warn(
                "Header block contains null bytes instead of spaces for "
                "padding, and is not FITS-compliant. Nulls may be "
                "replaced with spaces upon writing.",
                AstropyUserWarning,
            )
            header_str.replace("\0", " ")

    if check_block_size and (len(header_str) % block_size) != 0:
        # This error message ignores the length of the separator for
        # now, but maybe it shouldn't?
        actual_len = len(header_str) - block_size + BLOCK_SIZE
        # TODO: Pass this error to validation framework
        raise ValueError(f"Header size is not multiple of {BLOCK_SIZE}: {actual_len}")


def _hdr_data_size(header):
    """Calculate the data size (in bytes) following the given `Header`"""
    size = 0
    naxis = header.get("NAXIS", 0)
    if naxis > 0:
        size = 1
        for idx in range(naxis):
            size = size * header["NAXIS" + str(idx + 1)]
        bitpix = header["BITPIX"]
        gcount = header.get("GCOUNT", 1)
        pcount = header.get("PCOUNT", 0)
        size = abs(bitpix) * gcount * (pcount + size) // 8
    return size