# Source code for astropy.io.ascii.sextractor
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""sextractor.py:
  Classes to read SExtractor table format.
Built on daophot.py:
:Copyright: Smithsonian Astrophysical Observatory (2011)
:Author: Tom Aldcroft (aldcroft@head.cfa.harvard.edu)
"""
import re
from . import core
class SExtractorHeader(core.BaseHeader):
    """Read the header from a file produced by SExtractor."""

    comment = r"^\s*#\s*\S\D.*"  # Find lines that don't have "# digit"

    def get_cols(self, lines):
        """
        Initialize the header Column objects from the table ``lines`` for a SExtractor
        header.

        Header lines have the form ``# <number> <NAME> <description> [<unit>]``.
        Column numbers that are skipped in the header (array-like columns) are
        filled in with names derived from the preceding defined column, e.g.
        ``NAME_1``, ``NAME_2``, inheriting its description and unit.  On success
        ``self.names`` and ``self.cols`` are populated.

        Parameters
        ----------
        lines : list
            List of table lines

        Raises
        ------
        core.InconsistentTableError
            If no column names can be determined from the header, or if the
            header skips column numbers before its first defined column (so
            there is no preceding column to derive names from).
        """
        # This assumes that the columns are listed in order, one per line with a
        # header comment string of the format: "# 1 ID short description [unit]"
        # However, some may be missing and must be inferred from skipped column numbers
        columns = {}
        # E.g. '# 1 ID identification number' (no units) or '# 2 MAGERR magnitude of error [mag]'
        # Updated along with issue #4603, for more robust parsing of unit
        re_name_def = re.compile(
            r"""^\s* \# \s*             # possible whitespace around #
                (?P<colnumber> [0-9]+)\s+   # number of the column in table
                (?P<colname> [-\w]+)        # name of the column
                # column description, match any character until...
                (?:\s+(?P<coldescr> \w .+)
                # ...until [non-space][space][unit] or [not-right-bracket][end]
                (?:(?<!(\]))$|(?=(?:(?<=\S)\s+\[.+\]))))?
                (?:\s*\[(?P<colunit>.+)\])?.* # match units in brackets
                """,
            re.VERBOSE,
        )
        dataline = None
        for line in lines:
            # Tolerate leading whitespace before '#', as the header regex above
            # and the ``comment`` pattern of this class both do.
            if not line.lstrip().startswith("#"):
                dataline = line  # save for later to infer the actual number of columns
                break  # End of header lines
            match = re_name_def.search(line)
            if match:
                colnumber = int(match.group("colnumber"))
                # If no description/units are given, the groups are None
                columns[colnumber] = (
                    match.group("colname"),
                    match.group("coldescr"),
                    match.group("colunit"),
                )

        # Handle skipped column numbers
        colnumbers = sorted(columns)
        # Handle the case where the last column is array-like by append a pseudo column
        # If there are more data columns than the largest column number
        # then add a pseudo-column that will be dropped later.  This allows
        # the array column logic below to work in all cases.
        if dataline is not None:
            n_data_cols = len(dataline.split())
        elif colnumbers:
            # handles no data, where we have to rely on the last column number
            n_data_cols = colnumbers[-1]
        else:
            # Neither data nor any column definition: fall through so that the
            # "no column names" error below is raised instead of an IndexError.
            n_data_cols = 0
        # sextractor column number start at 1.
        columns[n_data_cols + 1] = (None, None, None)
        colnumbers.append(n_data_cols + 1)

        if len(columns) > 1:
            # only fill in skipped columns when there is genuine column initially
            previous_column = 0
            for n in colnumbers:
                skipped = range(previous_column + 1, n)
                if skipped:
                    if previous_column not in columns:
                        # The header starts past column 1, so there is no
                        # preceding definition to inherit a name from.
                        raise core.InconsistentTableError(
                            "Cannot infer names for skipped columns before the "
                            "first column defined in the SExtractor header"
                        )
                    base_name, base_descr, base_unit = columns[previous_column]
                    for c in skipped:
                        columns[c] = (
                            base_name + f"_{c - previous_column}",
                            base_descr,
                            base_unit,
                        )
                previous_column = n

        # Add the columns in order to self.names
        colnumbers = sorted(columns)[:-1]  # drop the pseudo column
        self.names = [columns[n][0] for n in colnumbers]
        if not self.names:
            raise core.InconsistentTableError(
                "No column names found in SExtractor header"
            )

        self.cols = []
        for n in colnumbers:
            name, descr, unit = columns[n]
            col = core.Column(name=name)
            col.description = descr
            col.unit = unit
            self.cols.append(col)
class SExtractorData(core.BaseData):
    """Class-level settings for the data section of a SExtractor table."""
    # NOTE(review): presumably the index of the first data line once comment
    # lines have been removed -- confirm against core.BaseData.
    start_line = 0
    # Data values are separated by spaces.
    delimiter = " "
    # Pattern for optional leading whitespace followed by '#'; presumably used
    # by core.BaseData to discard header/comment lines -- confirm.
    comment = r"\s*#"