# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
MAST Collections
================
This module contains various methods for querying MAST collections such as catalogs.
"""
import warnings
import os
import time
from requests import HTTPError
import astropy.units as u
import astropy.coordinates as coord
from astropy.table import Table, Row
from ..utils import commons, async_to_sync
from ..utils.class_or_instance import class_or_instance
from ..exceptions import InvalidQueryError, MaxResultsWarning, InputWarning
from . import utils
from .core import MastQueryWithLogin
__all__ = ['Catalogs', 'CatalogsClass']
[docs]
@async_to_sync
class CatalogsClass(MastQueryWithLogin):
    """
    MAST catalog query class.
    Class for querying MAST catalog data.
    """
    def __init__(self):
        super().__init__()
        services = {"panstarrs": {"path": "panstarrs/{data_release}/{table}.json",
                                  "args": {"data_release": "dr2", "table": "mean"}}}
        self._service_api_connection.set_service_params(services, "catalogs", True)
        self.catalog_limit = None
        self._current_connection = None
    def _parse_result(self, response, *, verbose=False):
        results_table = self._current_connection._parse_result(response, verbose=verbose)
        if len(results_table) == self.catalog_limit:
            warnings.warn("Maximum catalog results returned, may not include all sources within radius.",
                          MaxResultsWarning)
        return results_table
[docs]
    @class_or_instance
    def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
                           version=None, pagesize=None, page=None, **kwargs):
        """
        Given a sky position and radius, returns a list of catalog entries.
        See column documentation for specific catalogs `here <https://mast.stsci.edu/api/v0/pages.html>`__.
        Parameters
        ----------
        coordinates : str or `~astropy.coordinates` object
            The target around which to search. It may be specified as a
            string or as the appropriate `~astropy.coordinates` object.
        radius : str or `~astropy.units.Quantity` object, optional
            Default 0.2 degrees.
            The string must be parsable by `~astropy.coordinates.Angle`. The
            appropriate `~astropy.units.Quantity` object from
            `~astropy.units` may also be used. Defaults to 0.2 deg.
        catalog : str, optional
            Default HSC.
            The catalog to be queried.
        version : int, optional
            Version number for catalogs that have versions. Default is highest version.
        pagesize : int, optional
            Default None.
            Can be used to override the default pagesize for (set in configs) this query only.
            E.g. when using a slow internet connection.
        page : int, optional
            Default None.
            Can be used to override the default behavior of all results being returned to obtain a
            specific page of results.
        **kwargs
            Other catalog-specific keyword args.
            These can be found in the (service documentation)[https://mast.stsci.edu/api/v0/_services.html]
            for specific catalogs. For example one can specify the magtype for an HSC search.
        Returns
        -------
        response : list of `~requests.Response`
        """
        # Put coordinates and radius into consistent format
        coordinates = commons.parse_coordinates(coordinates)
        # if radius is just a number we assume degrees
        radius = coord.Angle(radius, u.deg)
        # basic params
        params = {'ra': coordinates.ra.deg,
                  'dec': coordinates.dec.deg,
                  'radius': radius.deg}
        # Determine API connection and service name
        if catalog.lower() in self._service_api_connection.SERVICES:
            self._current_connection = self._service_api_connection
            service = catalog
        else:
            self._current_connection = self._portal_api_connection
            # Sorting out the non-standard portal service names
            if catalog.lower() == "hsc":
                if version == 2:
                    service = "Mast.Hsc.Db.v2"
                else:
                    if version not in (3, None):
                        warnings.warn("Invalid HSC version number, defaulting to v3.", InputWarning)
                    service = "Mast.Hsc.Db.v3"
                self.catalog_limit = kwargs.get('nr', 50000)
                # Hsc specific parameters (can be overridden by user)
                params['nr'] = 50000
                params['ni'] = 1
                params['magtype'] = 1
            elif catalog.lower() == "galex":
                service = "Mast.Galex.Catalog"
                self.catalog_limit = kwargs.get('maxrecords', 50000)
                # galex specific parameters (can be overridden by user)
                params['maxrecords'] = 50000
            elif catalog.lower() == "gaia":
                if version == 1:
                    service = "Mast.Catalogs.GaiaDR1.Cone"
                else:
                    if version not in (None, 2):
                        warnings.warn("Invalid Gaia version number, defaulting to DR2.", InputWarning)
                    service = "Mast.Catalogs.GaiaDR2.Cone"
            elif catalog.lower() == 'plato':
                if version in (None, 1):
                    service = "Mast.Catalogs.Plato.Cone"
                else:
                    warnings.warn("Invalid PLATO catalog version number, defaulting to DR1.", InputWarning)
                    service = "Mast.Catalogs.Plato.Cone"
            else:
                service = "Mast.Catalogs." + catalog + ".Cone"
                self.catalog_limit = None
        # adding additional user specified parameters
        for prop, value in kwargs.items():
            params[prop] = value
        # Parameters will be passed as JSON objects only when accessing the PANSTARRS API
        use_json = catalog.lower() == 'panstarrs'
        return self._current_connection.service_request_async(service, params, pagesize=pagesize, page=page,
                                                              use_json=use_json) 
[docs]
    @class_or_instance
    def query_object_async(self, objectname, *, radius=0.2*u.deg, catalog="Hsc",
                           pagesize=None, page=None, version=None, **kwargs):
        """
        Given an object name, returns a list of catalog entries.
        See column documentation for specific catalogs `here <https://mast.stsci.edu/api/v0/pages.html>`__.
        Parameters
        ----------
        objectname : str
            The name of the target around which to search.
        radius : str or `~astropy.units.Quantity` object, optional
            Default 0.2 degrees.
            The string must be parsable by `~astropy.coordinates.Angle`.
            The appropriate `~astropy.units.Quantity` object from
            `~astropy.units` may also be used. Defaults to 0.2 deg.
        catalog : str, optional
            Default HSC.
            The catalog to be queried.
        pagesize : int, optional
            Default None.
            Can be used to override the default pagesize for (set in configs) this query only.
            E.g. when using a slow internet connection.
        page : int, optional
            Defaulte None.
            Can be used to override the default behavior of all results being returned
            to obtain a specific page of results.
        version : int, optional
            Version number for catalogs that have versions. Default is highest version.
        **kwargs
            Catalog-specific keyword args.
            These can be found in the `service documentation <https://mast.stsci.edu/api/v0/_services.html>`__.
            for specific catalogs. For example one can specify the magtype for an HSC search.
        Returns
        -------
        response : list of `~requests.Response`
        """
        coordinates = utils.resolve_object(objectname)
        return self.query_region_async(coordinates,
                                       radius=radius,
                                       catalog=catalog,
                                       version=version,
                                       pagesize=pagesize,
                                       page=page,
                                       **kwargs) 
[docs]
    @class_or_instance
    def query_criteria_async(self, catalog, *, pagesize=None, page=None, **criteria):
        """
        Given an set of filters, returns a list of catalog entries.
        See column documentation for specific catalogs `here <https://mast.stsci.edu/api/v0/pages.html>`__.
        Parameters
        ----------
        pagesize : int, optional
            Can be used to override the default pagesize.
            E.g. when using a slow internet connection.
        page : int, optional
            Can be used to override the default behavior of all results being returned to obtain
            one specific page of results.
        **criteria
            Criteria to apply. At least one non-positional criteria must be supplied.
            Valid criteria are coordinates, objectname, radius (as in `query_region` and `query_object`),
            and all fields listed in the column documentation for the catalog being queried.
            The Column Name is the keyword, with the argument being one or more acceptable values for that parameter,
            except for fields with a float datatype where the argument should be in the form [minVal, maxVal].
            For non-float type criteria wildcards maybe used (both * and % are considered wildcards), however
            only one wildcarded value can be processed per criterion.
            RA and Dec must be given in decimal degrees, and datetimes in MJD.
            For example: filters=["FUV","NUV"],proposal_pi="Ost*",t_max=[52264.4586,54452.8914]
            For catalogs available through Catalogs.MAST (PanSTARRS), the Column Name is the keyword, and the argument
            should be either an acceptable value for that parameter, or a list consisting values, or  tuples of
            decorator, value pairs (decorator, value). In addition, columns may be used to select the return columns,
            consisting of a list of column names. Results may also be sorted through the query with the parameter
            sort_by composed of either a single Column Name to sort ASC, or a list of Column Nmaes to sort ASC or
            tuples of Column Name and Direction (ASC, DESC) to indicate sort order (Column Name, DESC).
            Detailed information of Catalogs.MAST criteria usage can
            be found `here <https://catalogs.mast.stsci.edu/docs/index.html>`__.
        Returns
        -------
        response : list of `~requests.Response`
        """
        # Separating any position info from the rest of the filters
        coordinates = criteria.pop('coordinates', None)
        objectname = criteria.pop('objectname', None)
        radius = criteria.pop('radius', 0.2*u.deg)
        if objectname or coordinates:
            coordinates = utils.parse_input_location(coordinates, objectname)
        # if radius is just a number we assume degrees
        radius = coord.Angle(radius, u.deg)
        # build query
        params = {}
        if coordinates:
            params["ra"] = coordinates.ra.deg
            params["dec"] = coordinates.dec.deg
            params["radius"] = radius.deg
        # Determine API connection, service name, and build filter set
        filters = None
        if catalog.lower() in self._service_api_connection.SERVICES:
            self._current_connection = self._service_api_connection
            service = catalog
            if not self._current_connection.check_catalogs_criteria_params(criteria):
                raise InvalidQueryError("At least one non-positional criterion must be supplied.")
            for prop, value in criteria.items():
                params[prop] = value
        else:
            self._current_connection = self._portal_api_connection
            if catalog.lower() == "tic":
                service = "Mast.Catalogs.Filtered.Tic"
                if coordinates or objectname:
                    service += ".Position"
                service += ".Rows"  # Using the rowstore version of the query for speed
                filters = self._current_connection.build_filter_set("Mast.Catalogs.Tess.Cone",
                                                                    service, **criteria)
                params["columns"] = "*"
            elif catalog.lower() == "ctl":
                service = "Mast.Catalogs.Filtered.Ctl"
                if coordinates or objectname:
                    service += ".Position"
                service += ".Rows"  # Using the rowstore version of the query for speed
                filters = self._current_connection.build_filter_set("Mast.Catalogs.Tess.Cone",
                                                                    service, **criteria)
                params["columns"] = "*"
            elif catalog.lower() == "diskdetective":
                service = "Mast.Catalogs.Filtered.DiskDetective"
                if coordinates or objectname:
                    service += ".Position"
                filters = self._current_connection.build_filter_set("Mast.Catalogs.Dd.Cone",
                                                                    service, **criteria)
            else:
                raise InvalidQueryError("Criteria query not available for {}".format(catalog))
            if not filters:
                raise InvalidQueryError("At least one non-positional criterion must be supplied.")
            params["filters"] = filters
        # Parameters will be passed as JSON objects only when accessing the PANSTARRS API
        use_json = catalog.lower() == 'panstarrs'
        return self._current_connection.service_request_async(service, params, pagesize=pagesize, page=page,
                                                              use_json=use_json) 
[docs]
    @class_or_instance
    def query_hsc_matchid_async(self, match, *, version=3, pagesize=None, page=None):
        """
        Returns all the matches for a given Hubble Source Catalog MatchID.
        Parameters
        ----------
        match : int or `~astropy.table.Row`
            The matchID or HSC entry to return matches for.
        version : int, optional
            The HSC version to match against. Default is v3.
        pagesize : int, optional
            Can be used to override the default pagesize.
            E.g. when using a slow internet connection.
        page : int, optional
            Can be used to override the default behavior of all results being returned to obtain
            one specific page of results.
        Returns
        -------
        response : list of `~requests.Response`
        """
        self._current_connection = self._portal_api_connection
        if isinstance(match, Row):
            match = match["MatchID"]
        match = str(match)  # np.int64 gives json serializer problems, so stringify right here
        if version == 2:
            service = "Mast.HscMatches.Db.v2"
        else:
            if version not in (3, None):
                warnings.warn("Invalid HSC version number, defaulting to v3.", InputWarning)
            service = "Mast.HscMatches.Db.v3"
        params = {"input": match}
        return self._current_connection.service_request_async(service, params, pagesize=pagesize, page=page) 
[docs]
    @class_or_instance
    def get_hsc_spectra_async(self, *, pagesize=None, page=None):
        """
        Returns all Hubble Source Catalog spectra.
        Parameters
        ----------
        pagesize : int, optional
            Can be used to override the default pagesize.
            E.g. when using a slow internet connection.
        page : int, optional
            Can be used to override the default behavior of all results being returned to obtain
            one specific page of results.
        Returns
        -------
        response : list of `~requests.Response`
        """
        self._current_connection = self._portal_api_connection
        service = "Mast.HscSpectra.Db.All"
        params = {}
        return self._current_connection.service_request_async(service, params, pagesize, page) 
[docs]
    def download_hsc_spectra(self, spectra, *, download_dir=None, cache=True, curl_flag=False):
        """
        Download one or more Hubble Source Catalog spectra.
        Parameters
        ----------
        spectra : `~astropy.table.Table` or `~astropy.table.Row`
            One or more HSC spectra to be downloaded.
        download_dir : str, optional
           Specify the base directory to download spectra into.
           Spectra will be saved in the subdirectory download_dir/mastDownload/HSC.
           If download_dir is not specified the base directory will be '.'.
        cache : bool, optional
            Default is True. If file is found on disc it will not be downloaded again.
            Note: has no affect when downloading curl script.
        curl_flag : bool, optional
            Default is False.  If true instead of downloading files directly, a curl script
            will be downloaded that can be used to download the data files at a later time.
        Returns
        -------
        response : list of `~requests.Response`
        """
        # if spectra is not a Table, put it in a list
        if isinstance(spectra, Row):
            spectra = [spectra]
        # set up the download directory and paths
        if not download_dir:
            download_dir = '.'
        if curl_flag:  # don't want to download the files now, just the curl script
            download_file = "mastDownload_" + time.strftime("%Y%m%d%H%M%S")
            url_list = []
            path_list = []
            for spec in spectra:
                if spec['SpectrumType'] < 2:
                    url_list.append('https://hla.stsci.edu/cgi-bin/getdata.cgi?config=ops&dataset={0}'
                                    .format(spec['DatasetName']))
                else:
                    url_list.append('https://hla.stsci.edu/cgi-bin/ecfproxy?file_id={0}'
                                    .format(spec['DatasetName']) + '.fits')
                path_list.append(download_file + "/HSC/" + spec['DatasetName'] + '.fits')
            description_list = [""]*len(spectra)
            producttype_list = ['spectrum']*len(spectra)
            service = "Mast.Bundle.Request"
            params = {"urlList": ",".join(url_list),
                      "filename": download_file,
                      "pathList": ",".join(path_list),
                      "descriptionList": list(description_list),
                      "productTypeList": list(producttype_list),
                      "extension": 'curl'}
            response = self._portal_api_connection.service_request_async(service, params)
            bundler_response = response[0].json()
            local_path = os.path.join(download_dir, "{}.sh".format(download_file))
            self._download_file(bundler_response['url'], local_path, head_safe=True, continuation=False)
            status = "COMPLETE"
            msg = None
            url = None
            if not os.path.isfile(local_path):
                status = "ERROR"
                msg = "Curl could not be downloaded"
                url = bundler_response['url']
            else:
                missing_files = [x for x in bundler_response['statusList'].keys()
                                 if bundler_response['statusList'][x] != 'COMPLETE']
                if len(missing_files):
                    msg = "{} files could not be added to the curl script".format(len(missing_files))
                    url = ",".join(missing_files)
            manifest = Table({'Local Path': [local_path],
                              'Status': [status],
                              'Message': [msg],
                              "URL": [url]})
        else:
            base_dir = download_dir.rstrip('/') + "/mastDownload/HSC"
            if not os.path.exists(base_dir):
                os.makedirs(base_dir)
            manifest_array = []
            for spec in spectra:
                if spec['SpectrumType'] < 2:
                    data_url = f'https://hla.stsci.edu/cgi-bin/getdata.cgi?config=ops&dataset={spec["DatasetName"]}'
                else:
                    data_url = f'https://hla.stsci.edu/cgi-bin/ecfproxy?file_id={spec["DatasetName"]}.fits'
                local_path = os.path.join(base_dir, f'{spec["DatasetName"]}.fits')
                status = "COMPLETE"
                msg = None
                url = None
                try:
                    self._download_file(data_url, local_path, cache=cache, head_safe=True)
                    # check file size also this is where would perform md5
                    if not os.path.isfile(local_path):
                        status = "ERROR"
                        msg = "File was not downloaded"
                        url = data_url
                except HTTPError as err:
                    status = "ERROR"
                    msg = "HTTPError: {0}".format(err)
                    url = data_url
                manifest_array.append([local_path, status, msg, url])
                manifest = Table(rows=manifest_array, names=('Local Path', 'Status', 'Message', "URL"))
        return manifest 
 
Catalogs = CatalogsClass()