Source code for pydl.pydlspec2d.spec1d

# Licensed under a 3-clause BSD style license - see LICENSE.rst
# -*- coding: utf-8 -*-
"""This module corresponds to the spec1d directory in idlspec2d.
"""
import glob
import os
import time
from warnings import warn
import numpy as np
from numpy.linalg import solve
try:
    import matplotlib
    matplotlib.use('Agg')
    matplotlib.rcParams['figure.figsize'] = (16.0, 12.0)
    import matplotlib.pyplot as plt
    from matplotlib.font_manager import fontManager, FontProperties
except ImportError:
    pass
from astropy import log
from astropy.io import ascii, fits
from . import Pydlspec2dException, Pydlspec2dUserWarning

#
# Used by findspec
#
findspec_cache = None


[docs]class HMF(object): """Class used to manage data for Heteroscedastic Matrix Factorization (HMF). This is a replacement for :func:`~pydl.pydlspec2d.spec1d.pca_solve`. It can be called with:: hmf = HMF(spectra, invvar) output = hmf.solve() The input spectra should be pre-processed through :func:`~pydl.pydlspec2d.spec2d.combine1fiber`. Parameters ---------- spectra : array-like The input spectral flux, assumed to have a common wavelength and redshift system. invvar : array-like The inverse variance of the spectral flux. K : :class:`int`, optional The number of dimensions of the factorization (default 4). n_iter : :class:`int`, optional Number of iterations. seed : :class:`int`, optional. If set, pass this value to :func:`numpy.random.seed`. nonnegative : :class:`bool`, optional Set this to ``True`` to perform nonnegative HMF. epsilon : :class:`float`, optional Regularization parameter. Set to any non-negative float value to turn it on. verbose : :class:`bool`, optional If ``True``, print extra information. Notes ----- See [1]_ and [2]_ for the original derivation of this method. The HMF iteration is initialized using :func:`~scipy.cluster.vq.kmeans`, which itself uses random numbers to initialize its state. If you need to ensure reproducibility, call :func:`numpy.random.seed` before initializing HMF. The current algorithm cannot handle input data that contain *columns* of zeros. Columns of this type need to be *carefully* removed from the input data. This could also result in the output data having a different size compared to the input data. References ---------- .. [1] `Tsalmantza, P., Decarli, R., Dotti, M., Hogg, D. W., 2011 ApJ 738, 20 <https://ui.adsabs.harvard.edu/abs/2011ApJ...738...20T/abstract>`_ .. [2] `Tsalmantza, P., Hogg, D. W., 2012 ApJ 753, 122 <https://ui.adsabs.harvard.edu/abs/2012ApJ...753..122T/abstract>`_ """ def __init__(self, spectra, invvar, K=4, n_iter=None, seed=None, nonnegative=False, epsilon=None, verbose=False): self.spectra = spectra self.invvar = invvar self.K = K if n_iter is None: if nonnegative: self.n_iter = 2048 else: self.n_iter = 20 else: self.n_iter = int(n_iter) self.seed = seed self.nonnegative = nonnegative self.epsilon = epsilon self.verbose = verbose self.a = None self.g = None if self.verbose: log.setLevel('DEBUG') return
[docs] def solve(self): """Process the inputs. Returns ------- :class:`dict` The HMF solution. """ if len(self.spectra.shape) == 1: nobj = 1 npix = self.spectra.shape[0] else: nobj, npix = self.spectra.shape log.info("Building HMF from %d object spectra.", nobj) fluxdict = dict() # # If there is only one object spectrum, then all we can do is return it. # if nobj == 1: fluxdict['flux'] = self.spectra.astype('f') return fluxdict a, g = self.iterate() fluxdict['acoeff'] = a fluxdict['flux'] = g return fluxdict
    def model(self):
        """Compute the model.
        """
        return np.dot(self.a, self.g)

    def resid(self):
        """Compute residuals.
        """
        return self.spectra - self.model()

    def chi(self):
        r"""Compute :math:`\chi`, the scaled residual.
        """
        return self.resid() * np.sqrt(self.invvar)

    def penalty(self):
        """Compute penalty for non-smoothness.
        """
        if self.epsilon is None:
            return 0.0
        return self.epsilon * np.sum(np.diff(self.g)**2)

    def badness(self):
        r"""Compute :math:`\chi^2`, including possible non-smoothness penalty.
        """
        return np.sum(self.chi()**2) + self.penalty()

    def normbase(self):
        """Apply standard component normalization.
        """
        return np.sqrt((self.g**2).mean(1))
    def astep(self):
        """Update for coefficients at fixed component spectra.
        """
        N, M = self.spectra.shape
        K, M = self.g.shape
        a = np.zeros((N, K), dtype=self.g.dtype)
        for i in range(N):
            Gi = np.zeros((K, K), dtype=self.g.dtype)
            for k in range(K):
                for kp in range(k, K):
                    Gi[k, kp] = np.sum(self.g[k, :] * self.g[kp, :] *
                                       self.invvar[i, :])
                    if kp > k:
                        Gi[kp, k] = Gi[k, kp]
            Fi = np.dot(self.g, self.spectra[i, :]*self.invvar[i, :])
            a[i, :] = solve(Gi, Fi)
        return a
    def gstep(self):
        """Update for component spectra at fixed coefficients.
        """
        N, M = self.spectra.shape
        N, K = self.a.shape
        g = np.zeros((K, M), dtype=self.a.dtype)
        e = np.zeros(self.g.shape, dtype=self.g.dtype)
        d = np.zeros((K, K, M), dtype=self.a.dtype)
        if self.epsilon is not None and self.epsilon > 0:
            foo = self.epsilon * np.eye(K, dtype=self.a.dtype)
            for l in range(M):
                d[:, :, l] = foo
                if l > 0 and l < M-1:
                    d[:, :, l] *= 2
            # d[:, :, 0] = foo
            # d[:, :, 1:M-1] = 2*foo
            # d[:, :, M-1] = foo
            e[:, 0] = self.epsilon*self.g[:, 1]
            e[:, 1:M-1] = self.epsilon*(self.g[:, 0:M-2] + self.g[:, 2:M])
            e[:, M-1] = self.epsilon*self.g[:, M-2]
        for j in range(M):
            Aj = np.zeros((K, K), dtype=self.a.dtype)
            for k in range(K):
                for kp in range(k, K):
                    Aj[k, kp] = np.sum(self.a[:, k] * self.a[:, kp] *
                                       self.invvar[:, j])
                    if kp > k:
                        Aj[kp, k] = Aj[k, kp]
            Aj += d[:, :, j]
            Fj = (np.dot(self.a.T, self.spectra[:, j]*self.invvar[:, j]) +
                  e[:, j])
            g[:, j] = solve(Aj, Fj)
        return g
    def astepnn(self):
        """Non-negative update for coefficients at fixed component spectra.
        """
        numerator = np.dot(self.spectra*self.invvar, self.g.T)
        denominator = np.dot(np.dot(self.a, self.g)*self.invvar, self.g.T)
        return self.a*(numerator/denominator)
    def gstepnn(self):
        """Non-negative update for component spectra at fixed coefficients.
        """
        K, M = self.g.shape
        numerator = np.dot(self.a.T, (self.spectra*self.invvar))
        if self.epsilon is not None and self.epsilon > 0:
            e = np.zeros(self.g.shape, dtype=self.g.dtype)
            e[:, 0] = self.epsilon*self.g[:, 1]
            e[:, 1:M-1] = self.epsilon*(self.g[:, 0:M-2] + self.g[:, 2:M])
            e[:, M-1] = self.epsilon*self.g[:, M-2]
            numerator += e
        denominator = np.dot(self.a.T, np.dot(self.a, self.g)*self.invvar)
        if self.epsilon is not None and self.epsilon > 0:
            d = self.epsilon*self.g.copy()
            d[:, 1:M-1] *= 2
            denominator += d
        return self.g*(numerator/denominator)
    def reorder(self):
        """Reorder and rotate basis analogous to PCA.
        """
        from numpy.linalg import eigh
        l, U = eigh(np.dot(self.a.T, self.a))
        return (np.dot(self.a, U), np.dot(U.T, self.g))
[docs] def iterate(self): """Handle the HMF iteration. Returns ------- :class:`tuple` of :class:`numpy.ndarray` The fitting coefficients and fitted functions, respectively. """ from scipy.cluster.vq import kmeans, whiten from ..pydlutils.math import find_contiguous N, M = self.spectra.shape # # Make spectra non-negative # if self.nonnegative: self.spectra[self.spectra < 0] = 0 self.invvar[self.spectra < 0] = 0 # # Detect and fix very bad columns. # si = self.spectra * self.invvar if (self.spectra.sum(0) == 0).any(): log.warn("Columns of zeros detected in spectra!") if (self.invvar.sum(0) == 0).any(): log.warn("Columns of zeros detected in invvar!") if (si.sum(0) == 0).any(): log.warn("Columns of zeros detected in spectra*invvar!") zerocol = ((self.spectra.sum(0) == 0) | (self.invvar.sum(0) == 0) | (si.sum(0) == 0)) n_zero = zerocol.sum() if n_zero > 0: log.warn("Found %d bad columns in input data!", n_zero) # # Find the largest set of contiguous pixels # goodcol = find_contiguous(~zerocol) self.spectra = self.spectra[:, goodcol] self.invvar = self.invvar[:, goodcol] # si = si[:, goodcol] # newloglam = fullloglam[goodcol] # # Initialize g matrix with kmeans # if self.seed is not None: np.random.seed(self.seed) whitespectra = whiten(self.spectra) log.debug(whitespectra[0:3, 0:3]) self.g, foo = kmeans(whitespectra, self.K) self.g /= np.repeat(self.normbase(), M).reshape(self.g.shape) log.debug(self.g[0:3, 0:3]) # # Initialize a matrix # self.a = np.outer(np.sqrt((self.spectra**2).mean(1)), np.repeat(1.0/self.K, self.K)) if self.nonnegative: for k in range(128): self.a = self.astepnn() # # Iterate! # t0 = time.time() for m in range(self.n_iter): log.info("Starting iteration #%4d.", m+1) if self.nonnegative: self.a = self.astepnn() self.g = self.gstepnn() else: self.a = self.astep() self.g = self.gstep() self.a, self.g = self.reorder() norm = self.normbase() self.g /= np.repeat(norm, M).reshape(self.g.shape) self.a = (self.a.T*np.repeat(norm, N).reshape(self.K, N)).T log.debug(self.a[0:3, 0:3]) log.debug(self.g[0:3, 0:3]) log.debug("Chi**2 after iteration #%4d = %f.", m+1, self.badness()) log.info("The elapsed time for iteration #%4d is %6.2f s.", m+1, time.time()-t0) return (self.a, self.g)
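# A minimal usage sketch for the HMF class above, on synthetic data.  Shapes
# follow the class docstring: `spectra` and `invvar` are (nobj, npix) arrays
# on a common wavelength and redshift grid; the values are made up purely for
# illustration.
import numpy as np
from pydl.pydlspec2d.spec1d import HMF

rs = np.random.RandomState(137)
spectra = rs.uniform(1.0, 2.0, (20, 100))   # 20 fake spectra, 100 pixels
invvar = np.ones((20, 100), dtype='d')      # uniform inverse variance
hmf = HMF(spectra, invvar, K=4, n_iter=5, seed=137)
output = hmf.solve()
# output['flux'] has shape (K, npix) -- fewer pixels if zero-valued columns
# had to be trimmed -- and output['acoeff'] has shape (nobj, K).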
[docs]def findspec(*args, **kwargs): """Find SDSS/BOSS spectra that match a given RA, Dec. Parameters ---------- ra, dec : array-like, optional If set, the first two positional arguments will be interpreted as RA, Dec. best : :class:`bool`, optional If set, return only the best match for each input RA, Dec. infile : :class:`str`, optional If set, read RA, Dec data from this file. outfile : :class:`str`, optional If set, print match data to this file. print : :class:`bool`, optional If set, print the match data to the console. run1d : :class:`str`, optional Override the value of :envvar:`RUN1D`. run2d : :class:`str`, optional Override the value of :envvar:`RUN2D`. sdss : :class:`bool`, optional If set, search for SDSS-I/II spectra instead of BOSS spectra. searchrad : :class:`float`, optional Search for spectra in this radius around given RA, Dec. Default is 3 arcsec. topdir : :class:`str`, optional If set, override the value of :envvar:`SPECTRO_REDUX` or :envvar:`BOSS_SPECTRO_REDUX`. Returns ------- :class:`dict` A dictionary containing plate, MJD, fiber, etc. """ from .. import uniq from ..pydlutils.misc import struct_print from ..pydlutils.spheregroup import spherematch global findspec_cache # # Set up default values # if 'sdss' in kwargs: if 'topdir' in kwargs: topdir = kwargs['topdir'] else: topdir = os.environ['SPECTRO_REDUX'] if 'run2d' in kwargs: run2d = str(kwargs['run2d']) else: run2d = '26' run1d = '' else: if 'topdir' in kwargs: topdir = kwargs['topdir'] else: topdir = os.environ['BOSS_SPECTRO_REDUX'] if 'run2d' in kwargs: run2d = str(kwargs['run2d']) else: run2d = os.environ['RUN2D'] if 'run1d' in kwargs: run1d = str(kwargs['run1d']) else: run1d = os.environ['RUN1D'] if findspec_cache is None: findspec_cache = {'lasttopdir': topdir, 'plist': None} if (findspec_cache['plist'] is None or topdir != findspec_cache['lasttopdir']): findspec_cache['lasttopdir'] = topdir platelist_file = os.path.join(topdir, "platelist.fits") plates_files = glob.glob(os.path.join(topdir, "plates-*.fits")) plist = None if os.path.exists(platelist_file): platelist = fits.open(platelist_file) plist = platelist[1].data platelist.close() if len(plates_files) > 0: plates = fits.open(plates_files[0]) plist = plates[1].data plates.close() if plist is None: raise Pydlspec2dException("Plate list (platelist.fits or plates-*.fits) not found in {0}.".format(topdir)) else: findspec_cache['plist'] = plist qdone = plist.field('STATUS1D') == 'Done' qdone2d = plist.field('RUN2D').strip() == run2d if run1d == '': qdone1d = np.ones(plist.size, dtype='bool') else: qdone1d = plist.field('RUN1D').strip() == run1d qfinal = qdone & qdone2d & qdone1d if not qfinal.any(): warn("No reduced plates!", Pydlspec2dUserWarning) return None idone = np.arange(plist.size)[qfinal] # # If there are positional arguments, interpret these as RA, Dec # if len(args) == 2: ra = args[0] dec = args[1] # # Read RA, Dec from infile if set # if 'infile' in kwargs: infile_data = ascii.read(kwargs['infile'], names=['ra', 'dec']) ra = infile_data["ra"].data dec = infile_data["dec"].data if 'searchrad' in kwargs: searchrad = float(kwargs['searchrad']) else: searchrad = 3.0/3600.0 # # Create output structure # slist_type = np.dtype([('PLATE', 'i4'), ('MJD', 'i4'), ('FIBERID', 'i4'), ('RA', 'f8'), ('DEC', 'f8'), ('MATCHRAD', 'f8')]) # # Match all plates with objects # imatch1, itmp, dist12 = spherematch(ra, dec, plist[qfinal].field('RACEN'), plist[qfinal].field('DECCEN'), searchrad+1.55, maxmatch=0) if imatch1.size == 0: warn("No matching plates found.", 
Pydlspec2dUserWarning) return None imatch2 = idone[itmp] # # Read all relevant plates # try: n_total = plist.field('N_TOTAL') except KeyError: n_total = np.zeros(plist.size, dtype='i4') + 640 iplate = imatch2[uniq(imatch2, imatch2.argsort())] i0 = 0 plugmap = np.zeros(n_total[iplate].sum(), dtype=[('PLATE', 'i4'), ('MJD', 'i4'), ('FIBERID', 'i4'), ('RA', 'd'), ('DEC', 'd')]) for i in range(iplate.size): spplate = readspec(plist[iplate[i]].field('PLATE'), mjd=plist[iplate[i]].field('MJD'), topdir=topdir, run2d=run2d, run1d=run1d) index_to = i0 + np.arange(n_total[iplate[i]], dtype='i4') plugmap['PLATE'][index_to] = plist[iplate[i]].field('PLATE') plugmap['MJD'][index_to] = plist[iplate[i]].field('MJD') plugmap['FIBERID'][index_to] = spplate['plugmap']['FIBERID'] plugmap['RA'][index_to] = spplate['plugmap']['RA'] plugmap['DEC'][index_to] = spplate['plugmap']['DEC'] i0 += n_total[iplate[i]] i1, i2, d12 = spherematch(ra, dec, plugmap['RA'], plugmap['DEC'], searchrad, maxmatch=0) if i1.size == 0: warn('No matching objects found.', Pydlspec2dUserWarning) return None if 'best' in kwargs: # # Return only best match per object # slist = np.zeros(ra.size, dtype=slist_type) spplate = readspec(plugmap[i2]['PLATE'], plugmap[i2]['FIBERID'], mjd=plugmap[i2]['MJD'], topdir=topdir, run2d=run2d, run1d=run1d) sn = spplate['zans']['SN_MEDIAN'] isort = (i1 + np.where(sn > 0, sn, 0)/(sn+1.0).max()).argsort() i1 = i1[isort] i2 = i2[isort] d12 = d12[isort] iuniq = uniq(i1) slist[i1[iuniq]]['PLATE'] = plugmap[i2[iuniq]]['PLATE'] slist[i1[iuniq]]['MJD'] = plugmap[i2[iuniq]]['MJD'] slist[i1[iuniq]]['FIBERID'] = plugmap[i2[iuniq]]['FIBERID'] slist[i1[iuniq]]['RA'] = plugmap[i2[iuniq]]['RA'] slist[i1[iuniq]]['DEC'] = plugmap[i2[iuniq]]['DEC'] slist[i1[iuniq]]['MATCHRAD'] = d12[iuniq] else: # # Return all matches # slist = np.zeros(i1.size, dtype=slist_type) slist['PLATE'] = plugmap[i2]['PLATE'] slist['MJD'] = plugmap[i2]['MJD'] slist['FIBERID'] = plugmap[i2]['FIBERID'] slist['RA'] = plugmap[i2]['RA'] slist['DEC'] = plugmap[i2]['DEC'] slist['MATCHRAD'] = d12 # # Print to terminal or output file # if 'print' in kwargs: foo = struct_print(slist) if 'outfile' in kwargs: foo = struct_print(slist, filename=outfile) return slist
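# A usage sketch for findspec(), assuming BOSS_SPECTRO_REDUX, RUN2D and RUN1D
# point at a local reduction that contains platelist.fits; the coordinates and
# the input file name below are placeholders.
import numpy as np
from pydl.pydlspec2d.spec1d import findspec

slist = findspec(np.array([154.0, 211.5]), np.array([1.2, -0.3]),
                 searchrad=2.0/3600.0)
best = findspec(infile='targets.txt', best=True)   # hypothetical input file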
[docs]def latest_mjd(plate, **kwargs): """Find the most recent MJD associated with a plate. Parameters ---------- plate : :class:`int` or :class:`numpy.ndarray` The plate(s) to examine. Returns ------- :class:`numpy.ndarray` An array of MJD values for each plate. """ import re if isinstance(plate, (int,)) or plate.shape == (): platevec = np.array([plate], dtype='i4') else: platevec = plate mjd = np.zeros(len(platevec), dtype='i4') mjdre = re.compile(r'spPlate-[0-9]{4}-([0-9]{5}).fits') unique_plates = np.unique(platevec) paths = spec_path(unique_plates, **kwargs) for p, q in zip(paths, unique_plates): plateglob = "{0}/spPlate-{1:04d}-*.fits".format(p, q) bigmjd = 0 for f in glob.glob(plateglob): thismjd = int(mjdre.search(f).groups()[0]) if thismjd > bigmjd: bigmjd = thismjd mjd[platevec == q] = bigmjd return mjd
[docs]def number_of_fibers(plate, **kwargs): """Returns the total number of fibers per plate. Parameters ---------- plate : :class:`int` or :class:`numpy.ndarray` The plate(s) to examine. Returns ------- :class:`numpy.ndarray` The number of fibers on each plate. """ # # Get mjd values # if isinstance(plate, (int,)) or plate.shape == (): platevec = np.array([plate], dtype='i4') else: platevec = plate mjd = latest_mjd(plate, **kwargs) nfiber = np.zeros(mjd.size, dtype='i4') # # SDSS-I,II plates # nfiber[mjd < 55025] = 640 # # Short circuit if we're done. # if (nfiber == 640).all(): return nfiber # # Not all BOSS plates have 1000 fibers # if 'path' in kwargs: platelistpath = os.path.join(kwargs['path'], 'platelist.fits') else: platelistpath = os.path.join(os.environ['BOSS_SPECTRO_REDUX'], 'platelist.fits') platelist = fits.open(platelistpath) platentotal = platelist[1].data.field('N_TOTAL') plateplate = platelist[1].data.field('PLATE') platemjd = platelist[1].data.field('MJD') platerun2d = platelist[1].data.field('RUN2D') platerun1d = platelist[1].data.field('RUN1D') platelist.close() if 'run2d' in kwargs: run2d = kwargs['run2d'] else: run2d = os.environ['RUN2D'] if 'run1d' in kwargs: run1d = kwargs['run1d'] else: run1d = os.environ['RUN1D'] for k in range(mjd.size): nfiber[k] = platentotal[(plateplate == platevec[k]) & (platemjd == mjd[k]) & (platerun2d == run2d) & (platerun1d == run1d)] return nfiber
[docs]def pca_solve(newflux, newivar, maxiter=0, niter=10, nkeep=3, nreturn=None, verbose=False): """Replacement for idlspec2d pca_solve.pro. Parameters ---------- newflux : array-like The input spectral flux, assumed to have a common wavelength and redshift system. newivar : array-like The inverse variance of the spectral flux. maxiter : :class:`int`, optional Stop PCA+reject iterations after this number. niter : :class:`int`, optional Stop PCA iterations after this number. nkeep : :class:`int`, optional Number of PCA components to keep. nreturn : :class:`int`, optional Number of PCA components to return, usually the same as `nkeep`. verbose : :class:`bool`, optional If ``True``, print extra information. Returns ------- :class:`dict` The PCA solution. """ from .. import pcomp from ..pydlutils.math import computechi2, djs_reject if verbose: log.setLevel('DEBUG') if nreturn is None: nreturn = nkeep if len(newflux.shape) == 1: nobj = 1 npix = newflux.shape[0] else: nobj, npix = newflux.shape log.info("Building PCA from %d object spectra.", nobj) nzi = newivar.nonzero() first_nonzero = (np.arange(nobj, dtype=nzi[0].dtype), np.array([nzi[1][nzi[0] == k].min() for k in range(nobj)])) # # Construct the synthetic weight vector, to be used when replacing the # low-S/N object pixels with the reconstructions. # synwvec = np.ones((npix,), dtype='d') for ipix in range(npix): indx = newivar[:, ipix] != 0 if indx.any(): synwvec[ipix] = newivar[indx, ipix].mean() fluxdict = dict() # # If there is only one object spectrum, then all we can do is return it. # if nobj == 1: fluxdict['flux'] = newflux.astype('f') return fluxdict # # Rejection iteration loop. # qdone = 0 iiter = 0 # # Begin with all points good. # outmask = None inmask = newivar != 0 ymodel = None # emevecs, emevals = pydlutils.empca(newflux, inmask) # fluxdict['emevecs'] = emevecs # fluxdict['emevals'] = emeveals while qdone == 0 and iiter <= maxiter: log.debug('starting djs_reject') outmask, qdone = djs_reject(newflux, ymodel, inmask=inmask, outmask=outmask, invvar=newivar) log.debug('finished with djs_reject') # # Iteratively do the PCA solution # filtflux = newflux.copy() acoeff = np.zeros((nobj, nkeep), dtype='d') t0 = time.time() for ipiter in range(niter): # # We want to get these values from the pcomp routine. # # eigenval = 1 # coeff = 1 flux0 = np.tile(filtflux[first_nonzero], npix).reshape(npix, nobj).transpose() # flux0 = np.tile(filtflux, npix).reshape(npix, nobj).transpose() totflux = np.absolute(filtflux - flux0).sum(1) goodobj = totflux > 0 if goodobj.all(): tmp = pcomp(filtflux.T) # , standardize=True) pres = tmp.derived eigenval = tmp.eigenvalues else: tmp = pcomp(filtflux[goodobj, :].T) # , standardize=True) pres = np.zeros((nobj, npix), dtype='d') pres[goodobj, :] = tmp.derived eigenval = np.zeros((nobj,), dtype='d') eigenval[goodobj] = tmp.eigenvalues maskivar = newivar * outmask sqivar = np.sqrt(maskivar) for iobj in range(nobj): out = computechi2(newflux[iobj, :], sqivar[iobj, :], pres[:, 0:nkeep]) filtflux[iobj, :] = (maskivar[iobj, :] * newflux[iobj, :] + synwvec*out.yfit) / (maskivar[iobj, :] + synwvec) acoeff[iobj, :] = out.acoeff log.info("The elapsed time for iteration #%2d is %6.2f s.", ipiter+1, time.time()-t0) # # Now set ymodel for rejecting points. 
# ymodel = np.dot(acoeff, pres[:, 0:nkeep].T) iiter += 1 if nobj == 1: usemask = outmask else: usemask = outmask.sum(0) fluxdict['usemask'] = usemask fluxdict['outmask'] = outmask fluxdict['flux'] = pres[:, 0:nreturn].transpose().astype('f') fluxdict['eigenval'] = eigenval[0:nreturn] fluxdict['acoeff'] = acoeff return fluxdict
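# A usage sketch for pca_solve() on synthetic spectra that already share a
# wavelength grid and rest frame; real inputs would normally come from
# preprocess_spectra() below.
import numpy as np
from pydl.pydlspec2d.spec1d import pca_solve

rs = np.random.RandomState(20)
flux = rs.normal(1.0, 0.1, (10, 200))   # 10 spectra, 200 pixels
ivar = np.full(flux.shape, 100.0)       # constant inverse variance
pcafit = pca_solve(flux, ivar, niter=5, nkeep=3)
# pcafit['flux'] holds the nkeep eigenspectra, pcafit['acoeff'] the
# coefficients of each input spectrum, pcafit['eigenval'] the eigenvalues.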
[docs]def plot_eig(filename, title='Unknown'): """Plot spectra from an eigenspectra/template file. Parameters ---------- filename : :class:`str` Name of a FITS file containing eigenspectra/templates. title : :class:`str`, optional Title to put on the plot. Raises ------ :exc:`ValueError` If an unknown template type was input in `filename`. """ # # Set title based on filename # if title == 'Unknown': if filename.find('Gal') > 0: title = 'Galaxies: Eigenspectra' elif filename.find('QSO') > 0: title = 'QSOs: Eigenspectra' elif filename.find('Star') > 0: title = 'Stars: Eigenspectra' elif filename.find('CVstar') > 0: title = 'CV Stars: Eigenspectra' else: raise ValueError('Unknown template type!') base, ext = filename.split('.') spectrum = fits.open(filename) newloglam0 = spectrum[0].header['COEFF0'] objdloglam = spectrum[0].header['COEFF1'] spectro_data = spectrum[0].data spectrum.close() (neig, ndata) = spectro_data.shape newloglam = np.arange(ndata) * objdloglam + newloglam0 lam = 10.0**newloglam fig = plt.figure(dpi=100) ax = fig.add_subplot(111) colorvec = ['k', 'r', 'g', 'b', 'm', 'c'] for l in range(neig): p = ax.plot(lam, spectro_data[l, :], colorvec[l % len(colorvec)]+'-', linewidth=1) ax.set_xlabel(r'Wavelength [$\AA$]') ax.set_ylabel('Flux [Arbitrary Units]') ax.set_title(title) # ax.set_xlim([3500.0,10000.0]) # ax.set_ylim([-400.0,500.0]) # fig.savefig(base+'.zoom.png') fig.savefig(base+'.png') plt.close(fig) return
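# A usage sketch for plot_eig().  The file name is a placeholder; any spEigen*
# template file with COEFF0/COEFF1 wavelength keywords in its primary header
# should work, and a PNG with the same base name is written.
from pydl.pydlspec2d.spec1d import plot_eig

plot_eig('spEigenGal-55740.fits', title='Galaxies: Eigenspectra')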
[docs]def readspec(platein, mjd=None, fiber=None, **kwargs): """Read SDSS/BOSS spec2d & spec1d files. Parameters ---------- platein : :class:`int` or :class:`numpy.ndarray` Plate number(s). mjd : :class:`int` or :class:`numpy.ndarray`, optional MJD numbers. If not provided, they will be calculated by :func:`latest_mjd`. fiber : array-like, optional Fibers to read. If not set, all fibers from all plates will be returned. topdir : :class:`str`, optional Override the value of :envvar:`BOSS_SPECTRO_REDUX`. run2d : :class:`str`, optional Override the value of :envvar:`RUN2D`. run1d : :class:`str`, optional Override the value of :envvar:`RUN1D`. path : :class:`str`, optional Override all path information with this directory name. align : :class:`bool`, optional If set, align all the spectra in wavelength. znum : :class:`int`, optional If set, return the znum-th best fit reshift fit, instead of the best. Returns ------- :class:`dict` A dictionary containing the data read. """ try: nplate = len(platein) plate = platein except TypeError: nplate = 1 plate = np.array([platein], dtype='i4') if 'run2d' in kwargs: run2d = kwargs['run2d'] else: run2d = os.environ['RUN2D'] if 'run1d' in kwargs: run1d = kwargs['run1d'] else: run1d = os.environ['RUN1D'] if fiber is None: # # Read all fibers # nfibers = number_of_fibers(plate, **kwargs) total_fibers = nfibers.sum() platevec = np.zeros(total_fibers, dtype='i4') fibervec = np.zeros(total_fibers, dtype='i4') k = 0 for p in np.unique(plate): n = np.unique(nfibers[plate == p])[0] platevec[k:k+n] = p fibervec[k:k+n] = np.arange(n) + 1 k += n else: try: nfiber = len(fiber) except TypeError: nfiber = 1 if nplate > 1 and nfiber > 1 and nplate != nfiber: raise TypeError("Plate & Fiber must have the same length!") if nplate > 1: platevec = np.array(plate, dtype='i4') else: platevec = np.zeros(nfiber, dtype='i4') + plate if nfiber > 1: fibervec = np.array(fiber, dtype='i4') else: fibervec = np.zeros(nplate, dtype='i4') + fiber if mjd is None: mjdvec = latest_mjd(platevec, **kwargs) else: try: nmjd = len(mjd) except TypeError: nmjd = 1 if nmjd != nplate: raise TypeError("Plate & MJD must have the same length!") mjdvec = np.zeros(nplate, dtype='i4') + mjd # # Now select unique plate-mjd combinations & read them # pmjd = ((np.array(platevec, dtype='u8') << 16) + np.array(mjdvec, dtype='u8')) # log.debug(pmjd) upmjd = np.unique(pmjd) zupmjd = list(zip(upmjd >> 16, upmjd & ((1 << 16) - 1))) # log.debug(zupmjd) spplate_data = dict() hdunames = ('flux', 'invvar', 'andmask', 'ormask', 'disp', 'plugmap', 'sky', 'loglam',) for thisplate, thismjd in zupmjd: # thisplate = int(p>>16) # thismjd = int(np.bitwise_and(p, (1<<16)-1)) pmjdindex = ((platevec == thisplate) & (mjdvec == thismjd)).nonzero()[0] thisfiber = fibervec[pmjdindex] # log.debug(type(thisplate), type(thismjd)) # log.debug(repr(thisfiber)) # log.debug(type(thisfiber)) pmjdstr = "{0:04d}-{1:05d}".format(int(thisplate), int(thismjd)) if 'path' in kwargs: sppath = [kwargs['path']] else: sppath = spec_path(thisplate, run2d=run2d) spfile = os.path.join(sppath[0], "spPlate-{0}.fits".format(pmjdstr)) log.info(spfile) spplate = fits.open(spfile) # # Get wavelength coefficients from primary header # npix = spplate[0].header['NAXIS1'] c0 = spplate[0].header['COEFF0'] c1 = spplate[0].header['COEFF1'] coeff0 = np.zeros(thisfiber.size, dtype='d') + c0 coeff1 = np.zeros(thisfiber.size, dtype='d') + c1 loglam0 = c0 + c1*np.arange(npix, dtype='d') loglam = np.resize(loglam0, (thisfiber.size, npix)) # # Read the data images # for k in 
range(len(hdunames)): if hdunames[k] == 'loglam': tmp = loglam else: try: tmp = spplate[k].data[thisfiber-1, :] except IndexError: tmp = spplate[k].data[thisfiber-1] if hdunames[k] not in spplate_data: if k == 0: allpmjdindex = pmjdindex allcoeff0 = coeff0 allcoeff1 = coeff1 # # Put the data into the return structure # if hdunames[k] == 'plugmap': spplate_data['plugmap'] = dict() for c in spplate[k].columns.names: spplate_data['plugmap'][c] = tmp[c] else: spplate_data[hdunames[k]] = tmp else: # # Append data # if k == 0: allpmjdindex = np.concatenate((allpmjdindex, pmjdindex)) if 'align' in kwargs: mincoeff0 = min(allcoeff0) if mincoeff0 == 0 and coeff0[0] > 0: allcoeff0 = coeff0[0] allcoeff1 = coeff1[1] if mincoeff0 > 0 and coeff0[0] == 0: coeff0 = mincoeff0 coeff1 = allcoeff1[0] ps = np.floor((coeff0[0] - mincoeff0)/coeff1[0] + 0.5) if ps > 0: coeff0 = coeff0 - ps*coeff1 else: allcoeff0 = allcoeff0 + ps*allcoeff1 else: ps = 0 allcoeff0 = np.concatenate((allcoeff0, coeff0)) allcoeff1 = np.concatenate((allcoeff1, coeff1)) if hdunames[k] == 'plugmap': for c in spplate[5].columns.names: spplate_data['plugmap'][c] = np.concatenate( (spplate_data['plugmap'][c], tmp[c])) else: spplate_data[hdunames[k]] = spec_append(spplate_data[hdunames[k]], tmp, pixshift=ps) spplate.close() # # Read photoPlate information, if available # photofile = os.path.join(sppath[0], "photoPlate-{0}.fits".format(pmjdstr)) if not os.path.exists(photofile): # # Hmm, maybe this is an SDSS-I,II plate # photofile = os.path.join(os.environ['SPECTRO_MATCH'], run2d, os.path.basename(os.environ['PHOTO_RESOLVE']), "{0:04d}".format(int(thisplate)), "photoPlate-{0}.fits".format(pmjdstr)) if os.path.exists(photofile): photop = fits.open(photofile) tmp = photop[1].data[thisfiber-1] if 'tsobj' not in spplate_data: spplate_data['tsobj'] = dict() for c in photop[1].columns.names: spplate_data['tsobj'][c] = tmp[c] else: for c in photop[1].columns.names: spplate_data['tsobj'][c] = np.concatenate( (spplate_data['tsobj'][c], tmp[c])) photop.close() # # Read redshift information, if available. # if 'znum' in kwargs: zfile = os.path.join(sppath[0], run1d, "spZall-{0}.fits".format(pmjdstr)) else: zfile = os.path.join(sppath[0], run1d, "spZbest-{0}.fits".format(pmjdstr)) if os.path.exists(zfile): spz = fits.open(zfile) if 'znum' in kwargs: nper = spz[0].header['DIMS0'] zfiber = (thisfiber-1)*nper + kwargs['znum'] - 1 else: zfiber = thisfiber tmp = spz[1].data[zfiber-1] if 'zans' not in spplate_data: spplate_data['zans'] = dict() for c in spz[1].columns.names: spplate_data['zans'][c] = tmp[c] else: for c in spz[1].columns.names: spplate_data['zans'][c] = np.concatenate( (spplate_data['zans'][c], tmp[c])) spz.close() # # Reorder the data. At this point allpmjdindex is an index for which # fiber[allpmjdindex] == spplate['plugmap']['FIBERID'], so we have to # reverse this mapping. # j = allpmjdindex.argsort() for k in spplate_data: if isinstance(spplate_data[k], dict): for c in spplate_data[k]: if spplate_data[k][c].ndim == 2: spplate_data[k][c] = spplate_data[k][c][j, :] else: spplate_data[k][c] = spplate_data[k][c][j] else: spplate_data[k] = spplate_data[k][j, :] allcoeff0 = allcoeff0[j] allcoeff1 = allcoeff1[j] # # If necessary, recompute the wavelengths # nfibers, npixmax = spplate_data['flux'].shape if 'align' in kwargs: loglam0 = allcoeff0[0] + allcoeff1[1]*np.arange(npixmax, dtype='d') spplate_data['loglam'] = np.resize(loglam0, (nfibers, npixmax)) return spplate_data
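# A usage sketch for readspec(), assuming BOSS_SPECTRO_REDUX, RUN2D and RUN1D
# are set and the corresponding spPlate/spZbest files are on disk; the plate,
# MJD and fiber numbers are placeholders.
from pydl.pydlspec2d.spec1d import readspec

spplate = readspec(3686, mjd=55268, fiber=[100, 101, 102], align=True)
flux = spplate['flux']        # (nfiber, npix) flux array
loglam = spplate['loglam']    # matching log10(wavelength) grid
zans = spplate.get('zans')    # redshift fits, if spZbest files were found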
[docs]def skymask(invvar, andmask, ormask=None, ngrow=2): """Mask regions where sky-subtraction errors are expected to dominate. Parameters ---------- invvar : :class:`numpy.ndarray` Inverse variance. andmask : :class:`numpy.ndarray` An "and" mask. For historical reasons, this input is ignored. ormask : :class:`numpy.ndarray`, optional An "or" mask. Although technically this is optional, if it is not supplied, this function will have no effect. ngrow : :class:`int`, optional Expand bad areas by this number of pixels. Returns ------- :class:`numpy.ndarray` The `invvar` multiplied by the bad areas. """ from ..pydlutils.sdss import sdss_flagval from .. import smooth nrows, npix = invvar.shape badmask = np.zeros(invvar.shape, dtype='i4') badskychi = sdss_flagval('SPPIXMASK', 'BADSKYCHI') redmonster = sdss_flagval('SPPIXMASK', 'REDMONSTER') # brightsky = sdss_flagval('SPPIXMASK', 'BRIGHTSKY') if ormask is not None: badmask = badmask | ((ormask & badskychi) != 0) badmask = badmask | ((ormask & redmonster) != 0) # badmask = badmask | ((andmask & brightsky) != 0) if ngrow > 0: width = 2*ngrow + 1 for k in range(nrows): badmask[k, :] = smooth(badmask[k, :]*width, width, True) > 0 return invvar * (1 - badmask)
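# A usage sketch for skymask() on synthetic masks, assuming the SPPIXMASK bit
# definitions bundled with pydl.  In template_input() below the equivalent
# call is made with the invvar, andmask and ormask arrays from readspec().
import numpy as np
from pydl.pydlspec2d.spec1d import skymask
from pydl.pydlutils.sdss import sdss_flagval

invvar = np.ones((2, 100), dtype='d')
andmask = np.zeros((2, 100), dtype='i4')
ormask = np.zeros((2, 100), dtype='i4')
ormask[0, 40:45] = sdss_flagval('SPPIXMASK', 'REDMONSTER')
clean = skymask(invvar, andmask, ormask=ormask, ngrow=2)
# clean is zero in the flagged pixels (grown by ngrow on either side) and
# equal to invvar everywhere else.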
def spec_append(spec1, spec2, pixshift=0):
    """Append the array spec2 to the array spec1 & return a new array.

    If the dimension of these arrays is the same, then append as
    [spec1, spec2].  If not, increase the size of the smaller array & fill
    with zeros.

    Parameters
    ----------
    spec1, spec2 : :class:`numpy.ndarray`
        Append `spec2` to `spec1`.
    pixshift : :class:`int`, optional
        If `pixshift` is set to a positive integer, `spec2` will be padded
        with `pixshift` zeros on the left side.  If `pixshift` is set to a
        negative integer, `spec1` will be padded with ``abs(pixshift)`` zeros
        on the left side.  If not set, all zeros will be padded on the right
        side.

    Returns
    -------
    :class:`numpy.ndarray`
        A new array containing both `spec1` and `spec2`.
    """
    nrows1, npix1 = spec1.shape
    nrows2, npix2 = spec2.shape
    nrows = nrows1 + nrows2
    nadd1 = 0
    nadd2 = 0
    if pixshift != 0:
        if pixshift < 0:
            nadd1 = -pixshift
        else:
            nadd2 = pixshift
    maxpix = max(npix1 + nadd1, npix2 + nadd2)
    spec3 = np.zeros((nrows, maxpix), dtype=spec1.dtype)
    spec3[0:nrows1, nadd1:nadd1+npix1] = spec1
    spec3[nrows1:nrows, nadd2:nadd2+npix2] = spec2
    return spec3
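# A worked sketch for spec_append(): stack two blocks of spectra that have
# different pixel counts, shifting the second block one pixel to the right.
import numpy as np
from pydl.pydlspec2d.spec1d import spec_append

spec1 = np.ones((2, 5))
spec2 = 2.0 * np.ones((3, 4))
stacked = spec_append(spec1, spec2, pixshift=1)
# stacked has shape (5, 5): spec1 fills columns 0-4 of the first two rows,
# spec2 fills columns 1-4 of the last three rows, and the padding is zero.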
[docs]def spec_path(plate, path=None, topdir=None, run2d=None): """Return the directory containing spPlate files. Parameters ---------- plate : :class:`int` or :class:`numpy.ndarray` The plate(s) to examine. path : :class:`str`, optional If set, `path` becomes the full path for every plate. In other words, it completely short-circuits this function. topdir : :class:`str`, optional Used to override the value of :envvar:`BOSS_SPECTRO_REDUX`. run2d : :class:`str`, optional Used to override the value of :envvar:`RUN2D`. Returns ------- :class:`list` A list of directories, one for each plate. Raises ------ :exc:`KeyError` If environment variables are not supplied. """ if isinstance(plate, (int,)) or plate.shape == (): platevec = np.array([plate], dtype='i4') else: platevec = plate if path is None: if run2d is None: run2d = os.environ['RUN2D'] if topdir is None: env = "SPECTRO_REDUX" try: ir = int(run2d) except ValueError: env = 'BOSS_SPECTRO_REDUX' topdir = os.environ[env] paths = list() for p in platevec: if path is not None: paths.append(path) else: paths.append(os.path.join(topdir, run2d, '{0:04d}'.format(p))) return paths
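# A usage sketch for spec_path() with explicit topdir and run2d, so that no
# environment variables are needed; the directory name is a placeholder.
import numpy as np
from pydl.pydlspec2d.spec1d import spec_path

paths = spec_path(np.array([3686, 3687]),
                  topdir='/data/boss/spectro/redux', run2d='v5_7_0')
# ['/data/boss/spectro/redux/v5_7_0/3686',
#  '/data/boss/spectro/redux/v5_7_0/3687']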
[docs]def preprocess_spectra(flux, ivar, loglam=None, zfit=None, aesthetics='mean', newloglam=None, wavemin=None, wavemax=None, verbose=False): """Handle the processing of input spectra through the :func:`~pydl.pydlspec2d.spec2d.combine1fiber` stage. Parameters ---------- flux : array-like The input spectral flux. ivar : array-like The inverse variance of the spectral flux. loglam : array-like, optional The input wavelength solution. zfit : array-like, optional The redshift of each input spectrum. aesthetics : :class:`str`, optional This parameter will be passed to :func:`~pydl.pydlspec2d.spec2d.combine1fiber`. newloglam : array-like, optional The output wavelength solution. wavemin : :class:`float`, optional Minimum wavelength if `newloglam` is not specified. wavemax : :class:`float`, optional Maximum wavelength if `newloglam` is not specified. verbose : :class:`bool`, optional If ``True``, print extra information. Returns ------- :class:`tuple` of :class:`numpy.ndarray` The resampled flux, inverse variance and wavelength solution, respectively. """ from .spec2d import combine1fiber if verbose: log.setLevel('DEBUG') if len(flux.shape) == 1: nobj = 1 npix = flux.shape[0] else: nobj, npix = flux.shape # # The redshift of each object in pixels would be logshift/objdloglam. # if zfit is None: logshift = np.zeros((nobj,), dtype=flux.dtype) else: logshift = np.log10(1.0 + zfit) # # Determine the new wavelength mapping. # if loglam is None: if newloglam is None: raise ValueError("newloglam must be set if loglam is not!") return (flux, ivar, newloglam) else: if newloglam is None: igood = loglam != 0 dloglam = loglam[1] - loglam[0] logmin = loglam[igood].min() - logshift.max() logmax = loglam[igood].max() - logshift.min() if wavemin is not None: logmin = max(logmin, np.log10(wavemin)) if wavemax is not None: logmax = min(logmax, np.log10(wavemax)) fullloglam = wavevector(logmin, logmax, binsz=dloglam) else: fullloglam = newloglam dloglam = fullloglam[1] - fullloglam[0] nnew = fullloglam.size fullflux = np.zeros((nobj, nnew), dtype='d') fullivar = np.zeros((nobj, nnew), dtype='d') # # Shift each spectrum to z = 0 and sample at the output wavelengths # if loglam.ndim == 1: indx = loglam > 0 rowloglam = loglam[indx] for iobj in range(nobj): log.info("OBJECT %5d", iobj) if loglam.ndim > 1: if loglam.shape[0] != nobj: raise ValueError('Wrong number of dimensions for loglam.') indx = loglam[iobj, :] > 0 rowloglam = loglam[iobj, indx] flux1, ivar1 = combine1fiber(rowloglam-logshift[iobj], flux[iobj, indx], fullloglam, objivar=ivar[iobj, indx], binsz=dloglam, aesthetics=aesthetics, verbose=verbose) fullflux[iobj, :] = flux1 fullivar[iobj, :] = ivar1 return (fullflux, fullivar, fullloglam)
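# A usage sketch for preprocess_spectra(), mirroring how template_input()
# below shifts spectra to the rest frame and resamples them onto a common
# log-wavelength grid.  The readspec() arguments and the redshifts are
# placeholders and require the corresponding spPlate files on disk.
import numpy as np
from pydl.pydlspec2d.spec1d import preprocess_spectra, readspec, wavevector

spplate = readspec(3686, mjd=55268, fiber=[100, 101])
newloglam = wavevector(np.log10(3800.0), np.log10(9200.0), binsz=1.0e-4)
zfit = np.array([0.12, 0.34])   # hypothetical redshifts
newflux, newivar, newloglam = preprocess_spectra(spplate['flux'],
                                                 spplate['invvar'],
                                                 loglam=spplate['loglam'],
                                                 zfit=zfit,
                                                 newloglam=newloglam)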
[docs]def template_metadata(inputfile, verbose=False): """Read template metadata from file. Parameters ---------- inputfile : :class:`str` Name of a Parameter file containing the input data and metadata. verbose : :class:`bool`, optional If ``True``, print lots of extra information. Returns ------- :class:`tuple` A tuple containing the list of input spectra and a dictionary containing other metadata. """ from ..pydlutils.yanny import yanny if verbose: log.setLevel('DEBUG') if not os.path.exists(inputfile): raise Pydlspec2dException("Could not find {0}!".format(inputfile)) log.debug("Reading input data from %s.", inputfile) par = yanny(inputfile) required_metadata = {'object': str, 'method': str, 'aesthetics': str, 'run2d': str, 'run1d': str, 'wavemin': float, 'wavemax': float, 'snmax': float, 'niter': int, 'nkeep': int, 'minuse': int} metadata = dict() for key in required_metadata: try: metadata[key] = required_metadata[key](par[key]) log.debug('%s = %s', key, par[key]) except KeyError: raise KeyError('The {0} keyword was not found in {1}!'.format(key, inputfile)) except ValueError: raise ValueError('The {0} keyword has invalid value, {0}!'.format(key, par[key])) slist = par['EIGENOBJ'] for r in ('run2d', 'run1d'): try: metadata['orig_'+r] = os.environ[r.upper()] except KeyError: metadata['orig_'+r] = None os.environ[r.upper()] = metadata[r] if metadata['method'].lower() == 'hmf': required_hmf_metadata = {'nonnegative': lambda x: bool(int(x)), 'epsilon': float} for key in required_hmf_metadata: try: metadata[key] = required_hmf_metadata[key](par[key]) except KeyError: raise KeyError('The {0} keyword was not found in {1}!'.format(key, inputfile)) except ValueError: raise ValueError('The {0} keyword has invalid value, {0}!'.format(key, par[key])) return (slist, metadata)
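# A usage sketch for template_metadata().  The file name is a placeholder; the
# parameter file must define the required keywords listed above (object,
# method, aesthetics, run2d, run1d, wavemin, wavemax, snmax, niter, nkeep,
# minuse), an EIGENOBJ table, and, for method=hmf, nonnegative and epsilon.
from pydl.pydlspec2d.spec1d import template_metadata

slist, metadata = template_metadata('eigeninput_gal.par', verbose=True)
print(metadata['method'], metadata['nkeep'])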
[docs]def template_input(inputfile, dumpfile, flux=False, verbose=False): """Collect spectra and pass them to PCA or HMF solvers to compute spectral templates. This function replaces the various ``PCA_GAL()``, ``PCA_STAR()``, etc., functions from idlspec2d. Parameters ---------- inputfile : :class:`str` Name of a Parameter file containing the input data and metadata. dumpfile : :class:`str` Name of a Pickle file used to store intermediate data. flux : :class:`bool`, optional If ``True``, plot the individual input spectra. verbose : :class:`bool`, optional If ``True``, print lots of extra information. """ import pickle from astropy.constants import c as cspeed from .. import uniq from .. import __version__ as pydl_version from ..goddard.astro import get_juldate from ..pydlutils.image import djs_maskinterp from ..pydlutils.math import djs_median # # Logging # if verbose: log.setLevel('DEBUG') # # Read metadata. # slist, metadata = template_metadata(inputfile) # # Name the output files. # jd = get_juldate() outfile = "spEigen{0}-{1:d}".format(metadata['object'].title(), int(jd - 2400000.5)) # # Read the input spectra # if os.path.exists(dumpfile): log.info("Loading data from %s.", dumpfile) with open(dumpfile) as f: inputflux = pickle.load(f) newflux = inputflux['newflux'] newivar = inputflux['newivar'] newloglam = inputflux['newloglam'] else: if metadata['object'].lower() == 'star': spplate = readspec(slist.plate, mjd=slist.mjd, fiber=slist.fiberid, align=True) else: spplate = readspec(slist.plate, mjd=slist.mjd, fiber=slist.fiberid) # # Insist that all of the requested spectra exist. # missing = spplate['plugmap']['FIBERID'] == 0 if missing.any(): imissing = missing.nonzero()[0] for k in imissing: log.error("Missing plate=%d mjd=%d fiberid=%d", slist.plate[k], slist.mjd[k], slist.fiberid[k]) raise ValueError("{0:d} missing object(s).".format(missing.sum())) # # Do not fit where the spectrum may be dominated by sky-sub residuals. # objinvvar = skymask(spplate['invvar'], spplate['andmask'], spplate['ormask']) ifix = spplate['flux']**2 * objinvvar > metadata['snmax']**2 if ifix.any(): objinvvar[ifix.nonzero()] = (metadata['snmax']/spplate['flux'][ifix.nonzero()])**2 # # Set the new wavelength mapping here. If the binsz keyword is not set, # then bin size is determined from the first spectrum returned by readspec. # This is fine in the case where all spectra have the same bin size # (though their starting wavelengths may differ). However, this may not # be a safe assumption in the future. # try: objdloglam = float(par['binsz']) except: objdloglam = spplate['loglam'][0, 1] - spplate['loglam'][0, 0] if metadata['object'].lower() == 'star': newloglam = spplate['loglam'][0, :] else: newloglam = wavevector(np.log10(metadata['wavemin']), np.log10(metadata['wavemax']), binsz=objdloglam) try: zfit = slist.zfit except AttributeError: zfit = slist.cz/cspeed.to('km / s').value # # Shift to common wavelength grid. # newflux, newivar, newloglam = preprocess_spectra(spplate['flux'], objinvvar, loglam=spplate['loglam'], zfit=zfit, newloglam=newloglam, aesthetics=metadata['aesthetics'], verbose=verbose) # # Dump input fluxes to a file for debugging purposes. # if not os.path.exists(dumpfile): with open(dumpfile, 'w') as f: inputflux = {'newflux': newflux, 'newivar': newivar, 'newloglam': newloglam} pickle.dump(inputflux, f) # # Solve. 
# if metadata['object'].lower() == 'qso': pcaflux = template_qso(metadata, newflux, newivar, verbose) elif metadata['object'].lower() == 'star': pcaflux = template_star(metadata, newloglam, newflux, newivar, slist, outfile, verbose) else: if metadata['method'].lower() == 'pca': pcaflux = pca_solve(newflux, newivar, niter=metadata['niter'], nkeep=metadata['nkeep'], verbose=verbose) elif metadata['method'].lower() == 'hmf': hmf = HMF(newflux, newivar, K=metadata['nkeep'], n_iter=metadata['niter'], nonnegative=metadata['nonnegative'], epsilon=metadata['epsilon'], verbose=verbose) pcaflux = hmf.solve() else: raise ValueError("Unknown method: {0}!".format(metadata['method'])) pcaflux['newflux'] = newflux pcaflux['newivar'] = newivar pcaflux['newloglam'] = newloglam # # Fill in bad data with a running median of the good data. # The presence of boundary='nearest' means that this code snippet # was never meant to be called! In other words it should always # be the case that qgood.all() is True. # if 'usemask' in pcaflux: qgood = pcaflux['usemask'] >= metadata['minuse'] if not qgood.all(): warn("Would have triggered djs_median replacement!", Pydlspec2dUserWarning) if False: medflux = np.zeros(pcaflux['flux'].shape, dtype=pcaflux['flux'].dtype) for i in range(metadata['nkeep']): medflux[i, qgood] = djs_median(pcaflux['flux'][i, qgood], width=51, boundary='nearest') medflux[i, :] = djs_maskinterp(medflux[i, :], ~qgood, const=True) pcaflux['flux'][:, ~qgood] = medflux[:, ~qgood] # # Make plots # colorvec = ['k', 'r', 'g', 'b', 'm', 'c'] smallfont = FontProperties(size='xx-small') nspectra = pcaflux['newflux'].shape[0] # # Plot input spectra # if flux: nfluxes = 30 separation = 5.0 nplots = nspectra/nfluxes if nspectra % nfluxes > 0: nplots += 1 for k in range(nplots): istart = k*nfluxes iend = min(istart+nfluxes, nspectra) - 1 fig = plt.figure(dpi=100) ax = fig.add_subplot(111) for l in range(istart, iend+1): p = ax.plot(10.0**pcaflux['newloglam'], pcaflux['newflux'][l, :] + separation*(l % nfluxes), colorvec[l % len(colorvec)]+'-', linewidth=1) ax.set_xlabel(r'Wavelength [$\AA$]') ax.set_ylabel(r'Flux [$\mathsf{10^{-17} erg\, cm^{-2} s^{-1} \AA^{-1}}$] + Constant') ax.set_title('Input Spectra {0:04d}-{1:04d}'.format(istart+1, iend+1)) ax.set_ylim(pcaflux['newflux'][istart, :].min(), pcaflux['newflux'][iend-1, :].max()+separation*(nfluxes-1)) fig.savefig('{0}.flux.{1:04d}-{2:04d}.png'.format(outfile, istart+1, iend+1)) plt.close(fig) # # Missing data diagnostic. # fig = plt.figure(dpi=100) ax = fig.add_subplot(111) p = ax.plot(10.0**pcaflux['newloglam'], (pcaflux['newivar'] == 0).sum(0)/float(nspectra), 'k-') ax.set_xlabel(r'Wavelength [$\AA$]') ax.set_ylabel('Fraction of spectra with missing data') ax.set_title('Missing Data') ax.grid(True) fig.savefig(outfile+'.missing.png') plt.close(fig) # # usemask diagnostic # if 'usemask' in pcaflux: fig = plt.figure(dpi=100) ax = fig.add_subplot(111) p = ax.semilogy(10.0**pcaflux['newloglam'][pcaflux['usemask'] > 0], pcaflux['usemask'][pcaflux['usemask'] > 0], 'k-', 10.0**pcaflux['newloglam'], np.zeros(pcaflux['newloglam'].shape, dtype=pcaflux['newloglam'].dtype) + metadata['minuse'], 'k--') ax.set_xlabel(r'Wavelength [$\AA$]') ax.set_ylabel('Usemask') ax.set_title('UseMask') ax.grid(True) fig.savefig(outfile+'.usemask.png') plt.close(fig) # # This type of figure isn't really meaningful for stars. 
# if metadata['object'].lower() != 'star': aratio10 = pcaflux['acoeff'][:, 1]/pcaflux['acoeff'][:, 0] aratio20 = pcaflux['acoeff'][:, 2]/pcaflux['acoeff'][:, 0] aratio30 = pcaflux['acoeff'][:, 3]/pcaflux['acoeff'][:, 0] fig = plt.figure(dpi=100) ax = fig.add_subplot(111) p = ax.plot(aratio10, aratio20, marker='None', linestyle='None') for k in range(len(aratio10)): t = ax.text(aratio10[k], aratio20[k], '{0:04d}-{1:04d}'.format(slist.plate[k], slist.fiberid[k]), horizontalalignment='center', verticalalignment='center', color=colorvec[k % len(colorvec)], fontproperties=smallfont) # ax.set_xlim([aratio10.min(), aratio10.max]) # ax.set_xlim([aratio20.min(), aratio20.max]) ax.set_xlabel('Eigenvalue Ratio, $a_1/a_0$') ax.set_ylabel('Eigenvalue Ratio, $a_2/a_0$') ax.set_title('Eigenvalue Ratios') fig.savefig(outfile+'.a2_v_a1.png') plt.close(fig) fig = plt.figure(dpi=100) ax = fig.add_subplot(111) p = ax.plot(aratio20, aratio30, marker='None', linestyle='None') for k in range(len(aratio10)): t = ax.text(aratio20[k], aratio30[k], '{0:04d}-{1:04d}'.format(slist.plate[k], slist.fiberid[k]), horizontalalignment='center', verticalalignment='center', color=colorvec[k % len(colorvec)], fontproperties=smallfont) # ax.set_xlim([aratio10.min(), aratio10.max]) # ax.set_xlim([aratio20.min(), aratio20.max]) ax.set_xlabel('Eigenvalue Ratio, $a_2/a_0$') ax.set_ylabel('Eigenvalue Ratio, $a_3/a_0$') ax.set_title('Eigenvalue Ratios') fig.savefig(outfile+'.a3_v_a2.png') plt.close(fig) # # Save output to FITS file. # if os.path.exists(outfile+'.fits'): os.remove(outfile+'.fits') hdu0 = fits.PrimaryHDU(pcaflux['flux']) objtypes = {'gal': 'GALAXY', 'qso': 'QSO', 'star': 'STAR'} if not pydl_version: pydl_version = 'git' hdu0.header['OBJECT'] = (objtypes[metadata['object']], 'Type of template') hdu0.header['COEFF0'] = (pcaflux['newloglam'][0], 'Wavelength zeropoint') hdu0.header['COEFF1'] = (pcaflux['newloglam'][1]-pcaflux['newloglam'][0], 'Delta wavelength') hdu0.header['IDLUTILS'] = ('pydl-{0}'.format(pydl_version), 'Version of idlutils') hdu0.header['SPEC2D'] = ('pydl-{0}'.format(pydl_version), 'Version of idlspec2d') hdu0.header['RUN2D'] = (os.environ['RUN2D'], 'Version of 2d reduction') hdu0.header['RUN1D'] = (os.environ['RUN1D'], 'Version of 1d reduction') hdu0.header['FILENAME'] = (inputfile, 'Input file') hdu0.header['METHOD'] = (metadata['method'].upper(), 'Method used') if metadata['method'].lower() == 'hmf': hdu0.header['NONNEG'] = (metadata['nonnegative'], 'Was nonnegative HMF used?') hdu0.header['EPSILON'] = (metadata['epsilon'], 'Regularization parameter used.') # for i in range(len(namearr)): # hdu0.header["NAME{0:d}".format(i)] = namearr[i]+' ' c = [fits.Column(name='plate', format='J', array=slist.plate), fits.Column(name='mjd', format='J', array=slist.mjd), fits.Column(name='fiberid', format='J', array=slist.fiberid)] if metadata['object'].lower() == 'star': c.append(fits.Column(name='cz', format='D', unit='km/s', array=slist.cz)) for i, name in enumerate(pcaflux['namearr']): hdu0.header['NAME{0:d}'.format(i)] = (name, 'Name of class {0:d}.'.format(i)) else: c.append(fits.Column(name='zfit', format='D', array=slist.zfit)) hdu1 = fits.BinTableHDU.from_columns(fits.ColDefs(c)) hdulist = fits.HDUList([hdu0, hdu1]) hdulist.writeto(outfile+'.fits') if metadata['object'].lower() != 'star': plot_eig(outfile+'.fits') # # Clean up # for r in ('run2d', 'run1d'): if metadata['orig_'+r] is None: del os.environ[r.upper()] else: os.environ[r.upper()] = metadata['orig_'+r] return
[docs]def template_qso(metadata, newflux, newivar, verbose=False): """Run PCA or HMF on QSO spectra. Historically, QSO templates were comptuted one at a time instead of all at once. Parameters ---------- metadata : :class:`dict` Dictionary containing metadata about the spectra. newflux : :class:`~numpy.ndarray` Flux shifted onto common wavelength. newivar : :class:`~numpy.ndarray` Inverse variances of the fluxes. verbose : :class:`bool`, optional If ``True``, print lots of extra information. Returns ------- :class:`dict` A dictonary containing flux, eigenvalues, etc. """ from ..pydlutils.math import computechi2 if metadata['object'].lower() != 'qso': raise Pydlspec2dException("You appear to be passing the wrong kind of object to template_qso()!") if len(newflux.shape) == 1: nobj = 1 npix = newflux.shape[0] else: nobj, npix = newflux.shape objflux = newflux.copy() for ikeep in range(metadata['nkeep']): log.info("Solving for eigencomponent #%d of %d", ikeep+1, nkeep) if metadata['method'].lower() == 'pca': pcaflux1 = pca_solve(objflux, newivar, niter=metadata['niter'], nkeep=1, verbose=verbose) elif metadata['method'].lower() == 'hmf': hmf = HMF(objflux, newivar, K=metadata['nkeep'], n_iter=metadata['niter'], nonnegative=metadata['nonnegative'], epsilon=metadata['epsilon'], verbose=verbose) pcaflux1 = hmf.solve() else: raise ValueError("Unknown method: {0}!".format(metadata['method'])) if ikeep == 0: # # Create new pcaflux dict # pcaflux = dict() for k in pcaflux1: pcaflux[k] = pcaflux1[k].copy() else: # # Add to existing dict # # for k in pcaflux1: # pcaflux[k] = np.vstack((pcaflux[k],pcaflux1[k])) pcaflux['flux'] = np.vstack((pcaflux['flux'], pcaflux1['flux'])) pcaflux['eigenval'] = np.concatenate((pcaflux['eigenval'], pcaflux1['eigenval'])) # # Re-solve for the coefficients using all PCA components so far # acoeff = np.zeros((nobj, ikeep+1), dtype=pcaflux1['acoeff'].dtype) for iobj in range(nobj): out = computechi2(newflux[iobj, :], np.sqrt(pcaflux1['newivar'][iobj, :]), pcaflux['flux'].T) acoeff[iobj, :] = out['acoeff'] # # Prevent re-binning of spectra on subsequent calls to pca_solve() # # objloglam = None if ikeep == 0: objflux = newflux - np.outer(acoeff, pcaflux['flux']) else: objflux = newflux - np.dot(acoeff, pcaflux['flux']) # objflux = newflux - np.outer(acoeff,pcaflux['flux']) # objinvvar = pcaflux1['newivar'] pcaflux['acoeff'] = acoeff return pcaflux
[docs]def template_star(metadata, newloglam, newflux, newivar, slist, outfile, verbose=False): """Run PCA or HMF on stellar spectra of various classes. Parameters ---------- metadata : :class:`dict` Dictionary containing metadata about the spectra. newloglam : :class:`~numpy.ndarray` The wavelength array, used only for plots. newflux : :class:`~numpy.ndarray` Flux shifted onto common wavelength. newivar : :class:`~numpy.ndarray` Inverse variances of the fluxes. slist : :class:`~numpy.recarray` The list of objects, containing stellar class information. outfile : :class:`str` The base name of output file, used for plots. verbose : :class:`bool`, optional If ``True``, print lots of extra information. Returns ------- :class:`dict` A dictonary containing flux, eigenvalues, etc. """ from .. import uniq from ..pydlutils.image import djs_maskinterp if metadata['object'].lower() != 'star': raise Pydlspec2dException("You appear to be passing the wrong kind of object to template_star()!") # # Find the list of unique star types # isort = np.argsort(slist['class']) classlist = slist['class'][isort[uniq(slist['class'][isort])]] # # Loop over each star type # npix, nstars = newflux.shape pcaflux = dict() pcaflux['namearr'] = list() for c in classlist: # # Find the subclasses for this stellar type # log.info("Finding eigenspectra for Stellar class %s.", c) indx = (slist['class'] == c).nonzero()[0] nindx = indx.size thesesubclass = slist['subclass'][indx] isort = np.argsort(thesesubclass) subclasslist = thesesubclass[isort[uniq(thesesubclass[isort])]] nsubclass = subclasslist.size # # Solve for 2 eigencomponents if we have specified subclasses for # this stellar type # if nsubclass == 1: nkeep = 1 else: nkeep = 2 if metadata['method'].lower() == 'pca': pcaflux1 = pca_solve(newflux[indx, :], newivar[indx, :], niter=metadata['niter'], nkeep=nkeep, verbose=verbose) elif metadata['method'].lower() == 'hmf': hmf = HMF(newflux[indx, :], newivar[indx, :], K=metadata['nkeep'], n_iter=metadata['niter'], nonnegative=metadata['nonnegative'], epsilon=metadata['epsilon'], verbose=verbose) pcaflux1 = hmf.solve() else: raise ValueError("Unknown method: {0}!".format(metadata['method'])) # # Some star templates are generated from only one spectrum, # and these will not have a usemask set. # if 'usemask' not in pcaflux1: pcaflux1['usemask'] = np.zeros((npix,), dtype='i4') + nindx # # Interpolate over bad flux values in the middle of a spectrum, # and set fluxes to zero at the blue+red ends of the spectrum # # minuse = 1 # ? minuse = np.floor((nindx+1) / 3.0) qbad = pcaflux1['usemask'] < minuse # # Interpolate over all bad pixels # for j in range(nkeep): pcaflux1['flux'][j, :] = djs_maskinterp(pcaflux1['flux'][j, :], qbad, const=True) # # Set bad pixels at the very start or end of the spectrum to zero # instead. 
# npix = qbad.size igood = (~qbad).nonzero()[0] if qbad[0]: pcaflux1['flux'][:, 0:igood[0]-1] = 0 if qbad[npix-1]: pcaflux1['flux'][:, igood[::-1][0]+1:npix] = 0 # # Re-normalize the first eigenspectrum to a mean of 1 # norm = pcaflux1['flux'][0, :].mean() pcaflux1['flux'] /= norm if 'acoeff' in pcaflux1: pcaflux1['acoeff'] *= norm # # Now loop through each stellar subclass and reconstruct # an eigenspectrum for that subclass # thesesubclassnum = np.zeros(thesesubclass.size, dtype='i4') colorvec = ['k', 'r', 'g', 'b', 'm', 'c'] smallfont = FontProperties(size='xx-small') fig = plt.figure(dpi=100) ax = fig.add_subplot(111) for isub in range(nsubclass): ii = (thesesubclass == subclasslist[isub]).nonzero()[0] thesesubclassnum[ii] = isub if nkeep == 1: thisflux = pcaflux1['flux'][0, :] else: aratio = pcaflux1['acoeff'][ii, 1]/pcaflux1['acoeff'][ii, 0] # # np.median(foo) is equivalent to MEDIAN(foo,/EVEN) # thisratio = np.median(aratio) thisflux = (pcaflux1['flux'][0, :] + thisratio.astype('f') * pcaflux1['flux'][1, :]) # # Plot spectra # plotflux = thisflux/thisflux.max() ax.plot(10.0**newloglam, plotflux, "{0}-".format(colorvec[isub % len(colorvec)]), linewidth=1) if isub == 0: ax.set_xlabel(r'Wavelength [$\AA$]') ax.set_ylabel('Flux [arbitrary units]') ax.set_title('STAR {0}: Eigenspectra Reconstructions'.format(c)) t = ax.text(10.0**newloglam[-1], plotflux[-1], subclasslist[isub], horizontalalignment='right', verticalalignment='center', color=colorvec[isub % len(colorvec)], fontproperties=smallfont) fig.savefig(outfile+'.{0}.png'.format(c)) plt.close(fig) if 'flux' in pcaflux: pcaflux['flux'] = np.vstack((pcaflux['flux'], thisflux)) # pcaflux['acoeff'] = np.vstack((pcaflux['acoeff'], pcaflux1['acoeff'])) # pcaflux['usemask'] = np.vstack((pcaflux['usemask'], pcaflux1['usemask'])) else: pcaflux['flux'] = thisflux # pcaflux['acoeff'] = pcaflux1['acoeff'] # pcaflux['usemask'] = pcaflux1['usemask'] pcaflux['namearr'].append(subclasslist[isub]) return pcaflux
[docs]def template_input_main(): # pragma: no cover """Entry point for the compute_templates script. Returns ------- :class:`int` An integer suitable for passing to :func:`sys.exit`. """ # # Imports for main() # import sys from argparse import ArgumentParser # Get home directory in platform-independent way home_dir = os.path.expanduser('~') # # Get Options # parser = ArgumentParser(description="Compute spectral templates.", prog=os.path.basename(sys.argv[0])) parser.add_argument('-d', '--dump', action='store', dest='dump', metavar='FILE', default=os.path.join(home_dir, 'scratch', 'templates', 'compute_templates.dump'), help='Dump data to a pickle file (default: %(default)s).') parser.add_argument('-F', '--flux', action='store_true', dest='flux', help='Plot input spectra.') parser.add_argument('-f', '--file', action='store', dest='inputfile', metavar='FILE', default=os.path.join(home_dir, 'scratch', 'templates', 'compute_templates.par'), help='Read input spectra and redshifts from FILE (default: %(default)s).') parser.add_argument('-v', '--verbose', action='store_true', dest='verbose', help='Print lots of extra information.') options = parser.parse_args() template_input(options.inputfile, options.dump, options.flux, options.verbose) return 0
def wavevector(minfullwave, maxfullwave, zeropoint=3.5, binsz=1.0e-4,
               wavemin=None):
    """Return an array of wavelengths.

    Parameters
    ----------
    minfullwave : :class:`float`
        Minimum wavelength.
    maxfullwave : :class:`float`
        Maximum wavelength.
    zeropoint : :class:`float`, optional
        Offset of the input wavelength values.
    binsz : :class:`float`, optional
        Separation between wavelength values.
    wavemin : :class:`float`, optional
        If this is set the values of `minfullwave` and `zeropoint` are
        ignored.

    Returns
    -------
    :class:`numpy.ndarray`
        Depending on the values of `minfullwave`, `binsz`, etc., the resulting
        array could be interpreted as an array of wavelengths or an array of
        log(wavelength).
    """
    if wavemin is not None:
        spotmin = 0
        spotmax = int((maxfullwave - wavemin)/binsz)
        wavemax = spotmax * binsz + wavemin
    else:
        spotmin = int((minfullwave - zeropoint)/binsz) + 1
        spotmax = int((maxfullwave - zeropoint)/binsz)
        wavemin = spotmin * binsz + zeropoint
        wavemax = spotmax * binsz + zeropoint
    nfinalpix = spotmax - spotmin + 1
    finalwave = np.arange(nfinalpix, dtype='d') * binsz + wavemin
    return finalwave
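# A worked sketch for wavevector(): build a log10-wavelength grid from roughly
# 3800 to 9200 Angstroms with a bin size of 1e-4 dex, the usual SDSS spacing.
import numpy as np
from pydl.pydlspec2d.spec1d import wavevector

loglam = wavevector(np.log10(3800.0), np.log10(9200.0), binsz=1.0e-4)
wave = 10.0**loglam
# loglam is evenly spaced in log10(wavelength); wave runs from about 3801 to
# 9199 Angstroms in multiplicative steps of about 0.023 percent.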