doxygen/src/cs__blas_8h_source.html

#ifndef __CS_BLAS_H__

#define __CS_BLAS_H__


/*============================================================================

 * BLAS (Basic Linear Algebra Subroutine) functions

 *============================================================================*/


/*

  This file is part of Code_Saturne, a general-purpose CFD tool.


  Copyright (C) 1998-2019 EDF S.A.


  This program is free software; you can redistribute it and/or modify it under

  the terms of the GNU General Public License as published by the Free Software

  Foundation; either version 2 of the License, or (at your option) any later

  version.


  This program is distributed in the hope that it will be useful, but WITHOUT

  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more

  details.


  You should have received a copy of the GNU General Public License along with

  this program; if not, write to the Free Software Foundation, Inc., 51 Franklin

  Street, Fifth Floor, Boston, MA 02110-1301, USA.

*/


/*----------------------------------------------------------------------------*/


#include "cs_defs.h"


/*----------------------------------------------------------------------------

 * External library headers

 *----------------------------------------------------------------------------*/


/*----------------------------------------------------------------------------

 *  Local headers

 *----------------------------------------------------------------------------*/


#include "cs_base.h"


/*----------------------------------------------------------------------------*/


BEGIN_C_DECLS


/*============================================================================

 * Macro definitions

 *============================================================================*/


/*============================================================================

 * Type definitions

 *============================================================================*/


/* BLAS reduction algorithm families */


typedef enum {


  CS_BLAS_REDUCE_SUPERBLOCK,

  CS_BLAS_REDUCE_KAHAN


} cs_blas_reduce_t;


/*============================================================================

 *  Public function prototypes

 *============================================================================*/


/*----------------------------------------------------------------------------*/

/*----------------------------------------------------------------------------*/


void

cs_blas_set_reduce_algorithm(cs_blas_reduce_t  mode);


/*----------------------------------------------------------------------------

 * Constant times a vector plus a vector: y <-- ax + y

 *

 * parameters:

 *   n <-- size of arrays x and y

 *   a <-- multiplier for x

 *   x <-- array of floating-point values

 *   y <-- array of floating-point values

 *----------------------------------------------------------------------------*/


void

cs_axpy(cs_lnum_t         n,

        double            a,

        const cs_real_t  *x,

        cs_real_t        *restrict y);


/*----------------------------------------------------------------------------

 * Return the sum of a vector. For better precision, a superblock algorithm

 * is used.

 *

 * parameters:

 *   n <-- size of array x

 *   x <-- array of floating-point values

 *

 * returns:

 *   the resulting sum

 *----------------------------------------------------------------------------*/


double

cs_sum(cs_lnum_t         n,

       const cs_real_t  *x);


/*----------------------------------------------------------------------------

 * Return the weighted sum of a vector. For better precision, a superblock

 * algorithm is used.

 *

 * \param[in]  n  size of array x

 * \param[in]  w  array of floating-point weights

 * \param[in]  x  array of floating-point values

 *

 * \return the resulting weighted sum

 *----------------------------------------------------------------------------*/


double

cs_weighted_sum(cs_lnum_t         n,

                const cs_real_t  *w,

                const cs_real_t  *x);


/*----------------------------------------------------------------------------

 * Return the dot product of 2 vectors: x.y

 *

 * parameters:

 *   n <-- size of arrays x and y

 *   x <-- array of floating-point values

 *   y <-- array of floating-point values

 *

 * returns:

 *   dot product

 *----------------------------------------------------------------------------*/


double

cs_dot(cs_lnum_t         n,

       const cs_real_t  *x,

       const cs_real_t  *y);


/*----------------------------------------------------------------------------

 * Return dot products of a vector with itself: x.x

 *

 * parameters:

 *   n  <-- size of arrays x

 *   x  <-- array of floating-point values

 *

 * returns:

 *   dot product

 *----------------------------------------------------------------------------*/


double

cs_dot_xx(cs_lnum_t         n,

          const cs_real_t  *x);


/*----------------------------------------------------------------------------

 * Return weighted dot products of a vector with itself: x.x

 *

 * For better precision, a superblock algorithm is used.

 *

 * parameters:

 *   n  <-- size of arrays x

 *   w  <-- array of weights

 *   x  <-- array of floating-point values

 *

 * returns:

 *   dot product

 *----------------------------------------------------------------------------*/


double

cs_dot_wxx(cs_lnum_t         n,

           const cs_real_t  *w,

           const cs_real_t  *x);


/*----------------------------------------------------------------------------

 * Return the double dot product of 2 vectors: x.x, and x.y

 *

 * The products could be computed separately, but computing them

 * simultaneously adds more optimization opportunities and possibly better

 * cache behavior.

 *

 * parameters:

 *   n  <-- size of arrays x and y

 *   x  <-- array of floating-point values

 *   y  <-- array of floating-point values

 *   xx --> x.x dot product

 *   xy --> x.y dot product

 *----------------------------------------------------------------------------*/


void

cs_dot_xx_xy(cs_lnum_t                    n,

             const cs_real_t  *restrict   x,

             const cs_real_t  *restrict   y,

             double                      *xx,

             double                      *xy);


/*----------------------------------------------------------------------------

 * Return the double dot product of 3 vectors: x.y, and y.z

 *

 * The products could be computed separately, but computing them

 * simultaneously adds more optimization opportunities and possibly better

 * cache behavior.

 *

 * parameters:

 *   n  <-- size of arrays x and y

 *   x  <-- array of floating-point values

 *   y  <-- array of floating-point values

 *   z  <-- array of floating-point values

 *   xy --> x.y dot product

 *   yz --> x.z dot product

 *----------------------------------------------------------------------------*/


void

cs_dot_xy_yz(cs_lnum_t                    n,

             const cs_real_t  *restrict   x,

             const cs_real_t  *restrict   y,

             const cs_real_t  *restrict   z,

             double                      *xx,

             double                      *xy);


/*----------------------------------------------------------------------------

 * Return 3 dot products of 3 vectors: x.x, x.y, and y.z

 *

 * The products could be computed separately, but computing them

 * simultaneously adds more optimization opportunities and possibly better

 * cache behavior.

 *

 * parameters:

 *   n  <-- size of arrays x and y

 *   x  <-- array of floating-point values

 *   y  <-- array of floating-point values

 *   z  <-- array of floating-point values

 *   xx --> x.y dot product

 *   xy --> x.y dot product

 *   yz --> y.z dot product

 *----------------------------------------------------------------------------*/


void

cs_dot_xx_xy_yz(cs_lnum_t                    n,

                const cs_real_t  *restrict   x,

                const cs_real_t  *restrict   y,

                const cs_real_t  *restrict   z,

                double                      *xx,

                double                      *xy,

                double                      *yz);


/*----------------------------------------------------------------------------

 * Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z

 *

 * The products could be computed separately, but computing them

 * simultaneously adds more optimization opportunities and possibly better

 * cache behavior.

 *

 * parameters:

 *   n  <-- size of arrays x and y

 *   x  <-- array of floating-point values

 *   y  <-- array of floating-point values

 *   z  <-- array of floating-point values

 *   xx --> x.y dot product

 *   yy --> y.y dot product

 *   xy --> x.y dot product

 *   xz --> x.z dot product

 *   yz --> y.z dot product

 *----------------------------------------------------------------------------*/


void

cs_dot_xx_yy_xy_xz_yz(cs_lnum_t                    n,

                      const cs_real_t  *restrict   x,

                      const cs_real_t  *restrict   y,

                      const cs_real_t  *restrict   z,

                      double                      *xx,

                      double                      *yy,

                      double                      *xy,

                      double                      *xz,

                      double                      *yz);


/*----------------------------------------------------------------------------

 * Return the global dot product of 2 vectors: x.y

 *

 * In parallel mode, the local results are summed on the default

 * global communicator.

 *

 * parameters:

 *   n <-- size of arrays x and y

 *   x <-- array of floating-point values

 *   y <-- array of floating-point values

 *

 * returns:

 *   dot product

 *----------------------------------------------------------------------------*/


double

cs_gdot(cs_lnum_t         n,

        const cs_real_t  *x,

        const cs_real_t  *y);


/*----------------------------------------------------------------------------

 * Return the global residual of 2 extensive vectors:

 *  1/sum(vol) . sum(X.Y/vol)

 *

 * parameters:

 *   n   <-- size of arrays x and y

 *   vol <-- array of floating-point values

 *   x   <-- array of floating-point values

 *   y   <-- array of floating-point values

 *

 * returns:

 *   global residual

 *----------------------------------------------------------------------------*/


double

cs_gres(cs_lnum_t         n,

        const cs_real_t  *vol,

        const cs_real_t  *x,

        const cs_real_t  *y);


/*----------------------------------------------------------------------------*/


END_C_DECLS


#endif /* __CS_BLAS_H__ */