My Project
programmer's documentation
Functions | Variables
cs_blas.c File Reference
#include "cs_defs.h"
#include <math.h>
#include <stdio.h>
#include "cs_base.h"
#include "cs_parall.h"
#include "cs_blas.h"
Include dependency graph for cs_blas.c:

Functions

static void _thread_range (cs_lnum_t n, cs_lnum_t *s_id, cs_lnum_t *e_id)
 Compute array index bounds for a local thread. More...
 
static void _sbloc_sizes (cs_lnum_t n, cs_lnum_t block_size, cs_lnum_t *n_sblocks, cs_lnum_t *blocks_in_sblocks)
 Compute blocks sizes for superblock algorithm. More...
 
static double _cs_dot_superblock (cs_lnum_t n, const cs_real_t *x, const cs_real_t *y)
 Return the dot product of 2 vectors: x.y using a superblock algorithm. More...
 
static double _cs_dot_xx_superblock (cs_lnum_t n, const cs_real_t *x)
 Return dot products of a vector with itself: x.x using a superblock algorithm. More...
 
static void _cs_dot_xx_xy_superblock (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, double *xx, double *xy)
 Return 2 dot products of 2 vectors: x.x, and x.y using a superblock algorithm. More...
 
static void _cs_dot_xy_yz_superblock (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xy, double *yz)
 Return 2 dot products of 3 vectors: x.y, and y.z using a superblock algorithm. More...
 
static void _cs_dot_xx_xy_yz_superblock (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *xy, double *yz)
 Return 3 dot products of 3 vectors: x.x, x.y, and y.z using a superblock algorithm. More...
 
static void _cs_dot_xx_yy_xy_xz_yz_superblock (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *yy, double *xy, double *xz, double *yz)
 Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z using a superblock algorithm. More...
 
static double _cs_gres_superblock (cs_lnum_t n, const cs_real_t *vol, const cs_real_t *x, const cs_real_t *y)
 Return the global residual of 2 extensive vectors: 1/sum(vol) . sum(X.Y/vol) using a superblock algorithm. More...
 
static double _cs_dot_kahan (cs_lnum_t n, const cs_real_t *x, const cs_real_t *y)
 Return the dot product of 2 vectors: x.y using Kahan summation. More...
 
static double _cs_dot_xx_kahan (cs_lnum_t n, const cs_real_t *x)
 Return the dot product of a vector with itself: x.x using Kahan summation. More...
 
static void _cs_dot_xx_xy_kahan (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, double *xx, double *xy)
 Return 2 dot products of 2 vectors: x.x, and x.y using Kahan summation. More...
 
static void _cs_dot_xy_yz_kahan (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xy, double *yz)
 Return 2 dot products of 3 vectors: x.y, and y.z using Kahan summation. More...
 
static void _cs_dot_xx_xy_yz_kahan (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *xy, double *yz)
 Return 3 dot products of 3 vectors: x.x, x.y, and y.z using Kahan summation. More...
 
static void _cs_dot_xx_yy_xy_xz_yz_kahan (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *yy, double *xy, double *xz, double *yz)
 Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z using Kahan summation. More...
 
static double _cs_gres_kahan (cs_lnum_t n, const cs_real_t *vol, const cs_real_t *x, const cs_real_t *y)
 Return the global residual of 2 extensive vectors: 1/sum(vol) . sum(X.Y/vol) using Kahan summation. More...
 
void cs_blas_set_reduce_algorithm (cs_blas_reduce_t mode)
 Set the preferred BLAS reduction algorithm family. More...
 
void cs_axpy (cs_lnum_t n, double a, const cs_real_t *x, cs_real_t *restrict y)
 Constant times a vector plus a vector: y <- a.x + y. More...
 
double cs_sum (cs_lnum_t n, const cs_real_t *x)
 
double cs_weighted_sum (cs_lnum_t n, const cs_real_t *w, const cs_real_t *x)
 
double cs_dot (cs_lnum_t n, const cs_real_t *x, const cs_real_t *y)
 Return the dot product of 2 vectors: x.y. More...
 
double cs_dot_xx (cs_lnum_t n, const cs_real_t *x)
 Return dot products of a vector with itself: x.x. More...
 
double cs_dot_wxx (cs_lnum_t n, const cs_real_t *w, const cs_real_t *x)
 
void cs_dot_xx_xy (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, double *xx, double *xy)
 Return 2 dot products of 2 vectors: x.x, and x.y. More...
 
void cs_dot_xy_yz (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xy, double *yz)
 Return 2 dot products of 3 vectors: x.y, and y.z. More...
 
void cs_dot_xx_xy_yz (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *xy, double *yz)
 Return 3 dot products of 3 vectors: x.x, x.y, and y.z. More...
 
void cs_dot_xx_yy_xy_xz_yz (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *yy, double *xy, double *xz, double *yz)
 Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z. More...
 
double cs_gdot (cs_lnum_t n, const cs_real_t *x, const cs_real_t *y)
 Return the global dot product of 2 vectors: x.y. More...
 
double cs_gres (cs_lnum_t n, const cs_real_t *vol, const cs_real_t *x, const cs_real_t *y)
 Return the global residual of 2 extensive vectors: 1/sum(vol) . sum(X.Y/vol) More...
 

Variables

static cs_dot_t * _cs_glob_dot = _cs_dot_superblock
 
static cs_dot_xx_t * _cs_glob_dot_xx = _cs_dot_xx_superblock
 
static cs_dot_xx_xy_t * _cs_glob_dot_xx_xy = _cs_dot_xx_xy_superblock
 
static cs_dot_xy_yz_t * _cs_glob_dot_xy_yz = _cs_dot_xy_yz_superblock
 
static cs_dot_xx_xy_yz_t * _cs_glob_dot_xx_xy_yz = _cs_dot_xx_xy_yz_superblock
 
static cs_dot_xx_yy_xy_xz_yz_t * _cs_glob_dot_xx_yy_xy_xz_yz = _cs_dot_xx_yy_xy_xz_yz_superblock
 
static cs_gres_t * _cs_glob_gres = _cs_gres_superblock
 

Detailed Description

BLAS (Basic Linear Algebra Subroutines) type functions

Function Documentation

◆ _cs_dot_kahan()

static double _cs_dot_kahan ( cs_lnum_t  n,
const cs_real_t *x,
const cs_real_t *y
)
static

Return the dot product of 2 vectors: x.y using Kahan summation.

Parameters
[in] n: size of arrays x and y
[in] x: array of floating-point values
[in] y: array of floating-point values
Returns
dot product

◆ _cs_dot_superblock()

static double _cs_dot_superblock ( cs_lnum_t  n,
const cs_real_t *x,
const cs_real_t *y
)
static

Return the dot product of 2 vectors: x.y using a superblock algorithm.

Parameters
[in] n: size of arrays x and y
[in] x: array of floating-point values
[in] y: array of floating-point values
Returns
dot product

◆ _cs_dot_xx_kahan()

static double _cs_dot_xx_kahan ( cs_lnum_t  n,
const cs_real_t *x
)
static

Return the dot product of a vector with itself: x.x using Kahan summation.

Parameters
[in] n: size of array x
[in] x: array of floating-point values
Returns
dot product

◆ _cs_dot_xx_superblock()

static double _cs_dot_xx_superblock ( cs_lnum_t  n,
const cs_real_t *x
)
static

Return dot products of a vector with itself: x.x using a superblock algorithm.

Parameters
[in] n: size of array x
[in] x: array of floating-point values
Returns
dot product

◆ _cs_dot_xx_xy_kahan()

static void _cs_dot_xx_xy_kahan ( cs_lnum_t  n,
const cs_real_t *restrict  x,
const cs_real_t *restrict  y,
double *  xx,
double *  xy 
)
static

Return 2 dot products of 2 vectors: x.x, and x.y using Kahan summation.

The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.

Parameters
[in] n: size of arrays x and y
[in] x: array of floating-point values
[in] y: array of floating-point values
[out] xx: x.x dot product
[out] xy: x.y dot product

◆ _cs_dot_xx_xy_superblock()

static void _cs_dot_xx_xy_superblock ( cs_lnum_t  n,
const cs_real_t *restrict  x,
const cs_real_t *restrict  y,
double *  xx,
double *  xy 
)
static

Return 2 dot products of 2 vectors: x.x, and x.y using a superblock algorithm.

The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.

Parameters
[in] n: size of arrays x and y
[in] x: array of floating-point values
[in] y: array of floating-point values
[out] xx: x.x dot product
[out] xy: x.y dot product

◆ _cs_dot_xx_xy_yz_kahan()

static void _cs_dot_xx_xy_yz_kahan ( cs_lnum_t  n,
const cs_real_t *restrict  x,
const cs_real_t *restrict  y,
const cs_real_t *restrict  z,
double *  xx,
double *  xy,
double *  yz 
)
static

Return 3 dot products of 3 vectors: x.x, x.y, and y.z using Kahan summation.

The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.

For better precision, Kahan summation is used.

Parameters
[in] n: size of arrays x, y, and z
[in] x: array of floating-point values
[in] y: array of floating-point values
[in] z: array of floating-point values
[out] xx: x.x dot product
[out] xy: x.y dot product
[out] yz: y.z dot product

◆ _cs_dot_xx_xy_yz_superblock()

static void _cs_dot_xx_xy_yz_superblock ( cs_lnum_t  n,
const cs_real_t *restrict  x,
const cs_real_t *restrict  y,
const cs_real_t *restrict  z,
double *  xx,
double *  xy,
double *  yz 
)
static

Return 3 dot products of 3 vectors: x.x, x.y, and y.z using a superblock algorithm.

The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.

For better precision, a superblock algorithm is used.

Parameters
[in] n: size of arrays x, y, and z
[in] x: array of floating-point values
[in] y: array of floating-point values
[in] z: array of floating-point values
[out] xx: x.x dot product
[out] xy: x.y dot product
[out] yz: y.z dot product

◆ _cs_dot_xx_yy_xy_xz_yz_kahan()

static void _cs_dot_xx_yy_xy_xz_yz_kahan ( cs_lnum_t  n,
const cs_real_t *restrict  x,
const cs_real_t *restrict  y,
const cs_real_t *restrict  z,
double *  xx,
double *  yy,
double *  xy,
double *  xz,
double *  yz 
)
static

Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z using Kahan summation.

The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.

Parameters
[in] n: size of arrays x, y, and z
[in] x: array of floating-point values
[in] y: array of floating-point values
[in] z: array of floating-point values
[out] xx: x.x dot product
[out] yy: y.y dot product
[out] xy: x.y dot product
[out] xz: x.z dot product
[out] yz: y.z dot product

◆ _cs_dot_xx_yy_xy_xz_yz_superblock()

static void _cs_dot_xx_yy_xy_xz_yz_superblock ( cs_lnum_t  n,
const cs_real_t *restrict  x,
const cs_real_t *restrict  y,
const cs_real_t *restrict  z,
double *  xx,
double *  yy,
double *  xy,
double *  xz,
double *  yz 
)
static

Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z using a superblock algorithm.

The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.

Parameters
[in] n: size of arrays x, y, and z
[in] x: array of floating-point values
[in] y: array of floating-point values
[in] z: array of floating-point values
[out] xx: x.x dot product
[out] yy: y.y dot product
[out] xy: x.y dot product
[out] xz: x.z dot product
[out] yz: y.z dot product

◆ _cs_dot_xy_yz_kahan()

static void _cs_dot_xy_yz_kahan ( cs_lnum_t  n,
const cs_real_t *restrict  x,
const cs_real_t *restrict  y,
const cs_real_t *restrict  z,
double *  xy,
double *  yz 
)
static

Return 2 dot products of 3 vectors: x.y, and y.z using Kahan summation.

The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.

Parameters
[in] n: size of arrays x, y, and z
[in] x: array of floating-point values
[in] y: array of floating-point values
[in] z: array of floating-point values
[out] xy: x.y dot product
[out] yz: y.z dot product

◆ _cs_dot_xy_yz_superblock()

static void _cs_dot_xy_yz_superblock ( cs_lnum_t  n,
const cs_real_t *restrict  x,
const cs_real_t *restrict  y,
const cs_real_t *restrict  z,
double *  xy,
double *  yz 
)
static

Return 2 dot products of 3 vectors: x.y, and y.z using a superblock algorithm.

The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.

Parameters
[in] n: size of arrays x, y, and z
[in] x: array of floating-point values
[in] y: array of floating-point values
[in] z: array of floating-point values
[out] xy: x.y dot product
[out] yz: y.z dot product

◆ _cs_gres_kahan()

static double _cs_gres_kahan ( cs_lnum_t  n,
const cs_real_t *vol,
const cs_real_t *x,
const cs_real_t *y
)
static

Return the global residual of 2 extensive vectors: 1/sum(vol) . sum(X.Y/vol) using Kahan summation.

In parallel mode, the local results are summed on the default global communicator.

Parameters
[in] n: size of arrays x and y
[in] vol: array of floating-point values
[in] x: array of floating-point values
[in] y: array of floating-point values
Returns
global residual

◆ _cs_gres_superblock()

static double _cs_gres_superblock ( cs_lnum_t  n,
const cs_real_t *vol,
const cs_real_t *x,
const cs_real_t *y
)
static

Return the global residual of 2 extensive vectors: 1/sum(vol) . sum(X.Y/vol) using a superblock algorithm.

In parallel mode, the local results are summed on the default global communicator.

Parameters
[in] n: size of arrays x and y
[in] vol: array of floating-point values
[in] x: array of floating-point values
[in] y: array of floating-point values
Returns
global residual

◆ _sbloc_sizes()

static void _sbloc_sizes ( cs_lnum_t  n,
cs_lnum_t  block_size,
cs_lnum_t *n_sblocks,
cs_lnum_t *blocks_in_sblocks
)
inline static

Compute blocks sizes for superblock algorithm.

Parameters
[in] n: size of array
[in] block_size: block size
[out] n_sblocks: number of superblocks
[out] blocks_in_sblocks: number of blocks per superblock

◆ _thread_range()

static void _thread_range ( cs_lnum_t  n,
cs_lnum_t *s_id,
cs_lnum_t *e_id
)
static

Compute array index bounds for a local thread.

When called inside an OpenMP parallel section, this will return the start and past-the-end indexes for the array range assigned to that thread. In other cases, the start index is 0, and the past-the-end index is n.

Parameters
[in] n: size of array
[out] s_id: start index for the current thread
[out] e_id: past-the-end index for the current thread

◆ cs_axpy()

void cs_axpy ( cs_lnum_t  n,
double  a,
const cs_real_t *x,
cs_real_t *restrict  y 
)

Constant times a vector plus a vector: y <- a.x + y.

Parameters
[in] n: size of arrays x and y
[in] a: multiplier for x
[in] x: array of floating-point values
[in,out] y: array of floating-point values

◆ cs_blas_set_reduce_algorithm()

void cs_blas_set_reduce_algorithm ( cs_blas_reduce_t  mode)

Set the preferred BLAS reduction algorithm family.

This may not be enforced for all algorithms, though it should at least be enforced for the most general functions such as cs_dot.

Parameters
[in] mode: BLAS mode to use

◆ cs_dot()

double cs_dot ( cs_lnum_t  n,
const cs_real_t *x,
const cs_real_t *y
)

Return the dot product of 2 vectors: x.y.

Parameters
[in] n: size of arrays x and y
[in] x: array of floating-point values
[in] y: array of floating-point values
Returns
dot product

◆ cs_dot_wxx()

double cs_dot_wxx ( cs_lnum_t  n,
const cs_real_t *w,
const cs_real_t *x
)

◆ cs_dot_xx()

double cs_dot_xx ( cs_lnum_t  n,
const cs_real_t *x
)

Return dot products of a vector with itself: x.x.

For better precision, a superblock algorithm is used.

Parameters
[in] n: size of array x
[in] x: array of floating-point values
Returns
dot product

◆ cs_dot_xx_xy()

void cs_dot_xx_xy ( cs_lnum_t  n,
const cs_real_t *restrict  x,
const cs_real_t *restrict  y,
double *  xx,
double *  xy 
)

Return 2 dot products of 2 vectors: x.x, and x.y.

The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.

Parameters
[in] n: size of arrays x and y
[in] x: array of floating-point values
[in] y: array of floating-point values
[out] xx: x.x dot product
[out] xy: x.y dot product

◆ cs_dot_xx_xy_yz()

void cs_dot_xx_xy_yz ( cs_lnum_t  n,
const cs_real_t *restrict  x,
const cs_real_t *restrict  y,
const cs_real_t *restrict  z,
double *  xx,
double *  xy,
double *  yz 
)

Return 3 dot products of 3 vectors: x.x, x.y, and y.z.

The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.

Parameters
[in] n: size of arrays x, y, and z
[in] x: array of floating-point values
[in] y: array of floating-point values
[in] z: array of floating-point values
[out] xx: x.x dot product
[out] xy: x.y dot product
[out] yz: y.z dot product

◆ cs_dot_xx_yy_xy_xz_yz()

void cs_dot_xx_yy_xy_xz_yz ( cs_lnum_t  n,
const cs_real_t *restrict  x,
const cs_real_t *restrict  y,
const cs_real_t *restrict  z,
double *  xx,
double *  yy,
double *  xy,
double *  xz,
double *  yz 
)

Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z.

The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.

Parameters
[in] n: size of arrays x, y, and z
[in] x: array of floating-point values
[in] y: array of floating-point values
[in] z: array of floating-point values
[out] xx: x.x dot product
[out] yy: y.y dot product
[out] xy: x.y dot product
[out] xz: x.z dot product
[out] yz: y.z dot product

◆ cs_dot_xy_yz()

void cs_dot_xy_yz ( cs_lnum_t  n,
const cs_real_t *restrict  x,
const cs_real_t *restrict  y,
const cs_real_t *restrict  z,
double *  xy,
double *  yz 
)

Return 2 dot products of 3 vectors: x.y, and y.z.

The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.

Parameters
[in] n: size of arrays x, y, and z
[in] x: array of floating-point values
[in] y: array of floating-point values
[in] z: array of floating-point values
[out] xy: x.y dot product
[out] yz: y.z dot product

◆ cs_gdot()

double cs_gdot ( cs_lnum_t  n,
const cs_real_t *x,
const cs_real_t *y
)

Return the global dot product of 2 vectors: x.y.

In parallel mode, the local results are summed on the default global communicator.

For better precision, a superblock algorithm is used.

Parameters
[in] n: size of arrays x and y
[in] x: array of floating-point values
[in] y: array of floating-point values
Returns
dot product

◆ cs_gres()

double cs_gres ( cs_lnum_t  n,
const cs_real_t *vol,
const cs_real_t *x,
const cs_real_t *y
)

Return the global residual of 2 extensive vectors: 1/sum(vol) . sum(X.Y/vol)

In parallel mode, the local results are summed on the default global communicator.

Parameters
[in] n: size of arrays x and y
[in] vol: array of floating-point values
[in] x: array of floating-point values
[in] y: array of floating-point values
Returns
global residual

◆ cs_sum()

double cs_sum ( cs_lnum_t  n,
const cs_real_t *x
)

◆ cs_weighted_sum()

double cs_weighted_sum ( cs_lnum_t  n,
const cs_real_t *w,
const cs_real_t *x
)

Variable Documentation

◆ _cs_glob_dot

cs_dot_t* _cs_glob_dot = _cs_dot_superblock
static

◆ _cs_glob_dot_xx

cs_dot_xx_t* _cs_glob_dot_xx = _cs_dot_xx_superblock
static

◆ _cs_glob_dot_xx_xy

cs_dot_xx_xy_t* _cs_glob_dot_xx_xy = _cs_dot_xx_xy_superblock
static

◆ _cs_glob_dot_xx_xy_yz

cs_dot_xx_xy_yz_t* _cs_glob_dot_xx_xy_yz = _cs_dot_xx_xy_yz_superblock
static

◆ _cs_glob_dot_xx_yy_xy_xz_yz

cs_dot_xx_yy_xy_xz_yz_t* _cs_glob_dot_xx_yy_xy_xz_yz = _cs_dot_xx_yy_xy_xz_yz_superblock
static

◆ _cs_glob_dot_xy_yz

cs_dot_xy_yz_t* _cs_glob_dot_xy_yz = _cs_dot_xy_yz_superblock
static

◆ _cs_glob_gres

cs_gres_t* _cs_glob_gres = _cs_gres_superblock
static