My Project
programmer's documentation
|
#include "cs_defs.h"
#include <math.h>
#include <stdio.h>
#include "cs_base.h"
#include "cs_parall.h"
#include "cs_blas.h"
Functions | |
static void | _thread_range (cs_lnum_t n, cs_lnum_t *s_id, cs_lnum_t *e_id) |
Compute array index bounds for a local thread. More... | |
static void | _sbloc_sizes (cs_lnum_t n, cs_lnum_t block_size, cs_lnum_t *n_sblocks, cs_lnum_t *blocks_in_sblocks) |
Compute blocks sizes for superblock algorithm. More... | |
static double | _cs_dot_superblock (cs_lnum_t n, const cs_real_t *x, const cs_real_t *y) |
Return the dot product of 2 vectors: x.y using a superblock algorithm. More... | |
static double | _cs_dot_xx_superblock (cs_lnum_t n, const cs_real_t *x) |
Return dot products of a vector with itself: x.x using a superblock algorithm. More... | |
static void | _cs_dot_xx_xy_superblock (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, double *xx, double *xy) |
Return 2 dot products of 2 vectors: x.x, and x.y using a superblock algorithm. More... | |
static void | _cs_dot_xy_yz_superblock (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xy, double *yz) |
Return 2 dot products of 3 vectors: x.y, and y.z using a superblock algorithm. More... | |
static void | _cs_dot_xx_xy_yz_superblock (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *xy, double *yz) |
Return 3 dot products of 3 vectors: x.x, x.y, and y.z using a superblock algorithm. More... | |
static void | _cs_dot_xx_yy_xy_xz_yz_superblock (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *yy, double *xy, double *xz, double *yz) |
Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z using a superblock algorithm. More... | |
static double | _cs_gres_superblock (cs_lnum_t n, const cs_real_t *vol, const cs_real_t *x, const cs_real_t *y) |
Return the global residual of 2 extensive vectors: 1/sum(vol) . sum(X.Y.vol) using a superblock algorithm. More... | |
static double | _cs_dot_kahan (cs_lnum_t n, const cs_real_t *x, const cs_real_t *y) |
Return the dot product of 2 vectors: x.y using Kahan summation. More... | |
static double | _cs_dot_xx_kahan (cs_lnum_t n, const cs_real_t *x) |
Return dot products of a vector with itself: x.x using Kahan summation. More... | |
static void | _cs_dot_xx_xy_kahan (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, double *xx, double *xy) |
Return 2 dot products of 2 vectors: x.x, and x.y using Kahan summation. More... | |
static void | _cs_dot_xy_yz_kahan (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xy, double *yz) |
Return 2 dot products of 3 vectors: x.y, and y.z using Kahan summation. More... | |
static void | _cs_dot_xx_xy_yz_kahan (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *xy, double *yz) |
Return 3 dot products of 3 vectors: x.x, x.y, and y.z using Kahan summation. More... | |
static void | _cs_dot_xx_yy_xy_xz_yz_kahan (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *yy, double *xy, double *xz, double *yz) |
Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z using Kahan summation. More... | |
static double | _cs_gres_kahan (cs_lnum_t n, const cs_real_t *vol, const cs_real_t *x, const cs_real_t *y) |
Return the global residual of 2 extensive vectors: 1/sum(vol) . sum(X.Y.vol) using Kahan summation. More... | |
void | cs_blas_set_reduce_algorithm (cs_blas_reduce_t mode) |
Set the preferred BLAS reduction algorithm family. More... | |
void | cs_axpy (cs_lnum_t n, double a, const cs_real_t *x, cs_real_t *restrict y) |
Constant times a vector plus a vector: y <- ax + y. More... | |
double | cs_sum (cs_lnum_t n, const cs_real_t *x) |
double | cs_weighted_sum (cs_lnum_t n, const cs_real_t *w, const cs_real_t *x) |
double | cs_dot (cs_lnum_t n, const cs_real_t *x, const cs_real_t *y) |
Return the dot product of 2 vectors: x.y. More... | |
double | cs_dot_xx (cs_lnum_t n, const cs_real_t *x) |
Return dot products of a vector with itself: x.x. More... | |
double | cs_dot_wxx (cs_lnum_t n, const cs_real_t *w, const cs_real_t *x) |
void | cs_dot_xx_xy (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, double *xx, double *xy) |
Return 2 dot products of 2 vectors: x.x, and x.y. More... | |
void | cs_dot_xy_yz (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xy, double *yz) |
Return 2 dot products of 3 vectors: x.y, and y.z. More... | |
void | cs_dot_xx_xy_yz (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *xy, double *yz) |
Return 3 dot products of 3 vectors: x.x, x.y, and y.z. More... | |
void | cs_dot_xx_yy_xy_xz_yz (cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *yy, double *xy, double *xz, double *yz) |
Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z. More... | |
double | cs_gdot (cs_lnum_t n, const cs_real_t *x, const cs_real_t *y) |
Return the global dot product of 2 vectors: x.y. More... | |
double | cs_gres (cs_lnum_t n, const cs_real_t *vol, const cs_real_t *x, const cs_real_t *y) |
Return the global residual of 2 extensive vectors: 1/sum(vol) . sum(X.Y/vol) More... | |
Variables | |
static cs_dot_t * | _cs_glob_dot = _cs_dot_superblock |
static cs_dot_xx_t * | _cs_glob_dot_xx = _cs_dot_xx_superblock |
static cs_dot_xx_xy_t * | _cs_glob_dot_xx_xy = _cs_dot_xx_xy_superblock |
static cs_dot_xy_yz_t * | _cs_glob_dot_xy_yz = _cs_dot_xy_yz_superblock |
static cs_dot_xx_xy_yz_t * | _cs_glob_dot_xx_xy_yz = _cs_dot_xx_xy_yz_superblock |
static cs_dot_xx_yy_xy_xz_yz_t * | _cs_glob_dot_xx_yy_xy_xz_yz = _cs_dot_xx_yy_xy_xz_yz_superblock |
static cs_gres_t * | _cs_glob_gres = _cs_gres_superblock |
BLAS (Basic Linear Algebra Subroutines) type functions
Return the dot product of 2 vectors: x.y using Kahan summation.
[in] | n | size of arrays x and y |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
Return the dot product of 2 vectors: x.y using a superblock algorithm.
[in] | n | size of arrays x and y |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
Return dot products of a vector with itself: x.x using Kahan summation.
[in] | n | size of array x |
[in] | x | array of floating-point values |
Return dot products of a vector with itself: x.x using a superblock algorithm.
[in] | n | size of array x |
[in] | x | array of floating-point values |
|
static |
Return 2 dot products of 2 vectors: x.x, and x.y using Kahan summation.
The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.
[in] | n | size of arrays x and y |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
[out] | xx | x.x dot product |
[out] | xy | x.y dot product |
|
static |
Return 2 dot products of 2 vectors: x.x, and x.y using a superblock algorithm.
The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.
[in] | n | size of arrays x and y |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
[out] | xx | x.x dot product |
[out] | xy | x.y dot product |
|
static |
Return 3 dot products of 3 vectors: x.x, x.y, and y.z using Kahan summation.
The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.
For better precision, Kahan summation is used.
[in] | n | size of arrays x, y, and z |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
[in] | z | array of floating-point values |
[out] | xx | x.x dot product |
[out] | xy | x.y dot product |
[out] | yz | y.z dot product |
|
static |
Return 3 dot products of 3 vectors: x.x, x.y, and y.z using a superblock algorithm.
The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.
For better precision, a superblock algorithm is used.
[in] | n | size of arrays x, y, and z |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
[in] | z | array of floating-point values |
[out] | xx | x.x dot product |
[out] | xy | x.y dot product |
[out] | yz | y.z dot product |
|
static |
Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z using Kahan summation.
The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.
[in] | n | size of arrays x, y, and z |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
[in] | z | array of floating-point values |
[out] | xx | x.x dot product |
[out] | yy | y.y dot product |
[out] | xy | x.y dot product |
[out] | xz | x.z dot product |
[out] | yz | y.z dot product |
|
static |
Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z using a superblock algorithm.
The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.
[in] | n | size of arrays x, y, and z |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
[in] | z | array of floating-point values |
[out] | xx | x.x dot product |
[out] | yy | y.y dot product |
[out] | xy | x.y dot product |
[out] | xz | x.z dot product |
[out] | yz | y.z dot product |
|
static |
Return 2 dot products of 3 vectors: x.y, and y.z using Kahan summation.
The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.
[in] | n | size of arrays x, y, and z |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
[in] | z | array of floating-point values |
[out] | xy | x.y dot product |
[out] | yz | y.z dot product |
|
static |
Return 2 dot products of 3 vectors: x.y, and y.z using a superblock algorithm.
The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.
[in] | n | size of arrays x, y, and z |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
[in] | z | array of floating-point values |
[out] | xy | x.y dot product |
[out] | yz | y.z dot product |
|
static |
Return the global residual of 2 extensive vectors: 1/sum(vol) . sum(X.Y.vol) using Kahan summation.
In parallel mode, the local results are summed on the default global communicator.
[in] | n | size of arrays x and y |
[in] | vol | array of floating-point values |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
|
static |
Return the global residual of 2 extensive vectors: 1/sum(vol) . sum(X.Y.vol) using a superblock algorithm.
In parallel mode, the local results are summed on the default global communicator.
[in] | n | size of arrays x and y |
[in] | vol | array of floating-point values |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
|
inline static |
Compute blocks sizes for superblock algorithm.
[in] | n | size of array |
[in] | block_size | block size |
[out] | n_sblocks | number of superblocks |
[out] | blocks_in_sblocks | number of blocks per superblock |
Compute array index bounds for a local thread.
When called inside an OpenMP parallel section, this will return the start and past-the-end indexes for the array range assigned to that thread. In other cases, the start index is 0, and the past-the-end index is n.
[in] | n | size of array |
[out] | s_id | start index for the current thread |
[out] | e_id | past-the-end index for the current thread |
Constant times a vector plus a vector: y <- ax + y.
[in] | n | size of arrays x and y |
[in] | a | multiplier for x |
[in] | x | array of floating-point values |
[in,out] | y | array of floating-point values |
void cs_blas_set_reduce_algorithm | ( | cs_blas_reduce_t | mode | ) |
Set the preferred BLAS reduction algorithm family.
This may not be enforced for all algorithms, though it should at least be enforced for the most general functions such as cs_dot.
[in] | mode | BLAS mode to use |
Return the dot product of 2 vectors: x.y.
[in] | n | size of arrays x and y |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
Return dot products of a vector with itself: x.x.
For better precision, a superblock algorithm is used.
[in] | n | size of array x |
[in] | x | array of floating-point values |
void cs_dot_xx_xy | ( | cs_lnum_t | n, |
const cs_real_t *restrict | x, | ||
const cs_real_t *restrict | y, | ||
double * | xx, | ||
double * | xy | ||
) |
Return 2 dot products of 2 vectors: x.x, and x.y.
The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.
[in] | n | size of arrays x and y |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
[out] | xx | x.x dot product |
[out] | xy | x.y dot product |
void cs_dot_xx_xy_yz | ( | cs_lnum_t | n, |
const cs_real_t *restrict | x, | ||
const cs_real_t *restrict | y, | ||
const cs_real_t *restrict | z, | ||
double * | xx, | ||
double * | xy, | ||
double * | yz | ||
) |
Return 3 dot products of 3 vectors: x.x, x.y, and y.z.
The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.
[in] | n | size of arrays x, y, and z |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
[in] | z | array of floating-point values |
[out] | xx | x.x dot product |
[out] | xy | x.y dot product |
[out] | yz | y.z dot product |
void cs_dot_xx_yy_xy_xz_yz | ( | cs_lnum_t | n, |
const cs_real_t *restrict | x, | ||
const cs_real_t *restrict | y, | ||
const cs_real_t *restrict | z, | ||
double * | xx, | ||
double * | yy, | ||
double * | xy, | ||
double * | xz, | ||
double * | yz | ||
) |
Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z.
The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.
[in] | n | size of arrays x, y, and z |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
[in] | z | array of floating-point values |
[out] | xx | x.x dot product |
[out] | yy | y.y dot product |
[out] | xy | x.y dot product |
[out] | xz | x.z dot product |
[out] | yz | y.z dot product |
void cs_dot_xy_yz | ( | cs_lnum_t | n, |
const cs_real_t *restrict | x, | ||
const cs_real_t *restrict | y, | ||
const cs_real_t *restrict | z, | ||
double * | xy, | ||
double * | yz | ||
) |
Return 2 dot products of 3 vectors: x.y, and y.z.
The products could be computed separately, but computing them simultaneously adds more optimization opportunities and possibly better cache behavior.
[in] | n | size of arrays x, y, and z |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
[in] | z | array of floating-point values |
[out] | xy | x.y dot product |
[out] | yz | y.z dot product |
Return the global dot product of 2 vectors: x.y.
In parallel mode, the local results are summed on the default global communicator.
For better precision, a superblock algorithm is used.
[in] | n | size of arrays x and y |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
Return the global residual of 2 extensive vectors: 1/sum(vol) . sum(X.Y/vol)
In parallel mode, the local results are summed on the default global communicator.
[in] | n | size of arrays x and y |
[in] | vol | array of floating-point values |
[in] | x | array of floating-point values |
[in] | y | array of floating-point values |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |