My Project
programmer's documentation
Performance tuning

Introduction

C user functions for performance tuning.

Several functions are present in the file, each specific to a different set of performance tuning parameters.

Advanced mesh numbering

/* Force the target number of threads for mesh renumbering
(by default, OMP_NUM_THREADS if OpenMP is enabled, 1 otherwise) */
/* Set the minimum subset sizes when renumbering for threads. */
cs_renumber_set_min_subset_size(64, /* min. interior_subset_size */
64); /* min. boundary subset_size */
/* Select renumbering algorithms */
cs_renumber_set_algorithm(false, /* halo_adjacent_cells_last */
false, /* halo_adjacent_i_faces_last */
CS_RENUMBER_ADJACENT_LOW, /* interior face base ordering */
CS_RENUMBER_CELLS_NONE, /* cells_pre_numbering */
CS_RENUMBER_CELLS_NONE, /* cells_numbering */
CS_RENUMBER_I_FACES_MULTIPASS, /* interior faces numbering */
CS_RENUMBER_B_FACES_THREAD, /* boundary faces numbering */
CS_RENUMBER_VERTICES_NONE); /* vertices numbering */

Advanced partitioning

Example 1

{
/* Example:
Force PT-SCOTCH or SCOTCH for preprocessing partitioning,
and Hilbert SFC for main partitioning;
Available algorithms (subject to build with external libraries for
SCOTCH and METIS) are:
CS_PARTITION_DEFAULT Default partitioning, based on stage
CS_PARTITION_SFC_MORTON_BOX Morton (Z) curve in bounding box
CS_PARTITION_SFC_MORTON_CUBE Morton (Z) curve in bounding cube
CS_PARTITION_SFC_HILBERT_BOX Peano-Hilbert curve in bounding box
CS_PARTITION_SFC_HILBERT_CUBE Peano-Hilbert curve in bounding cube
CS_PARTITION_SCOTCH PT-SCOTCH or SCOTCH
CS_PARTITION_METIS ParMETIS or METIS
CS_PARTITION_BLOCK Unoptimized (naive) block partitioning */
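cs_partition_set_algorithm(CS_PARTITION_FOR_PREPROCESS,
CS_PARTITION_SCOTCH,
1, /* rank_step */
false); /* ignore periodicity in graph */
cs_partition_set_algorithm(CS_PARTITION_MAIN,
CS_PARTITION_SFC_HILBERT_BOX,
1, /* rank_step */
false); /* ignore periodicity in graph */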
}

Example 2

{
/* Example: set partitioning write to file option.
*
* value of write flag: 0: never
* 1: for graph-based partitioning only (default)
* 2: always */
}

Example 3

{
/* Example: force activation/deactivation of initial partitioning
* for preprocessing. */
}

Example 4

{
/* Example: define list of extra partitionings to build.
*
* Partitionings in this list will be output to file, and may be used for
* subsequent calculations.
*
* When partitioning for both preprocessing and calculation stages, output to
* file of partitioning data or generation of additional partitionings
* (see \ref cs_partition_add_partitions) will only be done for the
* second stage. */
int n_extra_partitions = 3;
int extra_partitions_list[] = {12, 24, 48};
cs_partition_add_partitions(n_extra_partitions, extra_partitions_list);
}

Parallel IO

#if defined(HAVE_MPI_IO) && MPI_VERSION > 1
/* Example: fine-tune parallel IO settings.
Available distributed block access methods
(subject to build with MPI IO) are:
CS_FILE_STDIO_SERIAL Serial standard C IO
(funnelled through rank 0 in parallel)
CS_FILE_STDIO_PARALLEL Per-process standard C IO
CS_FILE_MPI_INDEPENDENT Non-collective MPI-IO
with independent file open and close
CS_FILE_MPI_NON_COLLECTIVE Non-collective MPI-IO
with collective file open and close
CS_FILE_MPI_COLLECTIVE Collective MPI-IO
*/
int block_rank_step = 8;
int block_min_size = 1024*1024*8;
/* Set MPI IO hints
(see MPI-IO or your filesystem documentation;
examples here may have no effect, improve, or degrade performance)
For LUSTRE filesystems, many articles in the literature seem
to recommend adjusting striping to improve performance.
If using ROMIO, useful hints for collective buffering and data-sieving
may take values: "enable", "disable", "automatic".
*/
MPI_Info_create(&hints);
MPI_Info_set(hints, "striping_factor", "8");
MPI_Info_set(hints, "striping_unit", "8388608");
MPI_Info_set(hints, "romio_cb_read", "automatic");
MPI_Info_set(hints, "romio_cb_write", "automatic");
MPI_Info_set(hints, "romio_ds_read", "automatic");
MPI_Info_set(hints, "romio_ds_write", "automatic");
/* Set default file access methods and communicator stride */
MPI_Info_set(hints, "collective_buffering", "true");
MPI_Info_set(hints, "access_style", "read_once");
cs_file_set_default_comm(block_rank_step, block_min_size, cs_glob_mpi_comm);
MPI_Info_free(&hints);
#endif /* defined(HAVE_MPI_IO) && MPI_VERSION > 1 */

Matrix tuning

/* Activate tuning of matrix-vector operations */
/* Set tuning runs (defaults) */
cs_matrix_set_tuning_runs(10, /* n_min_products */
0.5); /* t_measure */
/* Activate tuning for selected matrix fill types. */
/* Force variant for selected types */
2,
"default");
/* Also allow tuning for multigrid for all expected levels
* (we rarely have more than 10 or 11 levels except for huge meshes). */
cs_partition_set_preprocess
void cs_partition_set_preprocess(bool active)
Activate or deactivate initial partitioning for preprocessing.
Definition: cs_partition.c:2970
CS_MATRIX_BLOCK_D
Definition: cs_matrix.h:71
CS_RENUMBER_I_FACES_MULTIPASS
Definition: cs_renumber.h:67
CS_FILE_MPI_COLLECTIVE
Definition: cs_file.h:91
cs_matrix_set_variant
void cs_matrix_set_variant(cs_matrix_fill_type_t fill_type, const cs_matrix_variant_t *mv)
Definition: cs_matrix_default.c:838
CS_RENUMBER_B_FACES_THREAD
Definition: cs_renumber.h:75
CS_PARTITION_MAIN
Definition: cs_partition.h:86
CS_FILE_MPI_INDIVIDUAL_POINTERS
Definition: cs_file.h:100
cs_file_set_default_comm
void cs_file_set_default_comm(int block_rank_step, int block_min_size, MPI_Comm comm)
Set default MPI communicator values for file access.
Definition: cs_file.c:3112
cs_grid_set_matrix_tuning
void cs_grid_set_matrix_tuning(cs_matrix_fill_type_t fill_type, int max_level)
Set matrix tuning behavior for multigrid coarse meshes.
Definition: cs_grid.c:6416
CS_PARTITION_FOR_PREPROCESS
Definition: cs_partition.h:85
cs_file_access_t
cs_file_access_t
Shared file access methods.
Definition: cs_file.h:84
cs_matrix_variant_t
struct _cs_matrix_variant_t cs_matrix_variant_t
Definition: cs_matrix.h:94
cs_renumber_set_n_threads
void cs_renumber_set_n_threads(int n_threads)
Set the target number of threads for mesh renumbering.
Definition: cs_renumber.c:5674
CS_RENUMBER_CELLS_NONE
Definition: cs_renumber.h:60
cs_matrix_set_tuning
void cs_matrix_set_tuning(cs_matrix_fill_type_t fill_type, int tune)
Definition: cs_matrix_default.c:865
cs_matrix_variant_destroy
void cs_matrix_variant_destroy(cs_matrix_variant_t **mv)
Destroy a matrix variant structure.
Definition: cs_matrix.c:7515
cs_glob_mesh
cs_mesh_t * cs_glob_mesh
cs_matrix_variant_set_func
void cs_matrix_variant_set_func(cs_matrix_variant_t *mv, const cs_numbering_t *numbering, cs_matrix_fill_type_t fill_type, int ed_flag, const char *func_name)
Select the sparse matrix-vector product function to be used by a matrix variant for a given fill type...
Definition: cs_matrix.c:7557
cs_renumber_set_algorithm
void cs_renumber_set_algorithm(bool halo_adjacent_cells_last, bool halo_adjacent_faces_last, cs_renumber_ordering_t i_faces_base_ordering, cs_renumber_cells_type_t cells_pre_numbering, cs_renumber_cells_type_t cells_numbering, cs_renumber_i_faces_type_t i_faces_numbering, cs_renumber_b_faces_type_t b_faces_numbering, cs_renumber_vertices_type_t vertices_numbering)
Select the algorithm for mesh renumbering.
Definition: cs_renumber.c:5764
cs_file_set_mpi_io_positionning
void cs_file_set_mpi_io_positionning(cs_file_mpi_positionning_t positionning)
Set the positioning method for MPI-IO.
Definition: cs_file.c:3260
cs_glob_mpi_comm
MPI_Comm cs_glob_mpi_comm
Definition: cs_defs.c:181
cs_matrix_set_tuning_runs
void cs_matrix_set_tuning_runs(int n_min_products, double t_measure)
Definition: cs_matrix_default.c:917
cs_partition_set_write_level
void cs_partition_set_write_level(int write_flag)
Set partitioning write to file option.
Definition: cs_partition.c:2936
cs_mesh_t::i_face_numbering
cs_numbering_t * i_face_numbering
Definition: cs_mesh.h:138
CS_RENUMBER_VERTICES_NONE
Definition: cs_renumber.h:85
cs_file_set_default_access
void cs_file_set_default_access(cs_file_mode_t mode, cs_file_access_t method, MPI_Info hints)
Set the default options for file access.
Definition: cs_file.c:2953
CS_MATRIX_MSR
Definition: cs_matrix.h:60
CS_RENUMBER_ADJACENT_LOW
Definition: cs_renumber.h:93
cs_partition_add_partitions
void cs_partition_add_partitions(int n_extra_partitions, int extra_partitions_list[])
Define list of extra partitionings to build.
Definition: cs_partition.c:3033
MPI_Info
#define MPI_Info
Definition: cs_defs.h:93
cs_renumber_set_min_subset_size
void cs_renumber_set_min_subset_size(cs_lnum_t min_i_subset_size, cs_lnum_t min_b_subset_size)
Set the minimum subset sizes when renumbering for threads.
Definition: cs_renumber.c:5715
CS_MATRIX_SCALAR_SYM
Definition: cs_matrix.h:70
CS_MATRIX_SCALAR
Definition: cs_matrix.h:69
CS_PARTITION_SCOTCH
Definition: cs_partition.h:112
CS_FILE_MODE_WRITE
Definition: cs_file.h:67
MPI_INFO_NULL
#define MPI_INFO_NULL
Definition: cs_defs.h:94
cs_matrix_variant_create
cs_matrix_variant_t * cs_matrix_variant_create(cs_matrix_type_t type, const cs_numbering_t *numbering)
Build matrix variant.
Definition: cs_matrix.c:7239
CS_PARTITION_SFC_HILBERT_BOX
Definition: cs_partition.h:110
cs_partition_set_algorithm
void cs_partition_set_algorithm(cs_partition_stage_t stage, cs_partition_algorithm_t algorithm, int rank_step, bool ignore_perio)
Set algorithm for domain partitioning.
Definition: cs_partition.c:2861
CS_FILE_MODE_READ
Definition: cs_file.h:66