Next: Running Statistics References and Further Reading, Previous: Running Statistics Quantiles, Up: Running Statistics [Index]
Here is a basic example of how to use the statistical functions:
#include <stdio.h> #include <gsl/gsl_rstat.h> int main(void) { double data[5] = {17.2, 18.1, 16.5, 18.3, 12.6}; double mean, variance, largest, smallest, sd, rms, sd_mean, median, skew, kurtosis; gsl_rstat_workspace *rstat_p = gsl_rstat_alloc(); size_t i, n; /* add data to rstat accumulator */ for (i = 0; i < 5; ++i) gsl_rstat_add(data[i], rstat_p); mean = gsl_rstat_mean(rstat_p); variance = gsl_rstat_variance(rstat_p); largest = gsl_rstat_max(rstat_p); smallest = gsl_rstat_min(rstat_p); median = gsl_rstat_median(rstat_p); sd = gsl_rstat_sd(rstat_p); sd_mean = gsl_rstat_sd_mean(rstat_p); skew = gsl_rstat_skew(rstat_p); rms = gsl_rstat_rms(rstat_p); kurtosis = gsl_rstat_kurtosis(rstat_p); n = gsl_rstat_n(rstat_p); printf ("The dataset is %g, %g, %g, %g, %g\n", data[0], data[1], data[2], data[3], data[4]); printf ("The sample mean is %g\n", mean); printf ("The estimated variance is %g\n", variance); printf ("The largest value is %g\n", largest); printf ("The smallest value is %g\n", smallest); printf( "The median is %g\n", median); printf( "The standard deviation is %g\n", sd); printf( "The root mean square is %g\n", rms); printf( "The standard devation of the mean is %g\n", sd_mean); printf( "The skew is %g\n", skew); printf( "The kurtosis %g\n", kurtosis); printf( "There are %zu items in the accumulator\n", n); gsl_rstat_reset(rstat_p); n = gsl_rstat_n(rstat_p); printf( "There are %zu items in the accumulator\n", n); gsl_rstat_free(rstat_p); return 0; }
The program should produce the following output,
The dataset is 17.2, 18.1, 16.5, 18.3, 12.6 The sample mean is 16.54 The estimated variance is 5.373 The largest value is 18.3 The smallest value is 12.6 The median is 16.5 The standard deviation is 2.31797 The root mean square is 16.6694 The standard devation of the mean is 1.03663 The skew is -0.829058 The kurtosis -1.2217 There are 5 items in the accumulator There are 0 items in the accumulator
This next program estimates the lower quartile, median and upper quartile from 10,000 samples of a random Rayleigh distribution, using the P^2 algorithm of Jain and Chlamtec. For comparison, the exact values are also computed from the sorted dataset.
#include <stdio.h> #include <stdlib.h> #include <gsl/gsl_rstat.h> #include <gsl/gsl_statistics.h> #include <gsl/gsl_rng.h> #include <gsl/gsl_randist.h> #include <gsl/gsl_sort.h> int main(void) { const size_t N = 10000; double *data = malloc(N * sizeof(double)); gsl_rstat_quantile_workspace *work_25 = gsl_rstat_quantile_alloc(0.25); gsl_rstat_quantile_workspace *work_50 = gsl_rstat_quantile_alloc(0.5); gsl_rstat_quantile_workspace *work_75 = gsl_rstat_quantile_alloc(0.75); gsl_rng *r = gsl_rng_alloc(gsl_rng_default); double exact_p25, exact_p50, exact_p75; double val_p25, val_p50, val_p75; size_t i; /* add data to quantile accumulators; also store data for exact * comparisons */ for (i = 0; i < N; ++i) { data[i] = gsl_ran_rayleigh(r, 1.0); gsl_rstat_quantile_add(data[i], work_25); gsl_rstat_quantile_add(data[i], work_50); gsl_rstat_quantile_add(data[i], work_75); } /* exact values */ gsl_sort(data, 1, N); exact_p25 = gsl_stats_quantile_from_sorted_data(data, 1, N, 0.25); exact_p50 = gsl_stats_quantile_from_sorted_data(data, 1, N, 0.5); exact_p75 = gsl_stats_quantile_from_sorted_data(data, 1, N, 0.75); /* estimated values */ val_p25 = gsl_rstat_quantile_get(work_25); val_p50 = gsl_rstat_quantile_get(work_50); val_p75 = gsl_rstat_quantile_get(work_75); printf ("The dataset is %g, %g, %g, %g, %g, ...\n", data[0], data[1], data[2], data[3], data[4]); printf ("0.25 quartile: exact = %.5f, estimated = %.5f, error = %.6e\n", exact_p25, val_p25, (val_p25 - exact_p25) / exact_p25); printf ("0.50 quartile: exact = %.5f, estimated = %.5f, error = %.6e\n", exact_p50, val_p50, (val_p50 - exact_p50) / exact_p50); printf ("0.75 quartile: exact = %.5f, estimated = %.5f, error = %.6e\n", exact_p75, val_p75, (val_p75 - exact_p75) / exact_p75); gsl_rstat_quantile_free(work_25); gsl_rstat_quantile_free(work_50); gsl_rstat_quantile_free(work_75); gsl_rng_free(r); free(data); return 0; }
The program should produce the following output,
The dataset is 0.00645272, 0.0074002, 0.0120706, 0.0207256, 0.0227282, ... 0.25 quartile: exact = 0.75766, estimated = 0.75580, error = -2.450209e-03 0.50 quartile: exact = 1.17508, estimated = 1.17438, error = -5.995912e-04 0.75 quartile: exact = 1.65347, estimated = 1.65696, error = 2.110571e-03
Next: Running Statistics References and Further Reading, Previous: Running Statistics Quantiles, Up: Running Statistics [Index]