diff mbox

[i-g-t,05/18] stats: Add a way to specify if the data set is a population or a sample

Message ID 1435417696-28115-6-git-send-email-damien.lespiau@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lespiau, Damien June 27, 2015, 3:08 p.m. UTC
This changes how we compute the variance. We want an unbiased variance
when reasoning about a sample.

Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
---
 lib/igt_stats.c           | 35 ++++++++++++++++++++++++++++++++++-
 lib/igt_stats.h           |  2 ++
 lib/tests/igt_stats.c     |  1 +
 tools/skl_compute_wrpll.c |  1 +
 4 files changed, 38 insertions(+), 1 deletion(-)
diff mbox

Patch

diff --git a/lib/igt_stats.c b/lib/igt_stats.c
index 0bee138..c7d5fbd 100644
--- a/lib/igt_stats.c
+++ b/lib/igt_stats.c
@@ -92,6 +92,36 @@  void igt_stats_fini(igt_stats_t *stats)
 }
 
 /**
+ * igt_stats_set_population:
+ * @stats: An #igt_stats_t instance
+ * @full_population: #true if @stats holds a full population, #false if it
+ *		     only holds a sample of a bigger population
+ *
+ * In statistics, we usually deal with a subset of the full data (which may be
+ * a continuous or infinite set). Data analysis is then done on a sample of
+ * this population.
+ *
+ * This has some importance as only having a sample of the data leads to
+ * [biased estimators](https://en.wikipedia.org/wiki/Bias_of_an_estimator). We
+ * currently use the information given by this method to apply
+ * [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
+ * to the variance.
+ *
+ * When giving #true to this function, the data set in @stats is considered a
+ * full population. It's considered a sample of a bigger population otherwise.
+ *
+ * When newly created, @stats defaults to holding sample data.
+ */
+void igt_stats_set_population(igt_stats_t *stats, bool full_population)
+{
+	/* Only an actual change of mode marks the mean/variance as stale. */
+	if (full_population != stats->is_population) {
+		stats->is_population = full_population;
+		stats->mean_variance_valid = false;
+	}
+}
+
+/**
  * igt_stats_push:
  * @stats: An #igt_stats_t instance
  * @value: An integer value
@@ -129,7 +159,10 @@  static void igt_stats_knuth_mean_variance(igt_stats_t *stats)
 	}
 
 	stats->mean = mean;
-	stats->variance = m2 / stats->n_values;
+	if (stats->n_values > 1 && !stats->is_population)
+		stats->variance = m2 / (stats->n_values - 1);
+	else
+		stats->variance = m2 / stats->n_values;
 	stats->mean_variance_valid = true;
 }
 
diff --git a/lib/igt_stats.h b/lib/igt_stats.h
index c45c819..d2c1cc7 100644
--- a/lib/igt_stats.h
+++ b/lib/igt_stats.h
@@ -38,12 +38,14 @@  typedef struct {
 
 	/*< private >*/
 	unsigned int capacity;
+	unsigned int is_population  : 1;
 	unsigned int mean_variance_valid : 1;
 	double mean, variance;
 } igt_stats_t;
 
 void igt_stats_init(igt_stats_t *stats, unsigned int capacity);
 void igt_stats_fini(igt_stats_t *stats);
+void igt_stats_set_population(igt_stats_t *stats, bool full_population);
 void igt_stats_push(igt_stats_t *stats, uint64_t value);
 double igt_stats_get_mean(igt_stats_t *stats);
 double igt_stats_get_variance(igt_stats_t *stats);
diff --git a/lib/tests/igt_stats.c b/lib/tests/igt_stats.c
index f76d334..59097c8 100644
--- a/lib/tests/igt_stats.c
+++ b/lib/tests/igt_stats.c
@@ -89,6 +89,7 @@  static void test_std_deviation(void)
 	double mean, variance, std_deviation;
 
 	igt_stats_init(&stats, 8);
+	igt_stats_set_population(&stats, true);
 
 	igt_stats_push(&stats, 2);
 	igt_stats_push(&stats, 4);
diff --git a/tools/skl_compute_wrpll.c b/tools/skl_compute_wrpll.c
index cf808c7..8b6fcd3 100644
--- a/tools/skl_compute_wrpll.c
+++ b/tools/skl_compute_wrpll.c
@@ -867,6 +867,7 @@  static void test_run(struct test_ops *test)
 	igt_stats_t stats;
 
 	igt_stats_init(&stats, ARRAY_SIZE(modes));
+	igt_stats_set_population(&stats, true);
 
 	for (m = 0; m < ARRAY_SIZE(modes); m++) {
 		struct skl_wrpll_params params = {};