Source code for stac.parametric_tests

"""
.. module:: parametric_tests
   :synopsis: Parametric Tests

.. moduleauthor:: Andrew Carter <andrew@invalid.com>


"""

import itertools as it
import scipy as sp
import scipy.stats as st

def anova_test(*args):
    """
    Performs a 1-way ANOVA.

    Tests the hypothesis that in a set of k independent sample groups
    (where k >= 2) at least two of the groups represent populations with
    different mean values.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        The sample measurements for each group.

    Returns
    -------
    F-value : float
        The computed F-value of the test.
    p-value : float
        The associated p-value from the F-distribution.
    pivots : array_like
        The pivotal quantities for each group.

    References
    ----------
    D.J. Sheskin, Handbook of parametric and nonparametric statistical
    procedures. CRC Press, 2003, Test 21: The Single-Factor
    Between-Subjects Analysis of Variance
    """
    k = len(args)
    if k < 2:
        raise ValueError('Less than 2 groups')
    n = len(args[0])
    if len(set([len(v) for v in args])) != 1:
        raise ValueError('Unequal number of samples')

    # Precalcs
    x_j = [sp.sum(group) for group in args]
    x_t = sp.sum(x_j)

    # Variances
    ss_t = sp.sum([v**2 for group in args for v in group]) - x_t**2/float(k*n)
    ss_bg = sp.sum([x_j[j]**2/float(n) for j in range(k)]) - x_t**2/float(k*n)
    ss_wg = ss_t - ss_bg

    # Degrees of freedom
    df_bg = k - 1
    df_wg = n*k - k

    F = (ss_bg/df_bg)/(ss_wg/df_wg)
    p_value = 1 - st.f.cdf(F, df_bg, df_wg)

    # Pivots
    pivots = [sp.mean(group)/sp.sqrt(2*(ss_wg/df_wg)/float(n)) for group in args]

    return F, p_value, pivots
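A minimal usage sketch for ``anova_test`` (not part of the module itself): the group names and measurement values below are invented for illustration, and the import path assumes the package layout shown in the page title.

from stac.parametric_tests import anova_test

# Hypothetical data: three independent groups with n = 4 measurements each.
group_a = [4.1, 3.8, 4.4, 4.0]
group_b = [3.2, 3.5, 3.1, 3.4]
group_c = [4.9, 5.1, 4.7, 5.0]

f_value, p_value, pivots = anova_test(group_a, group_b, group_c)
# A p-value below the chosen significance level (e.g. 0.05) suggests that
# at least two of the group means differ.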
def anova_within_test(*args):
    """
    Performs a 1-way ANOVA within cases.

    Tests the hypothesis that in a set of k dependent sample groups
    (where k >= 2) at least two of the groups represent populations with
    different mean values.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        The sample measurements for each group.

    Returns
    -------
    F-value : float
        The computed F-value of the test.
    p-value : float
        The associated p-value from the F-distribution.
    pivots : array_like
        The pivotal quantities for each group.

    References
    ----------
    D.J. Sheskin, Handbook of parametric and nonparametric statistical
    procedures. CRC Press, 2003, Test 24: The Single-Factor
    Within-Subjects Analysis of Variance
    """
    k = len(args)
    if k < 2:
        raise ValueError('Less than 2 groups')
    n = len(args[0])
    if len(set([len(v) for v in args])) != 1:
        raise ValueError('Unequal number of samples')

    # Precalcs
    x_j = [sp.sum(group) for group in args]
    x_t = sp.sum(x_j)
    s_i = [sp.sum([group[i] for group in args]) for i in range(n)]

    # Variances
    ss_t = sp.sum([v**2 for group in args for v in group]) - x_t**2/float(k*n)
    ss_bg = sp.sum([x_j[j]**2/float(n) for j in range(k)]) - x_t**2/float(k*n)
    ss_bs = sp.sum([s_i[i]**2/float(k) for i in range(n)]) - x_t**2/float(k*n)
    ss_wg = ss_t - ss_bg
    ss_res = ss_t - ss_bg - ss_bs

    # Degrees of freedom
    df_bg = k - 1
    df_wg = n*k - k
    df_res = (n-1)*(k-1)

    F = (ss_bg/df_bg)/(ss_res/df_res)
    p_value = 1 - st.f.cdf(F, df_bg, df_res)

    # Pivots
    pivots = [sp.mean(group)/sp.sqrt(2*(ss_wg/df_wg)/float(n)) for group in args]

    return F, p_value, pivots
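A similar sketch for ``anova_within_test``, again with made-up numbers: here the k groups are dependent (repeated measures), e.g. the same n subjects measured under each of the k conditions, and every group must have the same length.

from stac.parametric_tests import anova_within_test

# Hypothetical repeated-measures data: 4 subjects under 3 conditions.
condition_1 = [10.2, 9.8, 11.0, 10.5]
condition_2 = [9.1, 8.7, 9.9, 9.4]
condition_3 = [10.9, 10.4, 11.6, 11.1]

f_value, p_value, pivots = anova_within_test(condition_1, condition_2, condition_3)
# The F statistic here uses the residual sum of squares and is compared
# against an F(k-1, (n-1)(k-1)) distribution; a small p-value indicates
# differing condition means.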
def bonferroni_test(pivots, n):
    """
    Performs a Bonferroni-Dunn post-hoc test using the pivot quantities
    obtained by an ANOVA test.

    Parameters
    ----------
    pivots : dictionary_like
        A dictionary with format 'groupname':'pivotal quantity'
    n : int
        Number of samples per group

    Returns
    -------
    Comparisons : array_like
        String identifiers for each comparison, with format
        'group_i vs group_j'.
    T-values : array_like
        The computed T-value statistic for each comparison.
    p-values : array_like
        The associated p-value from the T-distribution, which depends on
        the index of the comparison.
    Adjusted p-values : array_like
        The associated adjusted p-values, which can be compared with a
        significance level.

    References
    ----------
    D.J. Sheskin, Handbook of parametric and nonparametric statistical
    procedures. CRC Press, 2003, Test 21b: The Bonferroni-Dunn test
    """
    k = len(pivots)
    values = list(pivots.values())
    keys = list(pivots.keys())
    m = (k*(k-1))/2.

    versus = list(it.combinations(range(k), 2))

    comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus]
    t_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus]
    p_values = [1 - st.t.cdf(t, n*k - k) for t in t_values]

    # Sort values by p_value so that p_0 < p_1
    p_values, t_values, comparisons = map(list, zip(*sorted(
        zip(p_values, t_values, comparisons), key=lambda t: t[0])))
    adj_p_values = [min(m*p_value, 1) for p_value in p_values]

    return comparisons, t_values, p_values, adj_p_values
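A sketch of how ``bonferroni_test`` can follow the ANOVA above (an assumed workflow with hypothetical group labels 'A', 'B', 'C'): the pivotal quantities returned by ``anova_test`` are packed into a dictionary keyed by group name, and ``n`` is the number of samples per group.

from stac.parametric_tests import anova_test, bonferroni_test

group_a = [4.1, 3.8, 4.4, 4.0]
group_b = [3.2, 3.5, 3.1, 3.4]
group_c = [4.9, 5.1, 4.7, 5.0]

f_value, p_value, pivots = anova_test(group_a, group_b, group_c)
pivots_dict = {'A': pivots[0], 'B': pivots[1], 'C': pivots[2]}

comparisons, t_values, p_values, adj_p_values = bonferroni_test(pivots_dict, 4)
for comparison, adj_p in zip(comparisons, adj_p_values):
    # Reject the comparison's null hypothesis when adj_p is below alpha.
    print(comparison, adj_p)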