Source code for stac.nonparametric_tests

# -*- coding: utf-8 -*-

import numpy as np
import scipy as sp
import scipy.special  # ensures sp.special is available for the Shaffer helper
import scipy.stats as st
import itertools as it


def binomial_sign_test(*args):
    """
    Performs a binomial sign test for two dependent samples.
    Tests the hypothesis that the two dependent samples represent two different populations.

    Parameters
    ----------
    sample1, sample2 : array_like
        The sample measurements for each group.

    Returns
    -------
    B-value : float
        The computed B-value of the test.
    p-value : float
        The associated p-value from the binomial distribution.

    References
    ----------
    D.J. Sheskin, Handbook of parametric and nonparametric statistical procedures.
    CRC Press, 2003, Test 19: The Binomial Sign Test for Two Dependent Samples
    """
    k = len(args)
    if k != 2:
        raise ValueError('The test needs two samples')
    n = len(args[0])

    d_plus = 0
    d_minus = 0
    for i in range(n):
        # Zero differences are eliminated
        if args[0][i] < args[1][i]:
            d_plus = d_plus + 1
        elif args[0][i] > args[1][i]:
            d_minus = d_minus + 1

    x = max(d_plus, d_minus)
    n = d_plus + d_minus

    # Two-tailed p-value: double the probability of a count at least as extreme
    # as x under Binomial(n, 0.5); sf(x-1) includes the observed count itself
    p_value = 2 * st.binom.sf(x - 1, n, 0.5)

    return x, p_value

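# Usage sketch with hypothetical error rates of two classifiers measured on the
# same ten datasets (the two samples must be paired; values are illustrative only):
#
#   clf_a = [0.12, 0.15, 0.20, 0.11, 0.18, 0.25, 0.14, 0.30, 0.22, 0.16]
#   clf_b = [0.14, 0.16, 0.19, 0.13, 0.21, 0.27, 0.15, 0.33, 0.24, 0.18]
#   b_value, p_value = binomial_sign_test(clf_a, clf_b)
#   # Reject the hypothesis of equal populations when p_value < alpha.
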
def friedman_test(*args):
    """
    Performs a Friedman ranking test.
    Tests the hypothesis that in a set of k dependent samples groups (where k >= 2)
    at least two of the groups represent populations with different median values.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        The sample measurements for each group.

    Returns
    -------
    F-value : float
        The computed F-value of the test (Iman-Davenport correction of the
        Friedman statistic).
    p-value : float
        The associated p-value from the F-distribution.
    rankings : array_like
        The ranking for each group.
    pivots : array_like
        The pivotal quantities for each group.

    References
    ----------
    M. Friedman, The use of ranks to avoid the assumption of normality implicit
    in the analysis of variance, Journal of the American Statistical Association
    32 (1937) 674–701.
    D.J. Sheskin, Handbook of parametric and nonparametric statistical procedures.
    CRC Press, 2003, Test 25: The Friedman Two-Way Analysis of Variance by Ranks
    """
    k = len(args)
    if k < 2:
        raise ValueError('Less than 2 levels')
    n = len(args[0])
    if len(set([len(v) for v in args])) != 1:
        raise ValueError('Unequal number of samples')

    rankings = []
    for i in range(n):
        row = [col[i] for col in args]
        row_sort = sorted(row)
        # Ties receive the average of the ranks they span
        rankings.append([row_sort.index(v) + 1 + (row_sort.count(v) - 1) / 2. for v in row])

    rankings_avg = [np.mean([case[j] for case in rankings]) for j in range(k)]
    rankings_cmp = [r / np.sqrt(k * (k + 1) / (6. * n)) for r in rankings_avg]

    chi2 = ((12 * n) / float(k * (k + 1))) * (np.sum([r**2 for r in rankings_avg]) - (k * (k + 1)**2) / 4.)
    iman_davenport = ((n - 1) * chi2) / float(n * (k - 1) - chi2)
    p_value = 1 - st.f.cdf(iman_davenport, k - 1, (k - 1) * (n - 1))

    return iman_davenport, p_value, rankings_avg, rankings_cmp

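# A minimal usage sketch with hypothetical accuracies of three algorithms over
# five datasets (each positional argument is one algorithm, aligned by dataset):
#
#   alg1 = [0.85, 0.80, 0.70, 0.90, 0.88]
#   alg2 = [0.82, 0.79, 0.72, 0.85, 0.86]
#   alg3 = [0.78, 0.75, 0.68, 0.83, 0.81]
#   f_value, p_value, rankings, pivots = friedman_test(alg1, alg2, alg3)
#   # If p_value < alpha, at least two algorithms differ; 'pivots' can be turned
#   # into a {name: pivot} dict for the post-hoc tests defined below.
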
def friedman_aligned_ranks_test(*args):
    """
    Performs a Friedman aligned ranks ranking test.
    Tests the hypothesis that in a set of k dependent samples groups (where k >= 2)
    at least two of the groups represent populations with different median values.
    The difference with a Friedman test is that the observations are aligned
    (centered on the mean of their block) and then ranked jointly across the
    whole dataset, which is useful when the number of samples is low.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        The sample measurements for each group.

    Returns
    -------
    Chi2-value : float
        The computed Chi2-value of the test.
    p-value : float
        The associated p-value from the Chi2-distribution.
    rankings : array_like
        The ranking for each group.
    pivots : array_like
        The pivotal quantities for each group.

    References
    ----------
    J.L. Hodges, E.L. Lehmann, Rank methods for combination of independent
    experiments in analysis of variance, Annals of Mathematical Statistics 33
    (1962) 482–497.
    """
    k = len(args)
    if k < 2:
        raise ValueError('Less than 2 levels')
    n = len(args[0])
    if len(set([len(v) for v in args])) != 1:
        raise ValueError('Unequal number of samples')

    # Center each block on its mean, then pool all n*k aligned observations
    aligned_observations = []
    for i in range(n):
        loc = np.mean([col[i] for col in args])
        aligned_observations.extend([col[i] - loc for col in args])

    aligned_observations_sort = sorted(aligned_observations)

    aligned_ranks = []
    for i in range(n):
        row = []
        for j in range(k):
            v = aligned_observations[i * k + j]
            row.append(aligned_observations_sort.index(v) + 1 + (aligned_observations_sort.count(v) - 1) / 2.)
        aligned_ranks.append(row)

    rankings_avg = [np.mean([case[j] for case in aligned_ranks]) for j in range(k)]
    rankings_cmp = [r / np.sqrt(k * (n * k + 1) / 6.) for r in rankings_avg]

    r_i = [np.sum(case) for case in aligned_ranks]
    r_j = [np.sum([case[j] for case in aligned_ranks]) for j in range(k)]
    T = (k - 1) * (np.sum([v**2 for v in r_j]) - (k * n**2 / 4.) * (k * n + 1)**2) / float(
        ((k * n * (k * n + 1) * (2 * k * n + 1)) / 6.) - (1. / float(k)) * np.sum([v**2 for v in r_i]))

    p_value = 1 - st.chi2.cdf(T, k - 1)

    return T, p_value, rankings_avg, rankings_cmp

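# Same calling convention as friedman_test (continuing the hypothetical sketch above):
#
#   chi2_value, p_value, rankings, pivots = friedman_aligned_ranks_test(alg1, alg2, alg3)
#
# Because all n*k aligned observations are ranked jointly, this variant tends
# to be preferable when the number of blocks n is small.
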
def quade_test(*args):
    """
    Performs a Quade ranking test.
    Tests the hypothesis that in a set of k dependent samples groups (where k >= 2)
    at least two of the groups represent populations with different median values.
    The difference with a Friedman test is that the within-block rankings are
    weighted by the range of the observations in each block.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        The sample measurements for each group.

    Returns
    -------
    F-value : float
        The computed F-value of the test.
    p-value : float
        The associated p-value from the F-distribution.
    rankings : array_like
        The ranking for each group.
    pivots : array_like
        The pivotal quantities for each group.

    References
    ----------
    D. Quade, Using weighted rankings in the analysis of complete blocks with
    additive block effects, Journal of the American Statistical Association 74
    (1979) 680–683.
    """
    k = len(args)
    if k < 2:
        raise ValueError('Less than 2 levels')
    n = len(args[0])
    if len(set([len(v) for v in args])) != 1:
        raise ValueError('Unequal number of samples')

    rankings = []
    ranges = []
    for i in range(n):
        row = [col[i] for col in args]
        ranges.append(max(row) - min(row))
        row_sort = sorted(row)
        rankings.append([row_sort.index(v) + 1 + (row_sort.count(v) - 1) / 2. for v in row])

    # Blocks are themselves ranked by their sample range (average ranks on ties)
    ranges_sort = sorted(ranges)
    ranking_cases = [ranges_sort.index(v) + 1 + (ranges_sort.count(v) - 1) / 2. for v in ranges]

    S = []
    W = []
    for i in range(n):
        S.append([ranking_cases[i] * (r - (k + 1) / 2.) for r in rankings[i]])
        W.append([ranking_cases[i] * r for r in rankings[i]])

    Sj = [np.sum([row[j] for row in S]) for j in range(k)]
    Wj = [np.sum([row[j] for row in W]) for j in range(k)]

    rankings_avg = [w / (n * (n + 1) / 2.) for w in Wj]
    rankings_cmp = [r / np.sqrt(k * (k + 1) * (2 * n + 1) * (k - 1) / (18. * n * (n + 1))) for r in rankings_avg]

    A = np.sum([S[i][j]**2 for i in range(n) for j in range(k)])
    B = np.sum([s**2 for s in Sj]) / float(n)
    F = (n - 1) * B / (A - B)
    p_value = 1 - st.f.cdf(F, k - 1, (k - 1) * (n - 1))

    return F, p_value, rankings_avg, rankings_cmp

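# Same calling convention again (hypothetical data from the sketch above):
#
#   f_value, p_value, rankings, pivots = quade_test(alg1, alg2, alg3)
#
# Blocks whose observations span a wider range receive a larger weight, so
# 'easy' and 'hard' blocks do not contribute equally to the statistic.
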
def bonferroni_dunn_test(ranks, control=None):
    """
    Performs a Bonferroni-Dunn post-hoc test using the pivot quantities obtained by a ranking test.
    Tests the hypothesis that the ranking of the control method is different to each of the other methods.

    Parameters
    ----------
    ranks : dictionary_like
        A dictionary with format 'groupname':'pivotal quantity'
    control : string optional
        The name of the control method; if None (default), the group with the
        minimum ranking is taken as the control.

    Returns
    -------
    Comparisons : array-like
        String identifiers of each comparison with format 'group_i vs group_j'
    Z-values : array-like
        The computed Z-value statistic for each comparison.
    p-values : array-like
        The associated p-values from the Z-distribution, which depend on the
        index of the comparison
    Adjusted p-values : array-like
        The associated adjusted p-values, which can be compared with a
        significance level

    References
    ----------
    O.J. Dunn, Multiple comparisons among means, Journal of the American
    Statistical Association 56 (1961) 52–64.
    """
    k = len(ranks)
    values = list(ranks.values())
    keys = list(ranks.keys())
    if not control:
        control_i = values.index(min(values))
    else:
        control_i = keys.index(control)

    comparisons = [keys[control_i] + " vs " + keys[i] for i in range(k) if i != control_i]
    z_values = [abs(values[control_i] - values[i]) for i in range(k) if i != control_i]
    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
    # Sort by p-value so that p_0 <= p_1 <= ... <= p_{k-2}
    p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))

    adj_p_values = [min((k - 1) * p_value, 1) for p_value in p_values]

    return comparisons, z_values, p_values, adj_p_values

def holm_test(ranks, control=None):
    """
    Performs a Holm post-hoc test using the pivot quantities obtained by a ranking test.
    Tests the hypothesis that the ranking of the control method is different to each of the other methods.

    Parameters
    ----------
    ranks : dictionary_like
        A dictionary with format 'groupname':'pivotal quantity'
    control : string optional
        The name of the control method; if None (default), the group with the
        minimum ranking is taken as the control.

    Returns
    -------
    Comparisons : array-like
        String identifiers of each comparison with format 'group_i vs group_j'
    Z-values : array-like
        The computed Z-value statistic for each comparison.
    p-values : array-like
        The associated p-values from the Z-distribution, which depend on the
        index of the comparison
    Adjusted p-values : array-like
        The associated adjusted p-values, which can be compared with a
        significance level

    References
    ----------
    S. Holm, A simple sequentially rejective multiple test procedure,
    Scandinavian Journal of Statistics 6 (1979) 65–70.
    """
    k = len(ranks)
    values = list(ranks.values())
    keys = list(ranks.keys())
    if not control:
        control_i = values.index(min(values))
    else:
        control_i = keys.index(control)

    comparisons = [keys[control_i] + " vs " + keys[i] for i in range(k) if i != control_i]
    z_values = [abs(values[control_i] - values[i]) for i in range(k) if i != control_i]
    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
    # Sort by p-value so that p_0 <= p_1 <= ... <= p_{k-2}
    p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))

    # Step-down (Holm) adjustment: cumulative maximum of (m - j + 1) * p_(j)
    adj_p_values = [min(max((k - (j + 1)) * p_values[j] for j in range(i + 1)), 1) for i in range(k - 1)]

    return comparisons, z_values, p_values, adj_p_values

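# Usage sketch for the one-vs-all post-hoc tests, with hypothetical pivotal
# quantities (in practice, the 'pivots' returned by a ranking test, keyed by
# group name):
#
#   pivots = {'alg1': 0.9, 'alg2': 2.1, 'alg3': 3.0}
#   comparisons, z_values, p_values, adj_p_values = holm_test(pivots, control='alg1')
#   # With control=None the group with the smallest pivot becomes the control.
#   # bonferroni_dunn_test, hochberg_test, li_test and finner_test share this API.
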
def hochberg_test(ranks, control=None):
    """
    Performs a Hochberg post-hoc test using the pivot quantities obtained by a ranking test.
    Tests the hypothesis that the ranking of the control method is different to each of the other methods.

    Parameters
    ----------
    ranks : dictionary_like
        A dictionary with format 'groupname':'pivotal quantity'
    control : string optional
        The name of the control method; if None (default), the group with the
        minimum ranking is taken as the control.

    Returns
    -------
    Comparisons : array-like
        String identifiers of each comparison with format 'group_i vs group_j'
    Z-values : array-like
        The computed Z-value statistic for each comparison.
    p-values : array-like
        The associated p-values from the Z-distribution, which depend on the
        index of the comparison
    Adjusted p-values : array-like
        The associated adjusted p-values, which can be compared with a
        significance level

    References
    ----------
    Y. Hochberg, A sharper Bonferroni procedure for multiple tests of
    significance, Biometrika 75 (1988) 800–803.
    """
    k = len(ranks)
    values = list(ranks.values())
    keys = list(ranks.keys())
    if not control:
        control_i = values.index(min(values))
    else:
        control_i = keys.index(control)

    comparisons = [keys[control_i] + " vs " + keys[i] for i in range(k) if i != control_i]
    z_values = [abs(values[control_i] - values[i]) for i in range(k) if i != control_i]
    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
    # Sort by p-value so that p_0 <= p_1 <= ... <= p_{k-2}
    p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))

    # Step-up (Hochberg) adjustment: cumulative minimum of (m - j + 1) * p_(j)
    # taken from the largest p-value downwards, as in R's p.adjust('hochberg')
    adj_p_values = [min(min((k - j) * p_values[j - 1] for j in range(k - 1, i, -1)), 1) for i in range(k - 1)]

    return comparisons, z_values, p_values, adj_p_values

def li_test(ranks, control=None):
    """
    Performs a Li post-hoc test using the pivot quantities obtained by a ranking test.
    Tests the hypothesis that the ranking of the control method is different to each of the other methods.

    Parameters
    ----------
    ranks : dictionary_like
        A dictionary with format 'groupname':'pivotal quantity'
    control : string optional
        The name of the control method; if None (default), the group with the
        minimum ranking is taken as the control.

    Returns
    -------
    Comparisons : array-like
        String identifiers of each comparison with format 'group_i vs group_j'
    Z-values : array-like
        The computed Z-value statistic for each comparison.
    p-values : array-like
        The associated p-values from the Z-distribution, which depend on the
        index of the comparison
    Adjusted p-values : array-like
        The associated adjusted p-values, which can be compared with a
        significance level

    References
    ----------
    J. Li, A two-step rejection procedure for testing multiple hypotheses,
    Journal of Statistical Planning and Inference 138 (2008) 1521–1527.
    """
    k = len(ranks)
    values = list(ranks.values())
    keys = list(ranks.keys())
    if not control:
        control_i = values.index(min(values))
    else:
        control_i = keys.index(control)

    comparisons = [keys[control_i] + " vs " + keys[i] for i in range(k) if i != control_i]
    z_values = [abs(values[control_i] - values[i]) for i in range(k) if i != control_i]
    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
    # Sort by p-value so that p_0 <= p_1 <= ... <= p_{k-2}
    p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))

    # Li's two-step adjustment: p_i / (p_i + 1 - p_(m))
    adj_p_values = [p_values[i] / (p_values[i] + 1 - p_values[-1]) for i in range(k - 1)]

    return comparisons, z_values, p_values, adj_p_values

def finner_test(ranks, control=None):
    """
    Performs a Finner post-hoc test using the pivot quantities obtained by a ranking test.
    Tests the hypothesis that the ranking of the control method is different to each of the other methods.

    Parameters
    ----------
    ranks : dictionary_like
        A dictionary with format 'groupname':'pivotal quantity'
    control : string optional
        The name of the control method; if None (default), the group with the
        minimum ranking is taken as the control.

    Returns
    -------
    Comparisons : array-like
        String identifiers of each comparison with format 'group_i vs group_j'
    Z-values : array-like
        The computed Z-value statistic for each comparison.
    p-values : array-like
        The associated p-values from the Z-distribution, which depend on the
        index of the comparison
    Adjusted p-values : array-like
        The associated adjusted p-values, which can be compared with a
        significance level

    References
    ----------
    H. Finner, On a monotonicity problem in step-down multiple test procedures,
    Journal of the American Statistical Association 88 (1993) 920–923.
    """
    k = len(ranks)
    values = list(ranks.values())
    keys = list(ranks.keys())
    if not control:
        control_i = values.index(min(values))
    else:
        control_i = keys.index(control)

    comparisons = [keys[control_i] + " vs " + keys[i] for i in range(k) if i != control_i]
    z_values = [abs(values[control_i] - values[i]) for i in range(k) if i != control_i]
    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
    # Sort by p-value so that p_0 <= p_1 <= ... <= p_{k-2}
    p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))

    adj_p_values = [min(max(1 - (1 - p_values[j])**((k - 1) / float(j + 1)) for j in range(i + 1)), 1) for i in range(k - 1)]

    return comparisons, z_values, p_values, adj_p_values

def nemenyi_multitest(ranks):
    """
    Performs a Nemenyi post-hoc test using the pivot quantities obtained by a ranking test.
    Tests the hypothesis that the rankings of each pair of groups are different.
    The p-values are adjusted with a Bonferroni (Dunn) correction over all
    k*(k-1)/2 pairwise comparisons.

    Parameters
    ----------
    ranks : dictionary_like
        A dictionary with format 'groupname':'pivotal quantity'

    Returns
    -------
    Comparisons : array-like
        String identifiers of each comparison with format 'group_i vs group_j'
    Z-values : array-like
        The computed Z-value statistic for each comparison.
    p-values : array-like
        The associated p-values from the Z-distribution, which depend on the
        index of the comparison
    Adjusted p-values : array-like
        The associated adjusted p-values, which can be compared with a
        significance level

    References
    ----------
    Bonferroni-Dunn: O.J. Dunn, Multiple comparisons among means, Journal of
    the American Statistical Association 56 (1961) 52–64.
    """
    k = len(ranks)
    values = list(ranks.values())
    keys = list(ranks.keys())
    versus = list(it.combinations(range(k), 2))

    comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus]
    z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus]
    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
    # Sort by p-value so that p_0 <= p_1 <= ... <= p_{m-1}
    p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))

    m = int(k * (k - 1) / 2.)
    adj_p_values = [min(m * p_value, 1) for p_value in p_values]

    return comparisons, z_values, p_values, adj_p_values

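# Usage sketch for the all-vs-all post-hoc tests (hypothetical pivots dict):
#
#   comparisons, z_values, p_values, adj_p_values = nemenyi_multitest(
#       {'alg1': 0.9, 'alg2': 2.1, 'alg3': 3.0})
#   # All k*(k-1)/2 pairwise comparisons are tested; holm_multitest,
#   # hochberg_multitest, finner_multitest and shaffer_multitest share this API.
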
def holm_multitest(ranks):
    """
    Performs a Holm post-hoc test using the pivot quantities obtained by a ranking test.
    Tests the hypothesis that the rankings of each pair of groups are different.

    Parameters
    ----------
    ranks : dictionary_like
        A dictionary with format 'groupname':'pivotal quantity'

    Returns
    -------
    Comparisons : array-like
        String identifiers of each comparison with format 'group_i vs group_j'
    Z-values : array-like
        The computed Z-value statistic for each comparison.
    p-values : array-like
        The associated p-values from the Z-distribution, which depend on the
        index of the comparison
    Adjusted p-values : array-like
        The associated adjusted p-values, which can be compared with a
        significance level

    References
    ----------
    S. Holm, A simple sequentially rejective multiple test procedure,
    Scandinavian Journal of Statistics 6 (1979) 65–70.
    """
    k = len(ranks)
    values = list(ranks.values())
    keys = list(ranks.keys())
    versus = list(it.combinations(range(k), 2))

    comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus]
    z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus]
    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
    # Sort by p-value so that p_0 <= p_1 <= ... <= p_{m-1}
    p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))

    m = int(k * (k - 1) / 2.)
    # Step-down (Holm) adjustment: cumulative maximum of (m - j) * p_(j)
    adj_p_values = [min(max((m - j) * p_values[j] for j in range(i + 1)), 1) for i in range(m)]

    return comparisons, z_values, p_values, adj_p_values

def hochberg_multitest(ranks):
    """
    Performs a Hochberg post-hoc test using the pivot quantities obtained by a ranking test.
    Tests the hypothesis that the rankings of each pair of groups are different.

    Parameters
    ----------
    ranks : dictionary_like
        A dictionary with format 'groupname':'pivotal quantity'

    Returns
    -------
    Comparisons : array-like
        String identifiers of each comparison with format 'group_i vs group_j'
    Z-values : array-like
        The computed Z-value statistic for each comparison.
    p-values : array-like
        The associated p-values from the Z-distribution, which depend on the
        index of the comparison
    Adjusted p-values : array-like
        The associated adjusted p-values, which can be compared with a
        significance level

    References
    ----------
    Y. Hochberg, A sharper Bonferroni procedure for multiple tests of
    significance, Biometrika 75 (1988) 800–803.
    """
    k = len(ranks)
    values = list(ranks.values())
    keys = list(ranks.keys())
    versus = list(it.combinations(range(k), 2))

    comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus]
    z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus]
    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
    # Sort by p-value so that p_0 <= p_1 <= ... <= p_{m-1}
    p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))

    m = int(k * (k - 1) / 2.)
    # Step-up (Hochberg) adjustment: cumulative minimum of (m - j + 1) * p_(j)
    # taken from the largest p-value downwards, as in R's p.adjust('hochberg')
    adj_p_values = [min(min((m + 1 - j) * p_values[j - 1] for j in range(m, i, -1)), 1) for i in range(m)]

    return comparisons, z_values, p_values, adj_p_values

def finner_multitest(ranks):
    """
    Performs a Finner post-hoc test using the pivot quantities obtained by a ranking test.
    Tests the hypothesis that the rankings of each pair of groups are different.

    Parameters
    ----------
    ranks : dictionary_like
        A dictionary with format 'groupname':'pivotal quantity'

    Returns
    -------
    Comparisons : array-like
        String identifiers of each comparison with format 'group_i vs group_j'
    Z-values : array-like
        The computed Z-value statistic for each comparison.
    p-values : array-like
        The associated p-values from the Z-distribution, which depend on the
        index of the comparison
    Adjusted p-values : array-like
        The associated adjusted p-values, which can be compared with a
        significance level

    References
    ----------
    H. Finner, On a monotonicity problem in step-down multiple test procedures,
    Journal of the American Statistical Association 88 (1993) 920–923.
    """
    k = len(ranks)
    values = list(ranks.values())
    keys = list(ranks.keys())
    versus = list(it.combinations(range(k), 2))

    comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus]
    z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus]
    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
    # Sort by p-value so that p_0 <= p_1 <= ... <= p_{m-1}
    p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda t: t[0])))

    m = int(k * (k - 1) / 2.)
    adj_p_values = [min(max(1 - (1 - p_values[j])**(m / float(j + 1)) for j in range(i + 1)), 1) for i in range(m)]

    return comparisons, z_values, p_values, adj_p_values

def _S(k):
    """
    Helper function for the Shaffer test.
    It obtains the set of possible numbers of simultaneously true hypotheses
    when k groups are compared with an all vs all strategy.
    """
    if k == 0 or k == 1:
        return {0}
    else:
        result = set()
        for j in reversed(range(1, k + 1)):
            tmp = _S(k - j)  # recursive call (the original erroneously called S)
            for s in tmp:
                result = result.union({sp.special.binom(j, 2) + s})
        return list(result)

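# For example, with k = 4 groups there are m = 6 pairwise hypotheses and the
# possible numbers of simultaneously true hypotheses are {0, 1, 2, 3, 6}
# (Shaffer, 1986), which is what _S(4) computes (values come back as floats,
# since scipy.special.binom returns floats).
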
def shaffer_multitest(ranks):
    """
    Performs a Shaffer post-hoc test using the pivot quantities obtained by a ranking test.
    Tests the hypothesis that the rankings of each pair of groups are different.

    Parameters
    ----------
    ranks : dictionary_like
        A dictionary with format 'groupname':'pivotal quantity'

    Returns
    -------
    Comparisons : array-like
        String identifiers of each comparison with format 'group_i vs group_j'
    Z-values : array-like
        The computed Z-value statistic for each comparison.
    p-values : array-like
        The associated p-values from the Z-distribution, which depend on the
        index of the comparison
    Adjusted p-values : array-like
        The associated adjusted p-values, which can be compared with a
        significance level

    References
    ----------
    J.P. Shaffer, Modified sequentially rejective multiple test procedures,
    Journal of the American Statistical Association 81 (1986) 826–831.
    """
    k = len(ranks)
    values = list(ranks.values())
    keys = list(ranks.keys())
    versus = list(it.combinations(range(k), 2))

    m = int(k * (k - 1) / 2.)
    # Possible numbers of true hypotheses among the m pairwise comparisons
    # (the original reconstructed k from m via int((1 + sqrt(1 + 8*m)) / 2))
    A = _S(k)
    # t_i: the largest number of hypotheses that can still be true at step i
    t = [max([a for a in A if a <= m - i]) for i in range(m)]

    comparisons = [keys[vs[0]] + " vs " + keys[vs[1]] for vs in versus]
    z_values = [abs(values[vs[0]] - values[vs[1]]) for vs in versus]
    p_values = [2 * (1 - st.norm.cdf(abs(z))) for z in z_values]
    # Sort by p-value so that p_0 <= p_1 <= ... <= p_{m-1}
    p_values, z_values, comparisons = map(list, zip(*sorted(zip(p_values, z_values, comparisons), key=lambda pair: pair[0])))

    adj_p_values = [min(max(t[j] * p_values[j] for j in range(i + 1)), 1) for i in range(m)]

    return comparisons, z_values, p_values, adj_p_values

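# End-to-end sketch of a typical workflow with hypothetical data: run a ranking
# test first, and apply Shaffer's correction only if its null hypothesis is rejected:
#
#   f_value, p_value, rankings, pivots = friedman_test(alg1, alg2, alg3)
#   if p_value < 0.05:
#       names = ['alg1', 'alg2', 'alg3']
#       comparisons, z_values, p_values, adj_p_values = shaffer_multitest(dict(zip(names, pivots)))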