/******************************************************************** * Statistic functions. Refer to Numerical Recipe in C * http://www.library.cornell.edu/nr/bookcpdf.html ********************************************************************/ import sfunc; load("matrix.dll"); TINY = 1.0e-30; /******************************************************************** * Given matrix, this routine returns its mean, average deviation, * variance, and if estra=true, standard deviation, skewness, * and kurtosis. ********************************************************************/ function moment(X, extra=false) { [m, n, n] = X:size(); if (n <= 1) error("matrix has less than 2 data."); ave = X:sum()/n; D = X - ave; dev = D:sum()/n; if (extra) { D2 = D * D; var = D2:sum()/(n - 1); std = sqrt(var); D /= std; D2 = D * D; skew = D2 * D; skew = skew:sum()/n; kurt = D2 * D2; kurt = kurt:sum()/n - 3; return [ave, dev, var, std, skew, kurt]; } else { D *= D; var = D:sum()/(n - 1); return [ave, dev, var]; } } /******************************************************************** * Given two matrixes X and Y, this routine computes their * correlation coefficient r, the signifficance level at which * the null hypothesis of zero correlation is disproved (probability * whose small value indicates a signifficant correlation), and * Fisher's z, whose value can be used in further statistical tests. ********************************************************************/ function corr(X, Y) { [m, n, nx] = X:size(); [m, n, ny] = Y:size(); ax = X:sum()/nx; ay = Y:sum()/ny; xt = X - ax; yt = Y - ay; sxx = xt * xt; sxx = sxx:sum(); syy = yt * yt; syy = syy:sum(); sxy = xt * yt; sxy = sxy:sum(); r = sxy / (sqrt(sxx * syy) + TINY); z = 0.5 * log((1.0 + r + TINY) / (1.0 - r + TINY)); df = nx - 2; t = r * sqrt(df / ((1.0 - r + TINY)*(1.0 + r + TINY))); prob = betai(0.5*df, 0.5, df/(df+t*t)); return [r, prob, z]; } /******************************************************************** * Given matrixes X and Y, this routine returns Student's t and its * signifficance prob, small values of prob indicating that the arrays * have signifficantly different means. The data arrays are assumed * to be drawn from populations with the same true variance. ********************************************************************/ function t_test(X, Y) { [ave1, var1, n1] = avevar(X); [ave2, var2, n2] = avevar(Y); df = n1 + n2 - 2; svar =((n1-1)*var1 + (n2-1)*var2) / df; t = (ave1 - ave2) / sqrt(svar*(1.0/n1 + 1.0/n2)); prob = betai(0.5*df, 0.5, df/(df + t*t)); return [t, prob]; } function avevar(X) { [m, n, n] = X:size(); ave = X:sum()/n; D = X - ave; ep = D:sum(); D *= D; return [ave, (D:sum() - ep*ep/n)/(n-1), n]; } /******************************************************************** * Given matrixes X and Y, this routine returns Student's t and its * signifficance prob, small values of prob indicating that the arrays * have signifficantly different means. The data arrays are assumed * to be drawn from populations with un-equal variance. ********************************************************************/ function tu_test(X, Y) { [ave1, var1, n1] = avevar(X); [ave2, var2, n2] = avevar(Y); a = var1/n1; b = var2/n2; t = (ave1 - ave2) / sqrt(a + b); df = (a+b)*(a+b) /(a*a/(n1-1) + b*b/(n2-1)); prob = betai(0.5*df, 0.5, df/(df+t*t)); return [t, prob]; } /******************************************************************** * Given paired matrixes X and Y, this routine returns * Student's t and its signifficance prob, small values of prob * indicating a signifficant difference of means. ********************************************************************/ function tp_test(X, Y) { [ave1, var1, n1] = avevar(X); [ave2, var2, n2] = avevar(Y); assert(n1 == n2); D = (X - ave1) * (Y - ave2); df = n1 - 1; cov = D:sum()/df; sd = sqrt((var1 + var2 - 2.0*cov)/n1); t = (ave1 - ave2) / (sd + TINY); prob = betai(0.5*df, 0.5, df/(df+t*t)); return [t, prob]; } /******************************************************************** * Given matrixes X and Y, this routine returns the value of f, and * its signifficance prob. Small values of prob indicate that the two * datasets have signifficantly different variances. ********************************************************************/ function f_test(X, Y) { [ave1, var1, n1] = avevar(X); [ave2, var2, n2] = avevar(Y); if (var1 > var2) { f = var1/var2; df1 = n1 - 1; df2 = n2 - 1; } else { f = var2 / var1; df1 = n2 - 1; df2 = n1 - 1; } prob = 2.0*betai(0.5*df2, 0.5*df1, df2/(df2+df1*f)); if (prob > 1.0) prob = 2.0 - prob; return [f, prob]; } /******************************************************************** * Given matrix X containing the observed numbers of events, and * matrix Y containing the expected numbers of events, this routine * returns the chi-square chisq and the signifficance prob. * A small value of prob indicates a signifficant difference * between the distributions X and Y. nc is the number of contrains. * Note: Y should be a known distribution. ********************************************************************/ function chi_test1(X, Y, nc=1) { I = Y <= 0; if (I:sum() > 0) error("bad expected number in chi test."); [m, n, nx] = X:size(); [m, n, ny] = Y:size(); assert(nx == ny); D = X - Y; D *= D; D /= Y; df = nx - nc; chisq = D:sum(); prob = gammaq(0.5*df, 0.5*chisq); return [chisq, prob, df]; } /******************************************************************** * Given matrix X containing the observed numbers of events, and * matrix Y containing the expected numbers of events, this routine * returns the chi-square chisq and the signifficance prob. * A small value of prob indicates a signifficant difference * between the distributions X and Y. nc is the number of contrains. * Note: X and Y are observations. ********************************************************************/ function chi_test2(X, Y, nc=0) { [m, n, nx] = X:size(); [m, n, ny] = Y:size(); assert(nx == ny); I = (Y < 0) || (X < 0); if (I:sum() > 0) error("bad expected number in chi test."); df = nx - nc; I = (Y == 0) && (X == 0); df -= I:sum(); D1 = X[I]; D2 = Y[I]; A = D1 - D2; A *= A; A /= (D1 + D2); chisq = A:sum(); prob = gammaq(0.5*df, 0.5*chisq); return [chisq, prob, df]; } /******************************************************************** * Linear fit: * Given a set of data points in matrixes X and Y, fit them to a * straight line y = a + bx by minimizing chi-square. Returned are * a, b and their respective probable uncertainties siga and sigb, * the chi-square chi2. * * Multi-linear fit: * When X and Y have different size, try to fit them to * y = c0 + c1*x1 + c2*x2... * Then the first n rows X will be replaced by the parameter variance * matrix, Y replaced by coefficients, and the function returns * ssq/(n-m) ********************************************************************/ function fit(X, Y) { [mx, nx, mnx] = X:size(); [my, ny, mny] = Y:size(); if (mnx != mny) return Y:mlfit(X); sx = X:sum(); sy = Y:sum(); sxoss = sx / mnx; T = X - sxoss; st2 = T * T; st2 = st2:sum(); b = T * Y; b = b:sum(); b /= st2; a = (sy - sx*b) / mnx; siga = sqrt((1.0 + sx*sx/(st2*mnx))/mnx); sigb = sqrt(1.0/st2); chi2 = Y - a - b*X; chi2 *= chi2; chi2 = chi2:sum(); sigdat = sqrt(chi2/(mnx-2)); siga = sigdat; sigb = sigdat; return [a, b, siga, sigb, chi2]; }