program multtest(distributions, multtestp, list, xyin, key, output); (* multtest: multiple Student's t-test Dr. Thomas D. Schneider National Institutes of Health National Cancer Institute Center for Cancer Research Nanobiology Program Molecular Information Theory Group Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu (use only if first address fails) http://www.ccrnp.ncifcrf.gov/~toms/ *) label 1; (* end of program *) const (* begin module version *) version = 1.21; (* of multtest.p 2007 Mar 21 2007 Mar 21, 1.21: upgrade simpson 2001 Jun 14, 1.20: documentation upgrade 1.19, 2000 April 19: working version origin from version 1.07 of ttest.p 1996 September 30 origin of ttest was probably 1994 January 15 *) (* end module version *) (* begin module describe.multtest *) (* name multtest: multiple Student's t-test synopsis multtest(distributions: in, multtestp: in, list: out, xyin: out, key: out, output: out) files distributions: parameters to control the program: A set of 3 numbers on one line defines a distribution: N (integer) mean (real) standard deviation (real) This file contains a series of such distributions, one per line. fudgefactor: If a line begins with 'f' then all standard deviations from that point on (until the next 'f') will be multiplied by the number following on the same line as f. This allows one to see the effects if the standard deviation does not account for all uncertainties. Lines that begin with '*' are comments. multtestp: a single character OR pairs of numbers, one per line, that represent the distributions to be compared by a ttest. The distributions listed are the only ones output to the list file. controlchar: If a line begins with a character then: l: compute the natural log of the mean and adjust the standard deviation accordingly (divide by the mean). c: set the color transform. The next character is the colortype: the colors produced in xyin are adjusted: n: normal colors Hue is set to the probability. i: interval colors. The 'i' must be followed by the number of intervals to truncate the colors to. Hue is set to the truncated probability. For example, if the number of intervals is 2, then the colors will be 0.0 to 0.5 and 0.5 to 1. s: standard interval colors. The colors will cut off at the points 0.50, 0.90 to 0.95 and 0.99. list: Input values, calculated Student's t value and probabilities that the numbers are the same, along with the Z score for the energy deviation from zero, and the probability that this deltadeltaG is zero. The pairs to be listed are defined in the multtestp file. xyin: input to xyplo program: column 1: (integer) first distribution number column 2: (integer) second distribution number column 3: (integer) hue fraction column 4: (integer) saturation fraction = 1 column 5: (integer) brightness fraction = 1 The hue is computed from the probability according to the colortype defined in multtestp. key: input to xyplo program, same colomuns but generates the key for the probabilities. output: messages to the user description This program performs T test computations for pairs of distributions and converts them to probabilities that the means are the same. This is a two-tailed Student's t test. examples For a multtestp file: 1 2 with a distributions file: 3 118 36 3 205 26 The resulting list file is: multtest 1.02 distribution 1 | distribution 2 number 3 | 3 mean 118.00000 | 205.00000 standard dev. 36.00000 | 26.00000 sigma-D = 25.63851 degrees of freedom = 4 t = 3.39333 p(1=2) = 0.02745 This is the probability that they are the same. documentation @book{Press1989, author = "W. H. Press and B. P. Flannery and S. A. Teukolsky and W. T. Vetterling", title = "Numerical Recipies in Pascal. The Art of Scientific Computing", publisher = "Cambridge University Press", address = "Cambridge", year = "1989"} Given a t value from a Student's t test, and the degrees of freedom, df, return the probability for a two tailed test. The code for doing this was originally in java script, from: Richard Lowry Department of Psychology Vassar College Poughkeepsie, NY 12604-0396 USA office: (914)437-7381 fax: (914)437-7538 lowry@vassar.edu http://faculty.vassar.edu/~lowry/VassarStats.html The original functional html containing this code is also given. It was translated to Pascal by Tom Schneider. see also {The source program:} ttest.p {The xyin file can be graphed using:} xyplo.p {Results can be checked at:} http://www.statsol.com/tools/stattools/ttestindependenttool.html http://faculty.vassar.edu/~lowry/VassarStats.html {The method of conversion to a log scale comes:} calc.p author Thomas Dana Schneider bugs technical notes *) (* end module describe.multtest *) (* begin module calc.const *) defrwidth = 9; (* default width of numbers *) defrdecimal = 2; (* default decimal places of numbers *) zbound = 38; (* largest z that the g function can tolerate without bombing The value zbound = 38 works for a Sun Sparc workstation. *) pmstring1 = ' \pm '; (* latex format *) pmstring2 = ' +/- '; (* regular format *) (* end module calc.const *) type (* from calc.p *) range = record (* representation of x +/- error *) estimate: real; (* the central value x *) error: real; (* the most probable values above and below x *) end; var distributions, multtestp, list, xyin, key: text; (* files used by this program *) (* calc variables *) showingerrors: boolean; (* true if error ranges are shown *) { not used: latex: boolean; (* if true show "\pm" otherwise use "+/-" for range. *) } pmstring: packed array[1..5] of char; (* string to show depending on latex variable *) fromto: boolean; (* true = [5 to 11] display, false = 8 +/- 3 display *) (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 'matmod 2.07 2007mar21 tds/gds'; *) (* begin module writerange *) procedure writerange(var fout: text; r: range; width, decimal: integer; exponential: boolean); (* write the range to fout, using the given width and decimal places for the numbers. If exponential is true, then use exponential form *) var zerodecimal: boolean; (* true if there are to be no decimal places *) procedure makeplusminus; begin if exponential then write(fout, r.estimate:width, pmstring, r.error:width) else if zerodecimal then write(fout, round(r.estimate):width, pmstring, round(r.error):width) else write(fout, r.estimate:width:decimal, pmstring, r.error:width:decimal) { then write(fout, r.estimate:width ,' +/- ', r.error:width ) else if zerodecimal then write(fout, round(r.estimate):width,' +/- ', round(r.error):width) else write(fout, r.estimate:width:decimal,' +/- ', r.error:width:decimal) } end; procedure makefromto; begin write(fout,'['); if exponential then write(fout,(r.estimate-r.error):width ,' to ', (r.estimate+r.error):width ) else if zerodecimal then write(fout,round(r.estimate-r.error):width,' to ', round(r.estimate+r.error):width) else write(fout,(r.estimate-r.error):width:decimal,' to ', (r.estimate+r.error):width:decimal); write(fout,']') end; begin (* writerange *) zerodecimal := (decimal <= 0); if showingerrors then begin if fromto then makefromto else makeplusminus end else begin if exponential then write(fout, r.estimate:width ) else if zerodecimal then write(fout, round(r.estimate):width) else write(fout, r.estimate:width:decimal) end end; (* writerange *) (* end module writerange *) (* begin module timesrange *) procedure timesrange(a, b: range; var atimesb: range); (* multiply a by b and put result in atimesb *) begin atimesb.estimate := a.estimate * b.estimate; if (a.estimate = 0) or (b.estimate = 0) then begin writeln(output,'WARNING: one of the estimates to be multiplied is zero'); writeln(output,'so the error will be set to zero'); atimesb.error := 0 end else atimesb.error := abs(atimesb.estimate) * sqrt( sqr(a.error/a.estimate) +sqr(b.error/b.estimate) ) end; (* end module timesrange *) (* begin module dividerange *) procedure dividerange(a, b: range; var aoverb: range); (* divide a by b and put result in aoverb *) begin aoverb.estimate := a.estimate / b.estimate; if (a.estimate = 0) or (b.estimate = 0) then begin writeln(output,'WARNING: one of the estimates to be divided is zero'); writeln(output,'so the error will be set to zero'); aoverb.error := 0 end else aoverb.error := abs(aoverb.estimate) * sqrt( sqr(a.error/a.estimate) +sqr(b.error/b.estimate) ) end; (* end module dividerange *) (* begin module logrange *) procedure logrange(a: range; base: real; var loga: range); (* take log base of a as loga *) begin loga.estimate := ln(a.estimate)/ln(base); loga.error := a.error/ a.estimate ; end; (* end module logrange *) (* begin module simpson *) procedure simpson(upper : real; var answer,tol : real); (* Perform a numerical integration of the normal distribution using Simpson's rule. The variable "lower" is the lower bound of the integration. The variable "upper" is the the upper bound of the integration. "answer" is the value of the integration, and "tol" is the tolerance of the integration procedure, and thus the error of the calculation. Written by Mark Shaner, 1992. Taken from Miller, Alan R. PASCAL PROGRAMS FOR SCIENTISTS AND ENGINEERS. SYBEX Inc., 1981. pgs 260-291 2007 Mar 21: Tom Schneider: Using a tolerance of 1/maxint caused an infinite loop on a 64 bit machine. Presumably it was attempting to compute the value to 20 or so decimal places. To avoid this, the tolerance is set to a more reasonable value. *) var i (* a counter *): integer; x,x1,x2, (* the independent variables for calculating the value of functions *) pi, (* the value of pi *) val, (* the value of 1/sqrt(2*pi), it is used in calculating the value of the gaussian for any x value, and is defined as a variable in order to speed calculations. *) deltax, (* the distance between every x value and the subsequent one *) evensum, (* the sum of the area under each of the even numbered parabolas *) oddsum, (* the sum of the area under each of the odd numbered parabolas *) endsum, (* the sum of the area under the first and last parabolas *) endcor, (* the value of the end correction, it is determined using dgauss *) sum1 : real; (* a place to store the previous sum so that it can be compared with the subsequent sum to determine if the tolerance level has been reached *) pieces : integer; (* the number of parabolas under the curve *) lower, (* the lower bound of the integration *) sum :real; (* the value of the area under the curve *) begin pi := 4.0*arctan(1.0); val := 1/sqrt (2*pi); (* activate both files to be used *) lower := 0; upper := abs(upper); { tol := 1/maxint; } tol := 1.0E-6; pieces := 2; deltax := (upper-lower)/pieces; x := lower + deltax; oddsum := val*exp(-0.5*x*x); evensum := 0.0; x1 := lower; x2 := upper; endsum := val*exp(-0.5*x1*x1) + val*exp(-0.5*x2*x2); endcor := -(x1*val)*exp(-0.5*x1*x1) - (-(x2*val)*exp(-0.5*x2*x2)); sum := (endsum + 4.0*oddsum)*deltax/3.0; repeat pieces := pieces*2; sum1 := sum; deltax := (upper - lower)/pieces; evensum := evensum + oddsum; oddsum := 0.0; for i := 1 to (pieces div 2) do begin x := lower + deltax*(2.0*i-1.0); oddsum := oddsum + val*exp(-0.5*x*x); end; sum := (7.0*endsum + 14.0*evensum + 16.0*oddsum + endcor*deltax) *deltax/15.0; until abs(sum-sum1) <= abs(tol*sum); answer := (0.5 - sum); if answer < 0.0 then answer := 0.0; (* safety for roundoff errors *) (* writeln (upper:7:5,' ',answer:7:5,'+/-',tol) *) end; (* end module simpson version = 'matmod 2.07 2007mar21 tds/gds'; *) (******************************************************************************) (* begin module ttestprobability *) function ttestprobability(t, df: real): real; (* Given a t value from a Student's t test, and the degrees of freedom, df, return the probability for a two tailed test. The code was originally in java script, from: Richard Lowry Department of Psychology Vassar College Poughkeepsie, NY 12604-0396 USA office: (914)437-7381 fax: (914)437-7538 lowry@vassar.edu http://faculty.vassar.edu/~lowry/VassarStats.html The original functional html containing this code is given below the Pascal. It was translated to Pascal by: Dr. Thomas D. Schneider National Cancer Institute Laboratory of Experimental and Computational Biology Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu http://www.lecb.ncifcrf.gov/~toms/ It is tested carefully in: testttestprobability. *) var pi: real; (* ration of circle circumfrence to diameter *) pj2: real; (* pi/2 *) pj4: real; (* pi/2 *) pi2: real; (* 2*pi *) e: real; (* e *) dgr: real; (* 180/pi *) function zip(q,i,j,b: real): real; var k: real; zz, z: real; begin zz := 1; z := zz; k := i; while (k<=j) do begin zz := zz*q*k/(k-b); z := z+zz; k := k+2 end; zip := z; end; function buzz(t,n: real): real; var rt, fk, ek, dk : real; begin t :=abs(t); rt :=t/sqrt(n); fk :=arctan(rt); if (n=1) then buzz := 1-fk/pj2 else begin ek := sin(fk); dk := cos(fk); if ((round(n) mod 2)=1) then buzz := 1-(fk+ek*dk*zip(dk*dk,2,n-3,-1))/pj2 else buzz := 1-ek*zip(dk*dk,1,n-3,-1) end end; { NOT USED function Abuzz(p,n: real): real; var v, dv, t: real; begin v := 0.5; dv := 0.5; t := 0; while (dv>1e-6) do begin t := 1/v-1; dv := dv/2; if(buzz(t,n)>p) then v := v-dv else v := v+dv end; Abuzz := t end; } begin (* ttestprobability *) pi := 4.0*arctan(1.0); pj2 := pi/2; pj4 := pi/4; pi2 := 2*pi; e := exp(1); dgr := 180/pi; ttestprobability := 1.0-(buzz(t, df)/2.0); end; (* ttestprobability *) (* The material below this point is a functioning javascript program that can be used as a web page. ******************************************************************************** t to P

version = 2.00 of t2p.html 2000 April 18
  t =
df =
  P (one-tailed)
  P (two-tailed)
0 then saturation := 1/p else saturation := 0.0; saturation := 0.8; if p > 0 then saturation := 1/(p*p) else saturation := 1.0; if p < 0.1 then if p > 0 then saturation := p else saturation := 0.0 else saturation := p; if p < 0.1 then saturation := 0.0 else saturation := p; 2000 Apr 4 Kd color scheme: saturation := 10*p; if saturation > 1.0 then saturation := 1.0; (* set saturation to 1 if p > 0.02, otherwise ramp down to zero. This sets the lowest color to white. *) saturation := 500000*p*p*p; (* 1% cutoff method:*) if p > 0.010 then saturation := 1.0 else saturation := 0.1 (* show probability of inequality *) if p < 0.02 then begin hue := p*50; saturation := 1.0; brightness := 1.0; end; else begin hue := 1.0; saturation := 0.0; brightness := 1.0; end; (* 0.02 cutoff, used with energy fig 2000 Apr 4 *) saturation := 50*p; if saturation > 1.0 then saturation := 1.0; (* 0.02 cutoff *) saturation := 50*p*p; if p > 0.05 then saturation := 1.0; if saturation > 1.0 then saturation := 1.0; } (* 95 % confidence limits *) if p > (1 - 0.95) then begin (* do colors *) saturation := 1.0; brightness := 1.0; symbol := 'c'; end else begin (* do hollowsquare *) saturation := 0.0; brightness := 0.0; symbol := 'b'; end; end; procedure showcolor(var xyin: text; d1, d2: real; symbol: char); (* show the color to xyin at coordinate (d1, d2).*) begin write(xyin, (d1+colorshift):wid:dec, ' ',(d2+colorshift):wid:dec, ' ',hue:wid:dec, ' ',saturation:wid:dec, ' ',brightness:wid:dec); if symbol = 'c' then write(xyin, ' ',symbol) else write(xyin, ' hollowsquare'); writeln(xyin); { writeln(xyin, (d1+colorshift):wid:dec, ' ',(d2+colorshift):wid:dec, ' ',hue:wid:dec, ' ',saturation:wid:dec, ' ',brightness:wid:dec); writeln(xyin, d1:1, ' ',d2:1, ' ',hue:wid:dec, ' ',saturation:wid:dec, ' ',brightness:wid:dec); } end; procedure compute; (* compute the t and p values using the global variables *) begin df := n[d1] + n[d2] - 2; sigmad := sqrt(sqr(s[d1])/n[d1] + sqr(s[d2])/n[d2]); t := abs(m[d1] - m[d2])/sigmad; p := 2.0*(1.0 - ttestprobability(t, df)); (* two tailed test!! *) end; procedure computetheenergy(var afile: text); (* compute the energy for d1 minus d2 *) const baseofnaturallogs = 2.718281828459045; var Z: real; (* Z score for energy deviation from zero *) p: real; (* probability that the energy deviation is zero *) tol: real; (* tolerance of p result *) begin (* compute the energy *) showingerrors := true; pmstring := pmstring1; r1.estimate := m[d1]; r1.error := s[d1]; r2.estimate := m[d2]; r2.error := s[d2]; dividerange (r1, r2, r1overr2); logrange (r1overr2, baseofnaturallogs, logr1overr2); (* energyfactor converts to bits and then to kcal/mole: *) energyfactor.estimate := 1.0 /(ln(2) * 2.3424); energyfactor.error := 0.0; timesrange(logr1overr2, energyfactor, deltadeltaG); write(afile,'deltadeltaG energy, deltaG[',d1:1, '] - deltaG[',d2:1,']: $'); { write(afile,'deltadeltaG energy, d1 - d2: $');} { writerange(afile,deltadeltaG, round(wid/2), dec, false);} writerange(afile,deltadeltaG, 1, 2, false); writeln(afile,'$ kcal/mole'); { write(afile,' r1/r2:');} { writerange(afile,r1overr2, wid, dec, false);} { writeln(afile);} {zzz} Z := (deltadeltaG.estimate - 0)/deltadeltaG.error; simpson(Z, p, tol); writeln(afile,'Z score for energy deviation from zero: ', Z:1:2); writeln(afile,'probability that this deltadeltaG is zero: ', p:1:2); {recompute} { logrange (r1overr2, 2, logr1overr2); (* energyfactor converts to bits and then to kcal/mole: *) energyfactor.estimate := 1.0 /(2.3424); energyfactor.error := 0.0; timesrange(logr1overr2, energyfactor, deltadeltaG); write(afile,'deltadeltaG energy, d1 - d2: $'); writerange(afile,deltadeltaG, round(wid/2), 2, false); writeln(afile,'$ kcal/mole'); } {zzz} end; begin writeln(output,'multtest ',version:4:2); rewrite(list); writeln(list,'multtest ',version:4:2); (* read in the distributions *) fudgefactor := 1.0; reset(distributions); d := 0; while not eof(distributions) do begin if distributions^ in ['0','1','2','3','4','5','6','7','8','9',' '] then begin d := d + 1; if d > maxdistrib then begin writeln(output,'cannot have more than ',maxdistrib:1,' distributions'); halt; end; readln(distributions,n[d],m[d],s[d]); s[d] := fudgefactor*s[d]; (* fudge factor for uncertainties *) end else if distributions^ = '*' then readln(distributions) else begin (* set fudge factor *) if distributions^ <> 'f' then begin writeln(output,'distributions can only contain numbers or f(udge)'); halt; end; get(distributions); readln(distributions,fudgefactor); writeln(output,'Fudge factor is set to: ',fudgefactor:wid:dec); end; end; colortype := 'n'; intervals := 20; dolog := false; reset(multtestp); while not eof(multtestp) do begin if multtestp^ in ['0','1','2','3','4','5','6','7','8','9',' '] then begin readln(multtestp, d1, d2); if (d1 > d) or (d2 > d) or (d1 < 1) or (d2 < 1) then begin writeln(output, 'd1 (=',d1:1,') or', ' d2 (=',d2:1,')', ' is out of the range of distributions, 1 to ' ,d:1); halt; end; compute; { writeln(output, 'compare ',d1:1,' to ', d2:1); } writeln(output, 'd1 = ',d1:2,': ', n[d1]:wid,' ',m[d1]:wid:dec,' ',s[d1]:wid:dec); writeln(output, 'd2 = ',d2:2,': ', n[d2]:wid,' ',m[d2]:wid:dec,' ',s[d2]:wid:dec); writeln(output,'t = ',t:wid:dec); writeln(output,'p(',d1:1,'=',d2:1,') = ',p:wid:dec); write(list,' '); write(list,'distribution ',d1:1); write(list,' | '); write(list,'distribution ',d2:1); writeln(list); write(list,'number '); write(list,n[d1]:wid); write(list,' | '); write(list,n[d2]:wid); writeln(list); write(list,'mean '); write(list,m[d1]:wid:dec); write(list,' | '); write(list,m[d2]:wid:dec); writeln(list); write(list,'standard dev. '); write(list,s[d1]:wid:dec); write(list,' | '); write(list,s[d2]:wid:dec); writeln(list); writeln(list,'sigma-D = ', sigmad:wid:dec); writeln(list,'degrees of freedom = ',df:1); writeln(list,'t = ',t:wid:dec); writeln(list,'p(',d1:1,'=',d2:1,') = ',p:wid:dec); writeln(list,'This is the probability that they are the same.'); computetheenergy(output); writeln(output,'------------------------------------'); computetheenergy(list); writeln(list,'------------------------------------'); writeln(list); end else begin read(multtestp, controlchar); if controlchar = 'c' then begin read(multtestp, colortype); if not (colortype in ['n','i','s','f']) then begin writeln(output,'colortype in multtestp must be one of: nisf'); halt; end; if colortype = 'i' then readln(multtestp, intervals) else if colortype = 'f' then readln(multtestp, fudgefactor) else readln(multtestp); end else if controlchar = 'l' then begin dolog := true; end else begin writeln(output,'unknown control character in multtestp: ', controlchar); halt; end end; end; (* convert to logs if needed *) if dolog then begin writeln(output,'USING LOGS'); for d := 1 to d do begin write(output, 'd= ',d:2, ' n[d] = ',n[d]:1, ' m[d] = ',m[d]:wid:dec, ' s[d] = ',s[d]:wid:dec); s[d] := s[d]/m[d]; { m[d] := ln(m[d])/ln(2); } m[d] := ln(m[d]); write(output, ' m''[d] = ',m[d]:wid:dec, ' s''[d] = ',s[d]:wid:dec); writeln(output); { (* begin module logrange *) procedure logrange(a: range; base: real; var loga: range); (* take log base of a as loga *) begin loga.estimate := ln(a.estimate)/ln(base); loga.error := a.error/ a.estimate ; end; (* end module logrange *) } end; end else begin writeln(output,'NOT USING LOGS'); end; (* create color graph *) rewrite(xyin); writeln(xyin,'* multtest ',version:4:2); writeln(xyin,'* color graph '); saturation := 0.5; brightness := 1.0; for d1 := 1 to d do begin for d2 := 1 to d do begin compute; setcolor; showcolor(xyin,d1,d2,symbol); end; end; (* create color key *) rewrite(key); writeln(key,'* multtest ',version:4:2); writeln(key,'* color key'); d2 := 0; (* start at 0.05 so that there is a white space to put the empty box *) {zzz} p := 0.0; setcolor; showcolor(key,0.4,0.5,'b'); symbol := 'c'; keyrange := 10; for d1 := 6*keyrange to keysmooth*keyrange do begin p := d1/(keysmooth*keyrange); setcolor; showcolor(key,(d1/keysmooth)/10-colorshift,0.5,symbol); end; { OLD: (* create color key *) rewrite(key); writeln(key,'* multtest ',version:4:2); writeln(key,'* color key'); d2 := 0; keyrange := 10; for d1 := 0 to keyrange do begin p := d1/keyrange; setcolor; showcolor(key,d1/10-colorshift,0.5,symbol); end; } { (* create color key, double range *) d1 := 0; keyrange := 20; for d2 := 0 to keyrange do begin p := d2/keyrange; setcolor; showcolor(key,d1,d2,symbol); end; } { hand tests t := 63.7; t := 31.8; t := 12.7; t := 3.8; p := ttestprob(df,t); writeln(list,' t = ',t:wid:dec); writeln(list,' p = ',p:wid:dec); writeln(list,'1-p = ',(1-p):wid:3); } end; (* end module multtest.themain *) begin { testttestprobability; } themain(distributions, multtestp, list, xyin, key); 1: end.