program makegauss(histog, makegaussp, gfit, output); (* makegauss: make a gaussian distribution from a histogram Dr. Thomas D. Schneider National Institutes of Health National Cancer Institute Center for Cancer Research Nanobiology Program Molecular Information Theory Group Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu (use only if first address fails) http://www.ccrnp.ncifcrf.gov/~toms/ *) label 1; (* end of program *) const (* begin module version *) version = 1.01; (* of makegauss.p 2007 Apr 15 2007 Apr 15: 1.01: cleanup 2007 Apr 15: 1.00: origin from fitgauss *) updateversion = 1.00; (* defines lowest acceptable current parameter file *) (* end module version *) (* begin module describe.makegauss *) (* name makegauss: make a gaussian distribution from a histogram synopsis makegauss(histog: in, makegaussp: in, gfit: out, output: out) files histog: the output of the genhis program, consisting of comment lines that begin with "*" and data lines that have pairs of bin position and number of items at that bin position. gfit: Same format as histog, but a gaussian distribution that is a least squares fit to the histog data. makegaussp: parameters to control the program. The file must contain the following parameters, one per line: parameterversion: The version number of the program. This allows the user to be warned if an old parameter file is used. mean (real): The desired mean stdev (real): The desired standard deviation maximum (real): The desired height of the peak output: messages to the user description The makegauss program fits a gaussian distribution corresponding to a histogram. The user specifies the parameters for where the gaussian will be put. The output is the same format as the input histogram. examples documentation see also {Program to generate a histogram from data: } genhis.p {Program to smooth a histogram: } smoothis.p {Program to fit a gaussian to data: } fitgauss.p {Example parameter file: } makegaussp author Thomas Dana Schneider bugs technical notes memmax is a constant that defines the largest allowed range. *) (* end module describe.makegauss *) (* begin module makegauss.const *) wid = 10; (* width of numbers to use for data *) dec = 5; (* number of decimal places to use for data *) memmax = 10000; (* largest allowed gfiting range *) (* end module makegauss.const *) var histog, (* file used by this program *) makegaussp, (* file used by this program *) gfit: text; (* file used by this program *) (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 'delmod 6.16 84 mar 12 tds/gds'; *) (* begin module copyaline *) procedure copyaline(var fin, fout: text); (* copy a line from file fin to file fout *) begin (* copyaline *) while not eoln(fin) do begin fout^ := fin^; put(fout); get(fin) end; readln(fin); writeln(fout); end; (* copyaline *) (* end module copyaline version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module makegauss.themain *) procedure themain(var histog, makegaussp, gfit: text); (* the main procedure of the program *) const debug = false; (* set to true to see debugging output *) debugwid = 5; (* width of numbers to use for debugging data *) debugdec = 2; (* number of decimal places to use for debugging data *) countmax = 10; (* how many data pieces to look at before halting while debugging *) pi = 3.14159265358974; (* used in calculating the gaussian curve *) var mean: real; (* the mean of values in the memory *) count: integer; (* counter for limiting debug output *) points: integer; (* number of points in the histogram *) memory: record (* the histogram to work with *) position: array[1..memmax] of real; (* positions of numbers *) value: array[1..memmax] of real; (* memory of numbers *) gauss: array[1..memmax] of real; (* memory of gaussian curve *) end; p: integer; (* index to the position in the histogram *) parameterversion: real; (* parameter version number *) range: integer; (* range to average over *) total: real; (* current total in the memory *) sumsquare: real; (* sum of squares *) stdev: real; (* sum of squares *) sumrange: real; (* sum range values *) sumsqrange: real; (* sum square range values *) weighted: real; (* partially computed weighting value *) s: real; (* partially computed weighting value *) (* stuff for gaussian: *) d1, (* first denominator, = 2*sd**2 *) d2, (* second denominator, = sd*sqrt(2*pi) *) x, (* the position for which the expectation is calculated *) ex: real; (* probability of getting a value in the interval *) number: real; (* predicted gaussian value *) interval: real; (* interval for computing probabilty *) entries: real; (* number of original data points *) printing: boolean; (* whether to print *) done: boolean; (* done finding best fit *) deviation: real; (* deviation from best fit *) newdeviation: real; (* newly computed deviation *) currentmean: real; (* current mean *) currentstdev: real; (* current stdev *) currententries: real; (* current entries *) fractionstep: real; (* the fractional step *) watchprogress: boolean; (* watch the gaussian curve converge? *) tolerance: real; (* fractional tolerance for convergence *) delta: real; (* current fractional tolerance for convergence *) maxsteps: integer; (* maximum number of steps to converge *) maximum: real; (* maximum of curve at mean NEW *) {zzz} procedure vary(var p: real; fraction: real); (* vary the variable p by some fractional amount *) var seed: real; (* a random number *) variation: real; (* the amount of variation *) begin { seed := random(0); } variation := 2.0 * (seed - 0.5) * fraction; { writeln(output,'seed = ',seed:1:dec, ' variation = ',variation:1:dec); } p := p + p * variation; end; procedure computegauss; (* compute gaussian curve, store in memory *) var p: integer; (* index to the position in the histogram *) begin d1 := 2 * stdev*stdev; d2 := stdev * sqrt(2 * pi); { interval := memory.position[2] - memory.position[1]; } interval := maximum; for p := 1 to points do begin ex := interval * exp( -(memory.position[p] - mean) * (memory.position[p] - mean)/d1); { * (memory.position[p] - mean)/d1) / d2; memory.gauss[p] := ex * entries; } memory.gauss[p] := ex; end; end; procedure printgauss; (* print gaussian curve from memory *) var p: integer; (* index to the position in the histogram *) begin for p := 1 to points do begin writeln(gfit, memory.position[p]:wid:dec, ' ',memory.gauss[p]:wid:dec, ' ',memory.value[p]:wid:dec); end; end; procedure computedeviation(var deviation: real); (* compute the least squares deviation of the gaussian curve from the observed values *) var p: integer; (* index to the position in the histogram *) diff: real; (* a single difference *) sum: real; (* sum of differences squared *) begin deviation := 0.0; for p := 1 to points do begin diff := memory.value[p] - memory.gauss[p]; deviation := deviation + diff * diff; end; { writeln(output,'deviation is: ',deviation:wid:dec); } end; {zzz} begin writeln(output,'makegauss ',version:4:2); reset(makegaussp); readln(makegaussp, parameterversion); if parameterversion < updateversion then begin writeln(output, 'You have an old parameter file!'); halt end; { readln(makegaussp, range); if range < 1 then begin writeln(output,'range must be positive'); halt end; if range > memmax then begin writeln(output,'range must be less than memmax (',memmax:1,')'); halt end; } readln(makegaussp, mean); readln(makegaussp, stdev); readln(makegaussp, maximum); reset(histog); rewrite(gfit); writeln(gfit,'* makegauss ',version:4:2); { writeln(output,'range is ',range:1); } points := 0; total := 0; sumsquare := 0; sumrange := 0; sumsqrange := 0; if debug then count := 0; (* read in the histogram *) while not eof(histog) do begin if (eoln(histog)) or (histog^ = '*') then begin copyaline(histog, gfit); end else begin (* pull in the new value *) points := succ(points); if points > memmax then begin writeln(output,'histogram must be shorter than memmax (', memmax:1,')'); halt end; readln(histog, memory.position[points], memory.value[points]); (* add in the new value to the running total: *) weighted := memory.value[points]*memory.position[points]; total := total + weighted; entries := entries + memory.value[points]; sumrange := sumrange + memory.value[points]; { sumsquare := sumsquare + (weighted * weighted); sumsqrange := sumsqrange + memory.value[points]*memory.value[points]; } if debug then writeln(output, memory.position[points]:wid:dec, ' ',memory.value[points]:wid:dec); if debug then write(output, ' nverage: ', mean:debugwid:debugdec, ' total: ',total:debugwid:debugdec); {zzz} if debug then writeln(output); {zzz} if debug then count := succ(count); if debug then if count >= countmax then halt; end end; { mean := total / sumrange; } { stdev := (sumsquare/sumsqrange) - (mean * mean); } writeln(output,'mean = ',mean:wid:dec); writeln(output,'entries = ',entries:wid:dec); { writeln(output,'stdev = ',stdev:wid:dec); } { (* figure out an initial standard deviation. I can't seem to get it right the simpler way ... *) sumsquare := 0.0; for p := 1 to points do begin s := (memory.value[p]*memory.position[p] - mean); sumsquare := sumsquare + s; end; stdev := (sumsquare/sumrange); } writeln(output,'stdev = ',stdev:wid:dec); writeln(output,'maximum = ',maximum:wid:dec); watchprogress := false; tolerance := 0.00005; fractionstep := 0.05; maxsteps := 10000; computegauss; { if watchprogress then printgauss; (* now vary the mean, the stdev and the entries to find the least squares fit to the memory curve *) currentmean := mean; currentstdev := stdev; currententries := entries; done := false; count := 0; computedeviation(deviation); while not done do begin vary(mean, fractionstep); vary(stdev, fractionstep); vary(entries, fractionstep); computegauss; computedeviation(newdeviation); if newdeviation < deviation then begin delta := (deviation - newdeviation)/deviation; if delta < tolerance then begin done := true; writeln(output,'reached tolerance: ',delta:wid:dec); end; (* writeln(output,'accepted!'); *) currentmean := mean; currentstdev := stdev; currententries := entries; deviation := newdeviation; if watchprogress then printgauss; end else begin (* writeln(output,'failed!'); *) (* reset the parameters: *) mean := currentmean; stdev := currentstdev; entries := currententries; end; count := succ(count); (* writeln(output,count:1); *) if count > maxsteps then done := true; end; } if not watchprogress then printgauss; end; (* end module makegauss.themain *) begin themain(histog, makegaussp, gfit); 1: end.