/* Output from p2c 1.21alpha-07.Dec.93, the Pascal-to-C translator */ /* From input file "genhis.p" */ #include /* generalized histogram program by Gary Stormo modified by: Dr. Thomas D. Schneider National Institutes of Health National Cancer Institute Center for Cancer Research Nanobiology Program Molecular Information Theory Group Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu (use only if first address fails) http://www.ccrnp.ncifcrf.gov/~toms/ module libraries required: delman, delmods */ /* end of program */ /* begin module version */ #define version 1.88 /* of genhis.p 2007 Aug 29 2007 Aug 29, 1.88: qqq 2007 Aug 29, 1.87: curve plot documentation inside file 2007 Aug 29, 1.86: curve plot as a real number, not integer! 2007 Aug 03, 1.85: give 0 anyway when can't compute uncertainty 2007 Jul 31, 1.84: bomb on having only one value in data 2007 Jun 14, 1.83: tolerance limit to exp in gaushist solves bug 2007 Jun 14, 1.82: crash on reading column 6 of rsdata file? error in exponentiation (Result too large) (error #700 at 1af03) 2006 Nov 15, 1.81: provide gaussian data 1.80, 2003 Jul 31: tabs now counted as blanks 1.79, 2002 Nov 1: bug: infinite loop with only one data item! 1.78, 2002 Feb 5: protect against bad non-numerical data 1.77, 2002 Feb 5: upgrade documentation 1.76, 2002 Feb 5: allow blank lines in the file. 1.75, 1999 Dec 3: produce curve file 1.74, 1997 Nov 20: previous changes origin before 1984 jan 6 */ /* end module version */ /* begin module describe.genhis */ /* name genhis: general histogram plotter synopsis genhis(data: in, histog: out, genhisp: in, curve: out, output: out) files data: File of numbers to be histogrammed. Header lines that begin with '*' may be copied from this file to 'histog' or may be skipped. The column from which to read the data may also be specified. See the description of the file 'genhisp' to see how to do this. Once the data region has begun, (that is, there is at least one non '*' line), then lines that begin with '*' are also skipped. Lines that are blank or for which there *is* no data column (because one asked for a column further out than is supplied) are skipped. Data are only separated by spaces or by tabs. Other control characters are not separaters. histog: the output histogram. contains the header lines copied from file 'data', plus data about the numbers (min, max, mean, variance and st. dev.), and the plot. may also contain a standard plot. genhisp: parameter input file. this is used to change any of the parameters from default values. any may be changed and they can be specified in any order. the first character on a line tells what parameter is to be set, the other information sets it. the parameters that can be changed, and their line codes: h - sets header reading; this is followed by two integers, the first specifying the number of lines to copy and the second the number of lines to skip; if the first number is <0 those lines beginning with '*' are copied; default is -1 0. c - sets the column of data that is to be analyzed and plotted; the default is column 1; note: a column is any string of nonblank characters; columns are separated by blanks; p - sets the standard plot; poisson and gaussian plots are available and are specified by following the p by either p or g. NEW: 2006 Nov 15: Capital P or G means provide the curve data in the histog. x - sets the x-axis scale; this is to be followed by either an n or an s, and then a number; if n, then the number of intervals on the x-axis is set; if s, then the size of intervals is set; default is to set the number of intervals to constant 'defslots'. r - sets the range of data to be plotted; this is followed by two numbers which specify the subrange of the data for which the plot is desired; default is to plot all the data. curve: gives the standard plot curve as position and value output: for messages to the user. description This program takes numerical data from a file and plots a histogram of those data. It also calculates the min, max, mean and variance of the data. If desired, the user may get a standard plot, based on the mean and variance, plotted along with the data. The user may specify the size or number of intervals on the x-axis. The y-axis is automatically scaled to fit on a page. The scaling factor is reported to the user. example try file datat7 with genhisp of x n 20 p g see also {Program to plot the graph in postscript:} genpic.p {An x-y plotting program:} xyplo.p author Gary Stormo bugs Try different x axis intervals: regular spikes can be data artifacts! MAJOR BUG: If the data column has identical values, then the program goes into an infinite loop and will generate an infinite sized histog file. Fortunately this is not an interesting histogram ... technical notes The constant 'pageheight' is used to set the scaling factor so that the plots do not exceede the size of a page. As of 1994 Aug 10 the SEM is calculated by dividing the standard deviation by sqrt(n-1). */ /* end module describe.genhis */ /* begin module universal.const */ #define e 2.71828459045 #define pi 3.14159265358974 /* end module universal.const */ /* begin module histogram.const */ #define maxslots 20000 /* maximum number of slots in the histogram */ #define defslots 100 /* the default number of slots */ #define pageheight 53 /* was 105; */ /* room on page for plotting; used to set scaling. pageheight should be set to 27 less than the number of spaces allowed across a page for your printer. */ #define dch '+' /* character for plotting data */ #define sch ':' /* character for plotting standard */ #define bch '*' /* character for plotting coincidence of standard and data */ #define wid 12 /* width of numbers to use for data */ #define dec 8 /* number of decimal places to use for data */ /* end module histogram.const */ /* begin module histogram.type */ typedef enum { none, gaussian, poisson } plots; /* the types of standard plots that can be done */ typedef enum { start, stop } endpoints; /* defines the range for plotting */ typedef struct histarray { long numbers[maxslots]; /* rounded histogram arrays */ double rnumbers[maxslots]; /* histogram as reals */ double range[2]; /* range of the data recorded */ double interval; /* the size of the histogram slots */ long slots; /* the number of slots */ } histarray; /* end module histogram.type */ /* begin module genhis.var */ Static _TEXT genhisp; /* parameter file */ Static _TEXT data; /* input file of numbers */ Static _TEXT histog; /* output histogram */ Static _TEXT curve; /* output standard plot */ /* end module genhis.var */ /* begin module histogram.var */ Static histarray dataarray; /* the histogram of the data */ Static histarray stanarray; /* histogram of standard values */ Static long hlines; /* the number of header lines in the numbers file */ Static long column; /* of the data file to read */ Static long entries; /* the number of numbers in the file */ Static double min; /* minimum of the numbers */ Static double max; /* maximum of the numbers */ Static double ave; /* the average (mean) number */ Static double scaling; /* for plotting on the page */ Static double vari; /* the variance of the numbers */ Static double uncertainty; /* Shannon's H function for the frequencies */ Static plots standplot; /* type of standard plot to use */ Static boolean splot; /* true if we plot a standard curve with the data */ Static boolean curvedata; /* give curve data */ Static jmp_buf _JL1; /* end module histogram.var */ /* begin module package.primitive */ /* ************************************************************************ */ /* begin module halt */ Static Void halt() { /* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. */ printf(" program halt.\n"); longjmp(_JL1, 1); } /* end module halt version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module copyaline */ Static Void copyaline(fin, fout) _TEXT *fin, *fout; { /* copy a line from file fin to file fout */ while (!P_eoln(fin->f)) { putc(P_peek(fin->f), fout->f); getc(fin->f); } fscanf(fin->f, "%*[^\n]"); getc(fin->f); putc('\n', fout->f); } /* copyaline */ /* end module copyaline version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module copylines */ Static long copylines(fin, fout, n) _TEXT *fin, *fout; long n; { /* copy n lines of file fin to file fout. the actual number of lines copied is returned. */ long index = 0; /* the current line number */ while (!BUFEOF(fin->f) && index < n) { copyaline(fin, fout); index++; } return index; } /* copylines */ /* end module copylines version = 'delmod 6.51 85 apr 17 tds/gds' */ /* ************************************************************************ */ /* end module package.primitive version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module skipblanks */ /* 2003 July 31: tab is considered a blank character */ Static boolean isblank(c) Char c; { /* is the character c blank or tab? */ return (c == ' ' || c == '\t'); } Static Void skipblanks(thefile) _TEXT *thefile; { /* skip over blanks until a non-blank, or end of line, is found */ while (isblank(P_peek(thefile->f)) & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipnonblanks(thefile) _TEXT *thefile; { /* skip over nonblanks until a blank, or end of line, is found */ while ((!isblank(P_peek(thefile->f))) & (!P_eoln(thefile->f))) getc(thefile->f); } /* OLD PROCEDURES: tab previously was not a blank. procedure skipblanks(var thefile: text); (* skip over blanks until a non-blank, or end of line, is found *) begin while (thefile^ = ' ') and not eoln(thefile) do get(thefile); end; procedure skipnonblanks(var thefile: text); (* skip over nonblanks until a blank, or end of line, is found *) begin while (thefile^ <> ' ') and not eoln(thefile) do get(thefile); end; */ Static Void skipcolumn(thefile) _TEXT *thefile; { /* skip over a data column */ skipblanks(thefile); skipnonblanks(thefile); } /* end module skipblanks version = 4.59; (@ of prgmod.p 2003 Jul 31 */ /* begin module genhis.setparam */ Static Void setparam() { /* read from the parameter file the desired parameters. five parameters can be specified, and each specification is designated by the character that begins the line. unspecified parameters are set to default values. parameters: header lines - start line with 'h', followed by two integers; the first integer sets the number of lines to copy from 'numbers' to 'histog' (if <0 then lines beginning with '*' are copied); the second integer specifies the number of lines to skip over in 'numbers'; defaults are -1, 0, respectively. standard plot - start line with 'p'; if this is followed by a 'g' then a gaussian standard will be plotted, otherwise none. 'P' and 'G' mean to provide the data in histog. x-axis scale - start line with 'x' followed by 'n' or 's' followed by an integer; if 'n' then the number of intervals on the x-axis is set by the integer, if 's' then the size of the intervals is set. data column - start line with 'c' followed by an integer which sets the column in the input file that is to be used for the data; the default is 1. range of plot - start line with 'r' followed by two integers; the first integer specifies at what lower limit of data the plotting is to begin, and the second integer specifies at what upper limit it is to end; the default is all the data. */ Char ch; /* a character for reading parameters */ long c, s; /* the number of lines to copy and skip in the header */ long i; /* generic integer */ if (*data.name != '\0') { if (data.f != NULL) data.f = freopen(data.name, "r", data.f); else data.f = fopen(data.name, "r"); } else rewind(data.f); if (data.f == NULL) _EscIO2(FileNotFound, data.name); RESETBUF(data.f, Char); if (BUFEOF(data.f)) { printf(" data file empty\n"); halt(); } if (*histog.name != '\0') { if (histog.f != NULL) histog.f = freopen(histog.name, "w", histog.f); else histog.f = fopen(histog.name, "w"); } else { if (histog.f != NULL) rewind(histog.f); else histog.f = tmpfile(); } if (histog.f == NULL) _EscIO2(FileNotFound, histog.name); SETUPBUF(histog.f, Char); fprintf(histog.f, "* genhis %4.2f: generalized histogram data from\n", version); /* set the default values for parameters */ hlines = 0; column = 1; standplot = none; dataarray.slots = defslots; /* by setting range[start] > range[stop] we make them get set to the min and max of the data */ dataarray.range[(long)start] = 1.0; dataarray.range[(long)stop] = 0.0; /* change parameters according to user requests */ if (*genhisp.name != '\0') { if (genhisp.f != NULL) genhisp.f = freopen(genhisp.name, "r", genhisp.f); else genhisp.f = fopen(genhisp.name, "r"); } else rewind(genhisp.f); if (genhisp.f == NULL) _EscIO2(FileNotFound, genhisp.name); RESETBUF(genhisp.f, Char); while (!BUFEOF(genhisp.f)) { ch = getc(genhisp.f); if (ch == '\n') ch = ' '; if (ch != 'r' && ch != 'c' && ch != 'x' && ch != 'p' && ch != 'h') { printf("genhisp commands must begin with one of: hpxcr\n"); halt(); continue; } switch (ch) { case 'h': fscanf(genhisp.f, "%ld%ld%*[^\n]", &c, &s); getc(genhisp.f); if (c < 0) { /* we copy lines beginning with '*' to histog */ c = 0; while (P_peek(data.f) == '*') { fprintf(histog.f, "* "); copyaline(&data, &histog); c++; } } else { i = copylines(&data, &histog, c); if (i < c) { printf(" parameter error: header too short\n"); halt(); } } for (i = 1; i <= s; i++) { fscanf(data.f, "%*[^\n]"); getc(data.f); } /* update hlines to the number of lines in the data which have been copied and skipped */ hlines += c + s; break; case 'p': skipblanks(&genhisp); fscanf(genhisp.f, "%c%*[^\n]", &ch); getc(genhisp.f); if (ch == '\n') ch = ' '; if (ch != 'P' && ch != 'G' && ch != 'n' && ch != 'p' && ch != 'g') { printf("genhisp plot command can accept only g G p P or n commands\n"); halt(); } switch (ch) { case 'g': standplot = gaussian; break; case 'G': standplot = gaussian; break; case 'p': standplot = poisson; break; case 'P': standplot = poisson; break; case 'n': standplot = none; break; } if (ch == 'P' || ch == 'G') curvedata = true; else curvedata = false; break; case 'x': skipblanks(&genhisp); ch = getc(genhisp.f); if (ch == '\n') ch = ' '; if (ch == 's') { fscanf(genhisp.f, "%lg%*[^\n]", &dataarray.interval); getc(genhisp.f); if (dataarray.interval <= 0.0) { printf(" error in parameter file: interval size must be positive\n"); halt(); } dataarray.slots = 0; } else if (ch == 'n') { fscanf(genhisp.f, "%ld%*[^\n]", &dataarray.slots); getc(genhisp.f); if (dataarray.slots < 1) { printf(" error in parameter file: number of slots must be positive\n"); halt(); } else if (dataarray.slots > maxslots) { printf(" error in parameter file: number of slots must be less than %ld\n", (long)maxslots); halt(); } } else { printf(" x parameter must be followed by \"s\" or \"n\".\n"); halt(); } break; case 'c': fscanf(genhisp.f, "%ld%*[^\n]", &column); getc(genhisp.f); break; case 'r': fscanf(genhisp.f, "%lg%lg%*[^\n]", dataarray.range, &dataarray.range[(long)stop]); getc(genhisp.f); break; } } /* if we have not copied or skipped any lines in the header, i.e., if hlines = 0, then we copy those lines beginning with '*' */ if (hlines == 0) { while (P_peek(data.f) == '*') { copyaline(&data, &histog); hlines++; } } fprintf(histog.f, "*\n"); } /* end module genhis.setparam */ /* begin module histogram */ /* the following modules contain procedures for calculating and plotting histogram data. */ /* begin module histogram.datahist */ /* end module histogram.datahist */ /* begin module histogram.gaushist */ /* end module histogram.gaushist */ /* begin module histogram.poishist */ /* end module histogram.poishist */ /* begin module histogram.hplot */ /* end module histogram.hplot */ /* end module histogram */ /* begin module histogram.datahist */ Static Void datahist(thefile, hlines, column, entries, min, max, ave, vari, dataarray) _TEXT *thefile; long hlines, column, *entries; double *min, *max, *ave, *vari; histarray *dataarray; { /* skip hlines in thefile to get to the data. now read the data from the specified 'column' (must be an integer). count the number of entries to the end of the file, determine the min, max, sum and average. now determine the size of the intervals or the number of slots, depending on which has been specified. pass through the data a second time, counting the occurrences for each interval and calculating the variance. this procedure uses procedures skipblanks and skipnonblanks. */ double sum = 0.0; /* of the entries */ double num; /* a number read from thefile */ long int_; /* a particular interval */ long i, j; /* indecies */ double frequency; /* a frequency in a slot */ /*BUBBA*/ long bubbaline = 1; /* count of the lines by my friend bubba */ long FORLIM; if (*thefile->name != '\0') { if (thefile->f != NULL) thefile->f = freopen(thefile->name, "r", thefile->f); else thefile->f = fopen(thefile->name, "r"); } else { /* BUBBA starts counting at line 1 */ rewind(thefile->f); } if (thefile->f == NULL) _EscIO2(FileNotFound, thefile->name); RESETBUF(thefile->f, Char); if (BUFEOF(thefile->f)) { printf(" data file is empty\n"); halt(); } for (i = 1; i <= hlines; i++) { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); bubbaline++; /* BUBBA */ } *entries = 0; /* first pass through the data to calculate min, max and ave */ while (!BUFEOF(thefile->f)) { /* skip lines that begin with '*' */ if (P_peek(thefile->f) == '*') { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); bubbaline++; /* BUBBA */ continue; } /* get to the proper column for reading the data */ for (i = 2; i <= column; i++) { skipblanks(thefile); skipnonblanks(thefile); } skipblanks(thefile); /* 2002 Feb 5: Oh, come on!! Just skip the darn line!!! if eoln(thefile) then begin writeln(output,'parameter error: data column ', column:1,' does not exist'); writeln(output,entries:1,' entries successfully read'); halt; end; */ /* skip blank lines 2002 Feb 5 */ if (P_eoln(thefile->f)) { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); bubbaline++; /* BUBBA */ /* writeln(output,'skipped blank line',bubbaline:1); (* BUBBA *) */ continue; } /* writeln(output,'skipping blank line'); */ /* writeln(output,'BUBBA MY OL'' BUDDY!! 2 ',bubbaline:1); (* BUBBA *) writeln(output,'thefile^=',thefile^); */ /* check for bad numerical data: */ if (P_peek(thefile->f) != '9' && P_peek(thefile->f) != '8' && P_peek(thefile->f) != '7' && P_peek(thefile->f) != '6' && P_peek(thefile->f) != '5' && P_peek(thefile->f) != '4' && P_peek(thefile->f) != '3' && P_peek(thefile->f) != '2' && P_peek(thefile->f) != '1' && P_peek(thefile->f) != '0' && P_peek(thefile->f) != '+' && P_peek(thefile->f) != '-') { printf("BUBBA found a BAD NUMBER on line %ld column %ld\n", bubbaline, column); printf("The \"number\" begins with the character \"%c\".\n", P_peek(thefile->f)); printf("BUBBA MY OL' BUDDY!! Saves the day again!\n"); halt(); } fscanf(thefile->f, "%lg%*[^\n]", &num); getc(thefile->f); bubbaline++; /* BUBBA */ /* writeln(output,'skipped blank line BUBBA',bubbaline:1); (* BUBBA *) */ (*entries)++; /* min and max are set to the first entry in the data and then are changed as higher and lower values are read */ if (*entries == 1) { *min = num; *max = num; } else { if (num < *min) *min = num; if (num > *max) *max = num; } sum += num; } if (*entries <= 0) return; *ave = sum / *entries; /* now do the second pass through the data and count the number of data points in each interval */ if (*thefile->name != '\0') { if (thefile->f != NULL) thefile->f = freopen(thefile->name, "r", thefile->f); else thefile->f = fopen(thefile->name, "r"); } else rewind(thefile->f); if (thefile->f == NULL) _EscIO2(FileNotFound, thefile->name); RESETBUF(thefile->f, Char); for (i = 1; i <= hlines; i++) { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } *vari = 0.0; /* if range[start] > range[stop], that is the signal that we are going to set the range to the whole data */ if (dataarray->range[(long)start] > dataarray->range[(long)stop]) { dataarray->range[(long)start] = *min; dataarray->range[(long)stop] = *max; } /* if slots <= 0, that means we have specified the interval size and we determine the number of slots needed from that, else we know the number of slots and set the interval size */ if (dataarray->slots > 0) dataarray->interval = (dataarray->range[(long)stop] - dataarray->range[(long)start]) / dataarray->slots; /* set the exact number of slots needed for the interval size */ if (dataarray->interval == 0) dataarray->interval = 1.0; dataarray->slots = (long)((dataarray->range[(long)stop] - dataarray->range[(long)start]) / dataarray->interval) + 1; if (dataarray->slots > maxslots) { printf(" the calculated number of slots (%ld) will not fit\n", dataarray->slots); printf(" into the maximum number of slots (constant maxslots = %ld)\n", (long)maxslots); halt(); } FORLIM = dataarray->slots; /* clear the numbers */ for (i = 0; i < FORLIM; i++) dataarray->numbers[i] = 0; FORLIM = dataarray->slots; for (i = 0; i < FORLIM; i++) dataarray->rnumbers[i] = 0.0; /* read in the data */ /* for i := 1 to entries do begin This old method fails because the entries do not correspond directly to the number of lines if * lines are to be skipped! */ while (!BUFEOF(thefile->f)) { /* skip lines that begin with '*' */ if (P_peek(thefile->f) == '*') { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); continue; } /* get to the proper column for reading the data */ for (j = 2; j <= column; j++) { skipblanks(thefile); skipnonblanks(thefile); } /* 2002 Feb 5: skip blank lines */ if (P_eoln(thefile->f)) { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); continue; } fscanf(thefile->f, "%lg%*[^\n]", &num); getc(thefile->f); /* if num is within the range we put it into the proper interval */ if (num >= dataarray->range[(long)start] && num <= dataarray->range[(long)stop]) { int_ = (long)((num - dataarray->range[(long)start]) / dataarray->interval) + 1; dataarray->numbers[int_-1]++; } *vari += (*ave - num) * (*ave - num); } *vari /= *entries - 1; /* calculate the uncertainty of the distribution */ uncertainty = 0.0; FORLIM = dataarray->slots; for (i = 0; i < FORLIM; i++) { if (dataarray->numbers[i] > 0.0) { frequency = (double)dataarray->numbers[i] / *entries; uncertainty -= frequency * log(frequency); } } uncertainty /= log(2.0); } #define pi_ 3.14159265358974 /* used in calculating the curve */ #define tolerance (-700) /* end module histogram.datahist */ /* begin module histogram.gaushist */ Static Void gaushist(entries, ave, vari, stanarray) long entries; double ave, vari; histarray *stanarray; { /* fill the stanarray with values assuming the data fits a gaussian distribution. the equation used is from bevington, p.r., 'data reduction and error analysis for the physical sciences', p. 44: prob(x) = exp[-1/2((x-ave)/sd)**2]/sd*sqrt(2*pi). where prob(x) is the probability of getting the value x. to determine the expected number of values inside a given interval, we calculate prob(x) for the mid-point of the interval, multiply by the width of the interval and then multiply this total probability by the number of entries in the data. */ /* limit on exponentiation computation. exp(x) where x < -700 can crash: ./genhis: error in exponentiation (Result too large) (error #700 at 1b6bf) */ double sd; /* standard deviation */ double d1; /* first denominator, = 2*sd**2 */ double d2; /* second denominator, = sd*sqrt(2*pi) */ double x; /* the position for which the expectation is calculated */ double ex; /* probability of getting a value in the interval */ double exinner; /* ex := exp(exinner) for precomputing exp */ long i; /* index */ long FORLIM; if (vari == 0.0) { printf("WARNING: There is no variance to the data\n"); printf("so the Gaussian distribution cannot be plotted.\n"); printf("No standard plot will be made.\n"); fprintf(histog.f, "* no standard plot\n"); splot = false; return; } sd = sqrt(vari); d1 = 2 * vari; d2 = sd * sqrt(2 * pi_); FORLIM = stanarray->slots; /* write(output,', sd = ', sd:6:2); write(output,', d1 = ', d1:6:2); write(output,', d2 = ', d2:6:2); write(output,', interval = ', stanarray.interval:6:2); writeln(output); */ for (i = 0; i < FORLIM; i++) { x = stanarray->range[(long)start] + (i + 0.5) * stanarray->interval; /* write(output,'bubba 1'); write(output,', i = ', i:1); write(output,', x = ', x:6:2); write(output,', ave = ', ave:6:2); writeln(output); write(output, '(x - ave)/d1 = ', (x - ave)/d1 : 6:2); writeln(output); write(output, '(x - ave)/sd = ', (x - ave)/sd : 6:2); writeln(output); write(output, '((-(x - ave) * (x - ave)/d1) / d2 * interval) = ', ((-(x - ave) * (x - ave)/d1) / d2 * interval): 6:2); writeln(output); */ exinner = (ave - x) * (x - ave) / d1; if (exinner < tolerance) ex = 0.0; else ex = exp(exinner) / d2 * stanarray->interval; /* original before 2007 jun 14: ex := exp(-(x - ave) * (x - ave)/d1) / d2 * interval; writeln(output,'bubba 2'); */ stanarray->rnumbers[i] = ex * entries; stanarray->numbers[i] = (long)floor(stanarray->rnumbers[i] + 0.5); /* original: numbers[i+1] := round(ex * entries); */ } } #undef pi_ #undef tolerance /* end module histogram.gaushist */ /* begin module histogram.stanplot */ Static Void stanplot(fout, stanarray) _TEXT *fout; histarray stanarray; { /* print the stanarray distribution to file f */ double x; /* the position for which the expectation is calculated */ long i; /* index */ if (*fout->name != '\0') { if (fout->f != NULL) fout->f = freopen(fout->name, "w", fout->f); else fout->f = fopen(fout->name, "w"); } else { if (fout->f != NULL) rewind(fout->f); else fout->f = tmpfile(); } if (fout->f == NULL) _EscIO2(FileNotFound, fout->name); SETUPBUF(fout->f, Char); fprintf(fout->f, "* position value\n"); for (i = 0; i < stanarray.slots; i++) { x = stanarray.range[(long)start] + (i + 0.5) * stanarray.interval; /* writeln(fout, x:wid:dec, ' ', numbers[i+1]:wid:dec) gpc says: warning: second field width allowed only when writing `real' type */ fprintf(fout->f, "% .*E %*.*f\n", P_max(wid - 7, 1), x, wid, dec, stanarray.rnumbers[i]); /* writeln(fout, x:wid, ' ', numbers[i+1]:wid); write(fout, x:wid, ' ', numbers[i+1]:wid); if curvedata then write(fout, zzz writeln(fout); */ } } #define pi_ 3.14159265358974 /* used in calculating the curve */ /* end module histogram.stanplot */ /* begin module histogram.poishist */ Static Void poishist(entries, ave, stanarray) long entries; double ave; histarray *stanarray; { /* fill the stanarray with values assuming the data fits a poisson distribution. the probability distribution is defined as: prob(x) = ave**x * exp(-ave) / 'x-factorial' . note several things: the variance of a poisson distribution is the same as the average; the poisson distribution is only defined for ave > 0 and for x >= 0; the distribution is only really defined for integer x, but we can also calculate it for reals. since for large values of x 'x-factorial' is hard to calculate we use the following approximation based on sterling's approximation for 'x-factorial': ln(prob(x)) = x - ave + x * ln(ave/x) - ln(2*pi*x)/2 . for x = 1 this approximation is only off by the factor 1.08, and allows us to calculate the prob(x) for all real x > 1. for x between 0 and 1 we use the approximation prob(x) = exp(-ave). if ave <= 0 we skip out of the procedure. */ double x; /* the position for which the expectation is calculated */ double lnex; /* the natural log of ex */ double ex; /* probability of getting a value in the interval around x */ long i; /* index */ long FORLIM; if (ave <= 0) { printf( " warning: poisson not defined for ave <= 0, procedure poishist called but not used\n"); return; } FORLIM = stanarray->slots; for (i = 0; i < FORLIM; i++) { /* get the midpoint of the interval */ x = stanarray->range[(long)start] + (i + 0.5) * stanarray->interval; /* the poisson is not defined for x < 0, so we set the array to 0 */ if (x < 0) { ex = 0.0; /* for x between 0 and 1 use ex = exp(-ave) */ } else if (x <= 1) ex = exp(-ave); else { lnex = x - ave + x * log(ave / x) - log(2 * pi_ * x) / 2; ex = exp(lnex) * stanarray->interval; } /* original: numbers[i+1] := round(ex * entries) */ stanarray->rnumbers[i] = ex * entries; stanarray->numbers[i] = (long)floor(stanarray->rnumbers[i] + 0.5); } } #undef pi_ /* end module histogram.poishist */ /* begin module histogram.hplot */ Static Void hplot(thefile, scaling, splot, dataarray, stanarray, dch_, sch_, bch_) _TEXT *thefile; double scaling; boolean splot; histarray dataarray, stanarray; Char dch_, sch_, bch_; { /* plot the data (in dataarray) to 'thefile'. if splot is true, also plot the data in stanarray. the data is plotted using the character 'dch', the standard using 'sch' and 'bch' is used for the coincidence of the two. */ long dval; /* the data value */ long sval; /* the standard value */ long i, j; /* indicies */ fprintf(thefile->f, "* interval histogram"); if (curvedata) fprintf(thefile->f, " curve"); fprintf(thefile->f, "\n* beginning value "); if (curvedata) fprintf(thefile->f, " data"); fprintf(thefile->f, "\n*\n"); for (i = 0; i < dataarray.slots; i++) { dval = (long)floor(dataarray.numbers[i] * scaling + 0.5); fprintf(thefile->f, " %*.*f %*ld ", wid, dec, dataarray.range[(long)start] + i * dataarray.interval, wid, dataarray.numbers[i]); /* zzz */ /* write(thefile, ' bubba '); */ if (curvedata) fprintf(thefile->f, " %*.*f ", wid, dec, stanarray.rnumbers[i]); /*qqq ' ',numbers[i]:wid,' '); */ if (splot) { sval = (long)floor(stanarray.numbers[i] * scaling + 0.5); if (dval < sval) { for (j = 1; j <= dval; j++) putc(dch_, thefile->f); for (j = dval + 1; j < sval; j++) putc(' ', thefile->f); putc(sch_, thefile->f); } else if (dval == sval && dval > 0) { for (j = 1; j < dval; j++) putc(dch_, thefile->f); putc(bch_, thefile->f); } else { for (j = 1; j < sval; j++) putc(dch_, thefile->f); if (sval > 0) putc(bch_, thefile->f); for (j = sval + 1; j <= dval; j++) putc(dch_, thefile->f); } } else { for (j = 1; j <= dval; j++) putc(dch_, thefile->f); } putc('\n', thefile->f); } } /* end module histogram.hplot */ /* begin module genhis.writeit */ Static Void writeit() { /* write the information to thefile, and plot the results */ long maxval = 0; /* maximum value in the dataarray */ long i; /* an index */ double sd; /* standard deviation */ long FORLIM; fprintf(histog.f, "* parameters:\n"); fprintf(histog.f, "* %*ld is the data column used\n", wid, column); fprintf(histog.f, "* %*ld numbers are in the file\n", wid, entries); fprintf(histog.f, "* %*.*f is the minimum number\n", wid, dec, min); fprintf(histog.f, "* %*.*f is the maximum number\n", wid, dec, max); fprintf(histog.f, "* %*.*f is the MEAN\n", wid, dec, ave); sd = sqrt(vari); fprintf(histog.f, "* %*.*f is the STANDARD DEVIATION\n", wid, dec, sd); fprintf(histog.f, "* %*.*f is the STANDARD ERROR OF THE MEAN (SEM)\n", wid, dec, sd / sqrt(entries - 1.0)); fprintf(histog.f, "* %*.*f is the variance\n", wid, dec, vari); fprintf(histog.f, "* %*.*f is the uncertainty in bits\n", wid, dec, uncertainty); if (sd > 0) fprintf(histog.f, "* %*.*f is the computed uncertainty in bits (Shannon p.57)\n", wid, dec, log(sqrt(2 * pi * e) * sd) / log(2.0)); else fprintf(histog.f, "* 0 [sd <= 0, can't compute uncertainty from sd]\n"); fprintf(histog.f, "*\n"); fprintf(histog.f, "* %*.*f to %*.*f is the range of data plotted\n", wid, dec, dataarray.range[(long)start], wid, dec, dataarray.range[(long)stop]); fprintf(histog.f, "* %*.*f is the x-axis interval\n", wid, dec, dataarray.interval); fprintf(histog.f, "* %6ld is the number of intervals\n", dataarray.slots); FORLIM = dataarray.slots; /* find the maximum histogram value and set the scaling */ for (i = 0; i < FORLIM; i++) { if (maxval < dataarray.numbers[i]) maxval = dataarray.numbers[i]; } /* prevent a bomb if no values in the interval */ if (maxval == 0) maxval = pageheight; scaling = (double)pageheight / maxval; if (scaling > 1) scaling = 1.0; fprintf(histog.f, "* %*ld is the maximum y-axis value\n", wid, maxval); fprintf(histog.f, "* %*.*f is the y-axis scale\n", wid, dec, scaling); /* if a standard plot is wanted, generate it now */ fprintf(histog.f, "*\n"); if (standplot == none) { fprintf(histog.f, "* no standard plot\n"); splot = false; } else { splot = true; /* set the array parameters up the same */ stanarray.range[(long)start] = dataarray.range[(long)start]; stanarray.range[(long)stop] = dataarray.range[(long)stop]; stanarray.slots = dataarray.slots; stanarray.interval = dataarray.interval; if (standplot == gaussian) { gaushist(entries, ave, vari, &stanarray); stanplot(&curve, stanarray); if (splot == true) fprintf(histog.f, "* gaussian standard plotted\n"); } else if (standplot == poisson) { /* if ave <= 0 we cannot use the poisson */ if (ave > 0) { fprintf(histog.f, "* poisson standard plotted. "); if (stanarray.range[(long)start] < 0) { fprintf(histog.f, "* warning: poisson not defined for x < 0"); printf(" warning: poisson not defined for x < 0\n"); } fprintf(histog.f, "*\n"); poishist(entries, ave, &stanarray); } else { printf( "warning: poisson not defined for ave <= 0, no standard will be plotted\n"); splot = false; } } } fprintf(histog.f, "*\n"); if (entries > 0) hplot(&histog, scaling, splot, dataarray, stanarray, dch, sch, bch); else fprintf(histog.f, "* (no numbers are in the file, so no histogram is made)\n"); } /* end module genhis.writeit */ main(argc, argv) int argc; Char *argv[]; { /* begin module genhis.body */ PASCAL_MAIN(argc, argv); if (setjmp(_JL1)) goto _L1; curve.f = NULL; strcpy(curve.name, "curve"); histog.f = NULL; strcpy(histog.name, "histog"); data.f = NULL; strcpy(data.name, "data"); genhisp.f = NULL; strcpy(genhisp.name, "genhisp"); printf(" genhis %4.2f\n", version); setparam(); datahist(&data, hlines, column, &entries, &min, &max, &ave, &vari, &dataarray); writeit(); /* end module genhis.body */ _L1: if (genhisp.f != NULL) fclose(genhisp.f); if (data.f != NULL) fclose(data.f); if (histog.f != NULL) fclose(histog.f); if (curve.f != NULL) fclose(curve.f); exit(EXIT_SUCCESS); } /* End. */