/* Output from p2c 1.21alpha-07.Dec.93, the Pascal-to-C translator */ /* From input file "diana.p" */ #include /* diana: dinucleotide analysis of an aligned book R. Michael Stephens modified by: Dr. Thomas D. Schneider National Institutes of Health National Cancer Institute Center for Cancer Research Nanobiology Program Molecular Information Theory Group Frederick, Maryland 21702-1201 schneidt@mail.nih.gov permanent email: toms@alum.mit.edu http://www.ccrnp.ncifcrf.gov/~toms/ */ /* end of program */ /* begin module version */ #define version 2.13 /* of diana.p 2009 Apr 09 2009 Apr 09, 2.13: Rcorrelation/4 is Rnormalized, remove 17th column. 2009 Apr 09, 2.12: extend backspace 2009 Apr 09, 2.11: continued raw Rcorrelation (no error correction) 2009 Apr 09, 2.10: Give raw Rcorrelation (no error correction) 2009 Apr 09, 2.09: ignore inst file if reading book alignment!! 2009 Apr 09, 2.08: increase trimax from 20000 to 25000 2009 Apr 09, 2.07: bug: reading protseq when book exists! 2008 Jul 04, 2.06: compute with small sample correction for Rcorrelationsum 2008 Jul 03, 2.05: parse '-'; book or protseq; read fasta format 2.04, 2003 Jan 17: find and solve NaN error: addpair never adds the last row! Bug not located but division by zero protections installed. 2.03, 2003 Jan 15: find and solve NaN error 2.02, 2003 Jan 15: report 1-Rc instead of 1-Rnormalized in column 13 to properly account for small sample error!! in writeprism 2.01, 2002 Sep 5: finish upgrade 2.00, 2002 Aug 21: upgrade delmod modules: alignment 1.98, 2000 Jul 16: upgrade documentation 1.97, 1996 Oct 4: previous changes origin 1990 February 26 */ #define updateversion 2.01 /* defines lowest acceptable current parameter file */ #define previousupdateversion 1.88 /* previous updateversion */ #define versionupperbound 10.00 /* highest version that will be accepted */ /* end module version */ /* begin module describe.diana */ /* name diana: dinucleotide analysis of an aligned book synopsis diana(book: in, inst: in, protseq: in, dianap: in, da: out, output: out); files book: the standard delila book that is to be analyzed inst: the instruction set that was used to make the book protseq: aligned sequences in the protseq format (see alpro.p). The sequences can only contain bases a, c, g, t. This file may be empty, in which case the book and inst are used. Likewise, if the book is empty, the protseq will be used. One of the two must be empty. dianap: Parameters to control the program, one per line: 0. The version of diana that this parameter file is designed for. If the program finds an old version, it will *upgrade* the dianap file. 1. fromparam and toparam (two integers), determine the from-to range over which to do the analysis. If dianap is empty, the range from book and inst is used. 2. fromprotseq and toprotseq (two integers) define the range of the protseq. They are only used if the protseq file is used. 3. dicontrol: The first character of the third line controls printing to da: 'd': just sets of 16 correlation data are printed (ie, only d in column 1 of the da file). 'i': just information about the correlation is printed (ie, only i in column 1 of the da file). 'b': both data and information printed 4. colorslope, colorconstant: two real numbers on the fourth line control the reported value of the normalized information content (column 13 below). This allows one to adjust the range of colors produced in a color plot. For PostScript colors, 0.84 and 0.16 respecitvely gives yellow ranging up to red. Once the Rnormalized has been calculated, it is replaced by the value: (Rnormalized times colorslope) added to colorconstant. 5. alignmenttype: the first character on the first line, defines how to align the pieces. See the alist program for the detailed logic. There are three choices, as in the alist program: 'f' (for 'first') then the sequences are always aligned by their first base. 'i' then the sequences are aligned by the delila instructions. If the inst file is empty, alignment is forced to the 'b' mode. 'b' (for 'internal') then the alignment is on the internal zero of the book's sequence. This option is to be used when "default coordinate zero" is used in the Delila instructions. da: the di-analysis file that contains the output triangular array. column 1: Tells what that row of data is. There are three choices: n : the column is a normal data element in the triangle. d : the column is an element on the diagonal of the triangle, where the two coordinates are equal. i : the column contains the information content correlating the two positions defined by columns 3 and 4. column 2: Tells what the dinucleotide is. There are 16 dinucleotides aa, ac, ag, at, ... , tt as well as `in' or `id', which denote columns that contain information content. `id' means that the information is for the diagonal, while `in' are off-diagonal. Combining "in" with "id" gives the entire information triangle. column 3: The position on the sequence that corresponds to the x coordinate on a Cartesian graph. column 4: The position on the sequence that corresponds to the y coordinate on a Cartesian graph. column 5: A column of constant 1's usable by xyplo (usually xscolumn). column 6: A column of constant 1's usable by xyplo (usually xscolumn). column 7: The number of data points at a position column 8: The frequency of the dinucleotide in column 2 at position (column 3, column 4). If the column is an information column, then this is the information at that position. When the position is off diagonal, this is the correlation information Rcorrelation with small sample correction (ssc16). When the position is on diagonal it is Rsequence. (2008 Jul 4: "without small sample correction" was here, but it appears to be in the code as ssc4.) column 9: One minus the frequency (or 1 - column 8). If this is an information column, then this is the chi-square value. column 10: A column of constants usable by xyplo. If this is in an information row, then this column is the number of degrees of freedom at that position column 11: A column of constant 1's usable by xyplo (usually hue). column 12: A column of constant 0's usable by xyplo (usually saturation). column 13: Information from column 8 normalized by dividing by the maximum possible information (4 for non-diagonal, 2 for diagonal). This gives a number between 0 and 1. This number is then multiplied by the parameter colorslope and then added to colorconstant. column 14: 1 minus column 13 column 15: correlation coefficient for x to y on information (in or id) rows column 16: A column of constant 1's usable by xyplo (saturation alternative). output: error messages from the program description Diana goes through a book and looks at relationships of dinucleotide pairs within the sequences. The output of the program is in the form of a triangular array rather than the entire square of x to y relationships. This saves half of the calculations. For every pair of coordinates, the frequency of the dinucleotide pair is tabulated. The program also calculates the chi-square for the dinucleotide given the expected mono-nucleotide frequencies. The correlation information is calculated as the information in the dinucleotide frequencies less the information in each of the two mononucleotides. The output of the program may be sent into xyplo for graphics. Note the distinction between the `in' and `id' information columns. Information in a binding site is usually going to appear on the diagonal, so by making this distinction, one can eliminate the diagonal information peaks for statstical analysis with genhis. The program now has an error correction for small sample size. See Schneider1986 p. 427, eqn A6. s = 4 or s = 16. examples See xyplop.diana and xyplop.diana.color for example xyplop files. To create a xyin file when the di control is 'b', extract all lines from the da file that contain ' in '. To do this in Unix use: grep ' in ' da > xyin That will not include the diagonal, which contains Rsequence and so normally should not be compared to the correlations. To include the diagonal use: grep i da > xyin To sort the output files to find the most significant correlations, one can use the Unix commands: grep in da | sort +8 -9 -n -r | cat > da.sorted The -r puts the most significant ones first. documentation @article{Stephens.Schneider.Splice, author = "R. M. Stephens and T. D. Schneider", title = "Features of spliceosome evolution and function inferred from an analysis of the information at human splice sites", journal = "J. Mol. Biol.", volume = "228", pages = "1124-1136", year = "1992"} see also {Example dianap file:} dianap {Example xyplop files used in conjunction with this program:} xyplop.diana, xyplop.diana.color {The program to plot the output:} xyplo.p {Program to list aligned sequences:} alist.p {Program to graph histograms:} genhis.p {The alpro program defines the protseq format:} alpro.p {Related programs for plotting the data in three dimensions:} da3d.p, da3drotate.p author R. Michael Stephens Modified by Tom Schneider to have user requested range and: faster calculation in main loop [1995 Jan 9], new input routine that reads protseq format [1995 Jan 9]. bugs It would be nice to allow a restricted list of pairs to be done. protseq has no definition of coordinates, and this forces the use of the fromprotseq and toprotseq. A definition for multiply aligned sequences with coordinates should be created. technical notes */ /* end module describe.diana */ /* ************************************************************************ */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module diana.const */ #define trimax 25000 /* the maximum number of data points */ #define infofield 6 /* real number field width */ #define infodecim 5 /* number of decimal points */ /* end module diana.const */ /* begin module book.const */ /* constants needed for book manipulations */ #define dnamax 1024 /* length of dna arrays */ #define namelength 100 /* maximum key name length */ #define linelength 80 /* maximum line readable in book */ /* end module book.const version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module string.const */ #define maxstring 2000 /* the maximum string */ /* end module string.const version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module filler.const */ #define fillermax 50 /* the size of the filler array for a string */ /* end module filler.const version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module string.type */ /* pointer to a string */ typedef struct string { /* a string of characters */ Char letters[maxstring]; /* the letters in the string */ long length; /* the number of characters in the string */ long current; /* the letter we are working on */ Char *next; /* the next string in a series */ } string; /* end module string.type version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module filler.type */ /* the following is an array used to fill a string. it is convenient to have it much shorter than the maxstring, so that it is easy to fill the string using procedure fillstring. the user must declare the value of constant fillermax. */ typedef Char filler[fillermax]; /* end module filler.type version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module trigger.type */ typedef struct trigger { /* an object to be searched for */ string seek; /* the characters looked for */ long state; /* how close to triggering we are */ boolean skip; /* trigger not found- skip the line */ /* the trigger was found */ boolean found; } trigger; /* end module trigger.type version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module book.type */ /* types needed for book manipulations */ typedef long chset[5]; /* types defined in book definition */ typedef Char alpha[namelength]; /* this is not alfa */ /* name is a left justified string with blanks following the characters */ typedef struct name { alpha letters; /* zero means an unspecified structure */ char length; } name; typedef struct line { /* a line of characters */ Char letters[linelength]; char length; struct line *next; } line; typedef enum { plus, minus, dircomplement, dirhomologous } direction; typedef enum { linear, circular } configuration; typedef enum { on, off } state; typedef struct header { /* header of key */ name keynam; /* key name of structure */ line *fulnam; /* full name of structure */ /* note key */ line *note; } header; /* begin module base.type */ /* define the four nucleotide bases */ typedef enum { a, c, g, t } base; /* end module base.type version = 7.59; {of delmod.p 2002 Sep 5} */ /* sequence types */ typedef short dnarange; /* p2c: diana.p, line 345: * Note: Field width for seq assumes enum base has 4 elements [105] */ typedef uchar seq[(dnamax + 3) / 4]; typedef struct dnastring { seq part; dnarange length; struct dnastring *next; } dnastring; typedef struct orgkey { /* organism key */ header hea; /* genetic map units */ line *mapunit; } orgkey; typedef struct chrkey { /* chromosome key */ header hea; double mapbeg; /* number of genetic map beginning */ /* number of genetic map ending */ double mapend; } chrkey; typedef struct piekey { /* piece key */ header hea; double mapbeg; /* genetic map beginning */ configuration coocon; /* configruation (circular/linear) */ direction coodir; /* direction (+/-) relative to genetic map */ long coobeg; /* beginning nucleotide */ long cooend; /* ending nucleotide */ configuration piecon; /* configruation (circular/linear) */ direction piedir; /* direction (+/-) relative to coordinates */ long piebeg; /* beginning nucleotide */ long pieend; /* ending nucleotide */ } piekey; typedef struct piece { piekey key; dnastring *dna; } piece; typedef struct reference { name pienam; /* name of piece referred to */ double mapbeg; /* genetic map beginning */ direction refdir; /* direction relative to coordinates */ long refbeg; /* beginning nucleotide */ long refend; /* ending nucleotide */ } reference; typedef struct genkey { /* gene key */ header hea; reference ref; } genkey; typedef struct trakey { /* transcript key */ header hea; reference ref; } trakey; typedef struct markey { /* marker key */ header hea; reference ref; state sta; line *phenotype; struct marker *next; } markey; typedef struct marker { markey key; dnastring *dna; } marker; /* end module book.type version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module triangle.type */ /* define a triangular array as a linear one */ typedef struct trianglearray { long data[trimax + 1]; long lower; /* lower bound on the triangle */ long upper; /* upper bound on the triangle */ long side; /* length of side of the triangle */ long area; /* area of the triangle */ } trianglearray; /* end module triangle.type */ /* begin module diana.type */ /* stores the number of dinucleotides appearing at positions on a sequence */ typedef trianglearray trisquare[(long)t - (long)a + 1][(long)t - (long)a + 1]; /* define a data cube to store frequencies of a trisquare in */ typedef double realprism[(long)t - (long)a + 1][(long)t - (long)a + 1]; /* an array to store data for computing single base information values */ typedef long sumbase[(long)t - (long)a + 1]; /* an array to store the frequencies of the sumbase values */ typedef double basefreq[(long)t - (long)a + 1]; /* end module diana.type */ /* begin module book.var */ /* ************************************************************************ */ /* global variables needed for book manipulations */ /* free storage: */ Static line *freeline; /* unused lines */ Static dnastring *freedna; /* unused dnas */ Static boolean readnumber; /* whether to read a number from the notes, or to read in the notes */ Static long number; /* the number of the item just read */ Static boolean numbered; /* true when the item just read is numbered */ Static boolean skipunnum; /* a control variable to allow skipping of un-numbered items in the book */ /* ************************************************************************ */ /* end module book.var version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module diana.var */ Static _TEXT book; /* the delila book being analyzed */ Static _TEXT inst; /* the instructions used to create the delila book */ Static _TEXT protseq; /* sequences in protseq format */ Static _TEXT dianap; /* parameters to control the program */ Static _TEXT da; /* the output triangular analysis */ Static jmp_buf _JL1; /* end module diana.var */ /* ************************************************************************ */ /* begin module halt */ Static Void halt() { /* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. */ printf(" program halt.\n"); longjmp(_JL1, 1); } /* end module halt version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module copyaline */ Static Void copyaline(fin, fout) _TEXT *fin, *fout; { /* copy a line from file fin to file fout */ while (!P_eoln(fin->f)) { putc(P_peek(fin->f), fout->f); getc(fin->f); } fscanf(fin->f, "%*[^\n]"); getc(fin->f); putc('\n', fout->f); } /* copyaline */ /* end module copyaline version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module copylines */ Static long copylines(fin, fout, n) _TEXT *fin, *fout; long n; { /* copy n lines of file fin to file fout. the actual number of lines copied is returned. */ long index = 0; /* the current line number */ while (!BUFEOF(fin->f) && index < n) { copyaline(fin, fout); index++; } return index; } /* copylines */ /* end module copylines version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module package.trigger */ /* ************************************************************************ */ /* begin module clearstring */ Static Void clearstring(ribbon) string *ribbon; { /* empty the string */ long index; /* to the ribbon */ for (index = 0; index < maxstring; index++) ribbon->letters[index] = ' '; ribbon->length = 0; ribbon->current = 0; } /* clearstring */ Static Void initializestring(ribbon) string *ribbon; { /* start the string with a nil pointer. This routine should be called before doing linked list work. This allows the standard string routines to clear the string without killing the pointer. */ clearstring(ribbon); ribbon->next = NULL; } /* initializestring */ /* end module clearstring version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module writestring */ Static Void writestring(tofile, s) _TEXT *tofile; string *s; { /* write the string s to file tofile, no writeln */ long i; /* index to s */ long FORLIM; FORLIM = s->length; for (i = 0; i < FORLIM; i++) putc(s->letters[i], tofile->f); } /* writestring */ /* end module writestring version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module filler.fillstring */ Static Void fillstring(s, a_) string *s; Char *a_; { /* this procedure makes it reasonably easy to fill the string s with characters. one calls the procedure as: */ /* 1 2 3 4 5 */ /* 12345678901234567890123456789012345678901234567890 */ /* fillstring(s, 'this-is-the-string '); the two comments make it easy to line the characters up. also, for this example, it was assumed that the length of filler as defined by the constant fillermax was 50. */ long length = fillermax; /* of the string without trailing blanks */ long index; /* of s */ clearstring(s); while (length > 1 && a_[length-1] == ' ') length--; if (length == 1 && a_[length-1] == ' ') { printf("fillstring: the string is empty\n"); halt(); } for (index = 0; index < length; index++) s->letters[index] = a_[index]; s->length = length; s->current = 1; } /* fillstring */ /* end module filler.fillstring version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module filler.filltrigger */ Static Void filltrigger(t_, a_) trigger *t_; Char *a_; { /* fill the trigger t */ fillstring(&t_->seek, a_); } /* fillstring */ /* end module filler.filltrigger version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module trigger.proc */ /* this module allows one to scan a series of characters, as from an array or a file, and to "trigger" or detect a simple string in the series. the advantage of the trigger is that several triggers can "observe" a stream of characters at once, each looking for a different thing. some other modules required: interact.const, interact.type */ Static Void resettrigger(t_) trigger *t_; { /* reset the trigger to ground state */ t_->state = 0; t_->skip = false; t_->found = false; } /* resettrigger */ Static Void testfortrigger(ch, t_) Char ch; trigger *t_; { /* look at the character ch. if it is part of the trigger (at the current trigger state), then the trigger state goes higher. if it is not part of the trigger then the trigger state is reset, skip is true and one should skip onward to find the trigger. if the trigger is found, found is true. 1996 Sep 12: Bug found! In the case of a trigger "ab", the program used to miss it for situations like "aab". This was because at the first a it would step up. Then it would see the second a and recognize that was not part of ab. It would fail to realize that it could be the start of a new one. The code now accounts for that possibility. */ t_->state++; /* writestring(list,seek); writeln(list,'testfortrigger seek.letters[',state:1,']:', seek.letters[state],' ch:',ch); */ if (t_->seek.letters[t_->state - 1] == ch) { t_->skip = false; if (t_->state == t_->seek.length) t_->found = true; else t_->found = false; return; } /* it failed. But wait! It could be the beginning of a NEW trigger string! */ if (t_->seek.letters[0] == ch) { t_->state = 1; t_->skip = false; t_->found = false; return; } t_->state = 0; t_->skip = true; t_->found = false; /* reset trigger */ } /* testfortrigger */ /* end module trigger.proc version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module skipblanks */ Static Void skipblanks(thefile) _TEXT *thefile; { /* skip over blanks until a non-blank, or end of line, is found */ while ((P_peek(thefile->f) == ' ') & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipnonblanks(thefile) _TEXT *thefile; { /* skip over nonblanks until a blank, or end of line, is found */ while ((P_peek(thefile->f) != ' ') & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipcolumn(thefile) _TEXT *thefile; { /* skip over a data column */ skipblanks(thefile); skipnonblanks(thefile); } /* end module skipblanks version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* ************************************************************************ */ /* end module package.trigger version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module package.align */ /* ************************************************************************ */ /* begin module package.getpiece */ /* ************************************************************************ */ /* begin module package.brpiece */ /* ************************************************************************ */ /* begin module book.basis */ /* procedures needed for book manipulations */ /* get procedures should be used for all linked lists of records */ Static Void getline(l) line **l; { /* obtain a line from the free line list or by making a new one */ if (freeline != NULL) { *l = freeline; freeline = freeline->next; } else *l = (line *)Malloc(sizeof(line)); (*l)->length = 0; (*l)->next = NULL; } Static Void getdna(l) dnastring **l; { if (freedna != NULL) { *l = freedna; freedna = freedna->next; } else *l = (dnastring *)Malloc(sizeof(dnastring)); (*l)->length = 0; (*l)->next = NULL; } /* clear procedures should be called each time the records are no longer needed failure to do this may result in a stack overflow. */ Static Void clearline(l) line **l; { /* return a line to the free line list */ line *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = freeline; freeline = lptr; } Static Void writeline(afile, l, carriagereturn) _TEXT *afile; line *l; boolean carriagereturn; { /* write a line to a file, with carriage return if carriagereturn is true. */ long index; /* index to characters in l */ long FORLIM; FORLIM = l->length; for (index = 0; index < FORLIM; index++) putc(l->letters[index], afile->f); if (carriagereturn) putc('\n', afile->f); } Static Void showfreedna() { /* show the freedna list */ long counter = 0; /* count of freedna list */ dnastring *l; /* pointer into freedna list */ l = freedna; while (l != NULL) { counter++; printf("%ld", counter); printf(", length = %d\n", l->length); /* This is illegal according to gpc because one cannot write a pointer to a text file. It can be unearthed for debugging. write(output, ', pointer id: ',l:1); */ l = l->next; } } Static Void cleardna(l) dnastring **l; { dnastring *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = freedna; freedna = lptr; } Static Void clearheader(h) header *h; { /* clear the header h (remove lines to free storage) */ clearline(&h->fulnam); while (h->note != NULL) clearline(&h->note); } Static Void clearpiece(p) piece **p; { /* clear the dna of the piece */ while ((*p)->dna != NULL) cleardna(&(*p)->dna); clearheader(&(*p)->key.hea); } Static base chartobase(ch) Char ch; { /* convert a character into a base */ base Result; switch (ch) { case 'a': Result = a; break; case 'c': Result = c; break; case 'g': Result = g; break; case 't': Result = t; break; } return Result; } Static Char basetochar(ba) base ba; { /* convert a base into a character */ Char Result; switch (ba) { case a: Result = 'a'; break; case c: Result = 'c'; break; case g: Result = 'g'; break; case t: Result = 't'; break; } return Result; } Static base complement(ba) base ba; { /* take the complement of ba */ base Result; switch (ba) { case a: Result = t; break; case c: Result = g; break; case g: Result = c; break; case t: Result = a; break; } return Result; } Static Char chomplement(b) Char b; { /* create the character complement of base b. I must be getting hungry! */ return (basetochar(complement(chartobase(b)))); } Static long pietoint(p, pie) long p; piece *pie; { /* p is a coordinate on the piece. we want to transform p into a number from 1 to n: an internal coordinate system for easy manipulation of piece coordinates */ /* Note: the dirhomologous and dircomplement are treated as plus and minus directions, which MIGHT NOT BE RIGHT! */ long i; /* an intermediate value */ piekey *WITH; WITH = &pie->key; switch (WITH->piedir) { case dirhomologous: case plus: if (p >= WITH->piebeg) i = p - WITH->piebeg + 1; else i = p - WITH->coobeg + WITH->cooend - WITH->piebeg + 2; break; case dircomplement: case minus: if (p <= WITH->piebeg) i = WITH->piebeg - p + 1; else i = WITH->cooend - p + WITH->piebeg - WITH->coobeg + 2; break; } return i; } Static long inttopie(i, pie) long i; piece *pie; { /* i is in the range 1 to some maximum. it is an internal coordinate system for the program. we want to do a coordinate transformation to obtain a value in the range of the piece called pie: i=1 corresponds to piebeg and i=its maximum corresponds to pieend */ /* Note: the dirhomologous and dircomplement are treated as plus and minus directions, which MIGHT NOT BE RIGHT! */ long p; /* an intermediate value */ piekey *WITH; WITH = &pie->key; switch (WITH->piedir) { case dirhomologous: case plus: p = WITH->piebeg + i - 1; if (p > WITH->cooend) { if (WITH->coocon == circular) p += WITH->coobeg - WITH->cooend - 1; } break; case dircomplement: case minus: p = WITH->piebeg - i + 1; if (p < WITH->coobeg) { if (WITH->coocon == circular) p += WITH->cooend - WITH->coobeg + 1; } break; } return p; } Static long piecelength(pie) piece *pie; { /* return the length of the dna in pie */ return (pietoint(pie->key.pieend, pie)); } /* end module book.basis version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.getto */ Static Char getto(thefile, theline, ch) _TEXT *thefile; long *theline; long *ch; { /* search the file for a character in the first line which is a member of the set ch. Note: on 1999 March 10 the definition of this function was cleaned up. Instead of putting thefile on the line AFTER the charcter ch has been found, it puts thefile ON the line. Other routines like brdna and brpiece have to move to the next line themselves. This makes getto give the OBJECT. */ Char achar = ' '; /* a character in thefile */ boolean done = false; /* done finding achar */ while (!done) { if (BUFEOF(thefile->f)) { done = true; break; } achar = P_peek(thefile->f); if (P_inset(achar, ch)) { done = true; break; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } if (P_inset(achar, ch)) return achar; else { return ' '; /* The old method - while (not(achar in ch)) and (not eof(thefile)) do begin readln(thefile,achar); theline := succ(theline) end; if (achar in ch) then getto:=achar else getto:=' ' */ } } /* end module book.getto version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.skipstar */ Static Void skipstar(thefile) _TEXT *thefile; { /* skip start of line (or star = '*'). */ if (BUFEOF(thefile->f)) { printf(" procedure skipstar: end of book found\n"); halt(); return; } if (P_peek(thefile->f) != '*') { printf(" procedure skipstar: bad book\n"); printf(" \"*\" expected as first character on the line, but \"%c\" was found\n", P_peek(thefile->f)); halt(); } getc(thefile->f); /* skip the star */ if (P_peek(thefile->f) != ' ') { /* skip the blank */ printf(" procedure skipstar: bad book\n"); printf(" \"* \" expected on a line but \"*%c\" was found\n", P_peek(thefile->f)); halt(); } getc(thefile->f); } /* skipstar */ /* end module book.skipstar version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.brreanum */ Static Void brreanum(thefile, theline, reanum) _TEXT *thefile; long *theline; double *reanum; { /* read a real number from the file */ skipstar(thefile); fscanf(thefile->f, "%lg%*[^\n]", reanum); getc(thefile->f); (*theline)++; } /* end module book.brreanum version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.brnumber */ Static Void brnumber(thefile, theline, num) _TEXT *thefile; long *theline, *num; { /* read a number from the file */ skipstar(thefile); fscanf(thefile->f, "%ld%*[^\n]", num); getc(thefile->f); (*theline)++; } /* end module book.brnumber version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.brname */ Static Void brname(thefile, theline, nam) _TEXT *thefile; long *theline; name *nam; { /* read a name from the file */ long i; /* an index to the name */ Char c_; /* a character read */ skipstar(thefile); nam->length = 0; do { nam->length++; c_ = getc(thefile->f); if (c_ == '\n') c_ = ' '; nam->letters[nam->length - 1] = c_; } while (!(P_eoln(thefile->f) || nam->length >= namelength || nam->letters[nam->length - 1] == ' ')); if (nam->letters[nam->length - 1] == ' ') nam->length--; if (nam->length < namelength) { for (i = nam->length; i < namelength; i++) nam->letters[i] = ' '; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } /* brname */ /* end module book.brname version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.brline */ Static Void brline(thefile, theline, l) _TEXT *thefile; long *theline; line **l; { /* read a line from the file */ long i = 0; Char acharacter; skipstar(thefile); while (!P_eoln(thefile->f)) { i++; acharacter = getc(thefile->f); if (acharacter == '\n') acharacter = ' '; (*l)->letters[i-1] = acharacter; } (*l)->length = i; (*l)->next = NULL; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } /* end module book.brline version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.brdirect */ Static Void brdirect(thefile, theline, direct) _TEXT *thefile; long *theline; direction *direct; { /* read a direction */ Char ch; skipstar(thefile); fscanf(thefile->f, "%c%*[^\n]", &ch); getc(thefile->f); if (ch == '\n') ch = ' '; (*theline)++; if (ch == '+') *direct = plus; else *direct = minus; } /* end module book.brdirect version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.brconfig */ Static Void brconfig(thefile, theline, config) _TEXT *thefile; long *theline; configuration *config; { /* read a configuration */ Char ch; skipstar(thefile); fscanf(thefile->f, "%c%*[^\n]", &ch); getc(thefile->f); if (ch == '\n') ch = ' '; (*theline)++; if (ch == 'l') *config = linear; else *config = circular; } /* end module book.brconfig version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.brnotenumber */ Static Void brnotenumber(thefile, theline, note) _TEXT *thefile; long *theline; line **note; { /* book note reading to obtain the number of the object. the procedure returns the value of the number as a global. (this is not such a good practice, but we are stuck with it for now.) */ *note = NULL; numbered = false; number = 0; /* force number to zero if there is no number at all */ /* the next character is n or * depending on whether there are notes */ if (P_peek(thefile->f) != 'n') return; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; if (P_peek(thefile->f) != 'n') { skipstar(thefile); if (!P_eoln(thefile->f)) { if (P_peek(thefile->f) == '#') { numbered = true; getc(thefile->f); /* move past the number symbol */ fscanf(thefile->f, "%ld", &number); } } do { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } while (P_peek(thefile->f) != 'n'); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; return; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } /* brnotenumber */ /* end module book.brnotenumber version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.brnote */ Static Void brnote(thefile, theline, note) _TEXT *thefile; long *theline; line **note; { /* read note key */ line *newnote; /* the new note */ line *previousnote; /* the last line of the notes */ *note = NULL; if (P_peek(thefile->f) != 'n') /* enter note */ return; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; if (P_peek(thefile->f) != 'n') { /* abort null note (n/n) */ getline(note); newnote = *note; while (P_peek(thefile->f) != 'n') { /* wait until end of note */ brline(thefile, theline, &newnote); previousnote = newnote; /* get next note */ getline(&newnote->next); newnote = newnote->next; } /* last note was not used, so: */ clearline(&newnote); previousnote->next = NULL; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; return; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } /* brnote */ /* end module book.brnote version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.brheader */ Static Void brheader(thefile, theline, hea) _TEXT *thefile; long *theline; header *hea; { /* read the header of a key. */ fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* move past the object name - new definition 1999 Mar 13 */ (*theline)++; /*bbb*/ /* read key name */ brname(thefile, theline, &hea->keynam); /* read full name */ getline(&hea->fulnam); brline(thefile, theline, &hea->fulnam); /* read note key */ if (readnumber) brnotenumber(thefile, theline, &hea->note); else brnote(thefile, theline, &hea->note); } /* end module book.brheader version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.copyheader */ Static Void copyheader(fromhea, tohea) header fromhea, *tohea; { /* copy the header fromhea into tohea. Note that the linked objects are NOT copied, but merely pointed to. */ memcpy(tohea->keynam.letters, fromhea.keynam.letters, sizeof(alpha)); tohea->keynam.length = fromhea.keynam.length; tohea->note = fromhea.note; tohea->fulnam = fromhea.fulnam; } /* end module book.copyheader version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.brpiekey */ Static Void brpiekey(thefile, theline, pie) _TEXT *thefile; long *theline; piekey *pie; { /* read piece key, track the line number */ brheader(thefile, theline, &pie->hea); brreanum(thefile, theline, &pie->mapbeg); brconfig(thefile, theline, &pie->coocon); brdirect(thefile, theline, &pie->coodir); brnumber(thefile, theline, &pie->coobeg); brnumber(thefile, theline, &pie->cooend); brconfig(thefile, theline, &pie->piecon); brdirect(thefile, theline, &pie->piedir); brnumber(thefile, theline, &pie->piebeg); brnumber(thefile, theline, &pie->pieend); } /* end module book.brpiekey version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.brdna */ Static Void brdna(thefile, theline, dna) _TEXT *thefile; long *theline; dnastring **dna; { /* read in dna from thefile, track the line */ /* note: if the dna were circularized, by linking the last dnastring to the first, then the cleardna routine could not clear properly, and would loop forever... there is no reason to do that, since a simple mod function will allow one to access the circle. */ Char ch; dnastring *workdna; long SET[5]; long TEMP; getdna(dna); workdna = *dna; ch = getto(thefile, theline, P_addset(P_expset(SET, 0L), 'd')); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; ch = getc(thefile->f); /* skipstar */ if (ch == '\n') ch = ' '; while (ch == '*') { ch = getc(thefile->f); /* skip blank */ if (ch == '\n') ch = ' '; do { ch = getc(thefile->f); if (ch == '\n') ch = ' '; if (ch == 't' || ch == 'g' || ch == 'c' || ch == 'a') { if (workdna->length == dnamax) { getdna(&workdna->next); workdna = workdna->next; } workdna->length++; TEMP = workdna->length - 1; P_clrbits_B(workdna->part, TEMP, 1, 3); P_putbits_UB(workdna->part, TEMP, (int)chartobase(ch), 1, 3); } } while (!P_eoln(thefile->f)); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* go to next line */ (*theline)++; ch = getc(thefile->f); /* ch is either '*' or 'd' */ if (ch == '\n') ch = ' '; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* read past the d */ (*theline)++; } /* end module book.brdna version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.brpiece */ Static Void brpiece(thefile, theline, pie) _TEXT *thefile; long *theline; piece **pie; { /* read in a piece, change theline to reflect the lines traversed */ /* readln(thefile); (* move past the word 'piece' - new definition 1999 Mar 13 *) theline := succ(theline); (* BUG: was below! *) bbb*/ brpiekey(thefile, theline, &(*pie)->key); if (numbered || !skipunnum) brdna(thefile, theline, &(*pie)->dna); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* move past the word 'piece' - new definition 1999 Mar 13 */ (*theline)++; } /* end module book.brpiece version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.brinit */ Static Void brinit(book, theline) _TEXT *book; long *theline; { /* check that the book is ok to read, and set up the global variables for br routines */ /* halt if the book is bad (first word is 'halt') or the first character is not * */ if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); if (!BUFEOF(book->f)) { /* check for the date line */ if (P_peek(book->f) != '*') { if (P_peek(book->f) != 'h') printf(" this is not the first line of a book:\n"); else printf(" bad book:\n"); putchar(' '); while (!(P_eoln(book->f) | BUFEOF(book->f))) { putchar(P_peek(book->f)); getc(book->f); } putchar('\n'); halt(); } } else { printf(" book is empty\n"); halt(); } /* initialize free storage */ freeline = NULL; freedna = NULL; readnumber = true; /* usually we read in numbers for items */ number = 0; /* arbitrary value */ numbered = false; /* the piece has no number (none yet read in) */ skipunnum = false; *theline = 1; } /* brinit */ /* end module book.brinit version = 7.59; {of delmod.p 2002 Sep 5} */ /* ************************************************************************ */ /* end module package.brpiece version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module book.getpiece */ Static Void getpiece(thefile, theline, pie) _TEXT *thefile; long *theline; piece **pie; { /* move to and read in the next piece in the book */ Char ch; long SET[5]; ch = getto(thefile, theline, P_addset(P_expset(SET, 0L), 'p')); /* get to the next p(iece) in the book */ if (ch != ' ') { brpiece(thefile, theline, pie); /* 1999 june 2: removed this: ch:=getto(thefile,theline,['p']); (* read to end of p *) */ /* bbb - now done in brpiece readln(thefile); (* read past piece *) theline := succ(theline); */ } else clearpiece(pie); } /* end module book.getpiece version = 7.59; {of delmod.p 2002 Sep 5} */ /* ************************************************************************ */ /* end module package.getpiece version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module findblank */ Static Void findblank(afile) _TEXT *afile; { /* read a file to find the next blank character */ Char ch; do { ch = getc(afile->f); if (ch == '\n') ch = ' '; } while (ch != ' '); } /* end module findblank version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module findnonblank */ Static Void findnonblank(afile, ch) _TEXT *afile; Char *ch; { /* find the next non blank character in a file, return it in ch. */ *ch = ' '; while (!BUFEOF(afile->f) && *ch == ' ') { *ch = getc(afile->f); if (*ch == '\n') *ch = ' '; if (P_eoln(afile->f)) { fscanf(afile->f, "%*[^\n]"); getc(afile->f); } } } #define maximumrange 10000 /* if the alignment point is more than this distance from the piece ends, the program halts in an attempt to catch the alignment bug... 1991 Jan 11 It appears that the rewrite of the code has removed the bug, but the check will be kept. */ #define semicolon ';' /* end of delila instruction */ /* Local variables for align: */ struct LOC_align { _TEXT *inst; Char ch; /* a character in inst */ trigger endcomment; /* trigger to find '*-)' (ignore the dash!) */ trigger endcurly; /* trigger to find comments: '}' */ } ; /* procedure rd(var f: text; var ch: char); (* read ch from f allowing inspection of the result *) begin read(f,ch); write(output,ch); write(list,ch); write(output,'<',ch,'>'); end; procedure rdln(var f: text); (* readln f allowing inspection of the result *) begin readln(f); writeln(output); writeln(list); end; */ Local Void skipcomment(f, LINK) _TEXT *f; struct LOC_align *LINK; { /* skip an entire comment */ boolean comment = true; /* true means we are inside a comment */ /* skip to end of comment */ resettrigger(&LINK->endcomment); while (comment) { if (BUFEOF(f->f)) { printf("A comment does not end!\n"); halt(); } if (P_eoln(f->f)) { fscanf(f->f, "%*[^\n]"); getc(f->f); continue; } /* rdln(f) */ LINK->ch = getc(f->f); if (LINK->ch == '\n') LINK->ch = ' '; testfortrigger(LINK->ch, &LINK->endcomment); if (LINK->endcomment.found) { comment = false; /*write(output,'<'); rd(f,ch); write(output,'>');*/ } } } Local Void skipcurly(f, LINK) _TEXT *f; struct LOC_align *LINK; { /* skip an entire comment made by {}*/ boolean comment = true; /* true means we are inside a comment */ /* skip to end of comment */ resettrigger(&LINK->endcurly); while (comment) { if (BUFEOF(f->f)) { printf("A comment does not end!\n"); halt(); } if (P_eoln(f->f)) { fscanf(f->f, "%*[^\n]"); getc(f->f); continue; } /* rdln(f) */ LINK->ch = getc(f->f); if (LINK->ch == '\n') LINK->ch = ' '; testfortrigger(LINK->ch, &LINK->endcurly); if (LINK->endcurly.found) { comment = false; /*write(output,'<'); rd(f,ch); write(output,'>');*/ } } } Local Void skipquote(quote, LINK) trigger quote; struct LOC_align *LINK; { /* skip an entire quote of either the ' or " persuasion */ Char kind; /* the kind of quote, ' or " */ kind = quote.seek.letters[0]; /*writeln(output,'skipquote ',kind);*/ do { findnonblank(LINK->inst, &LINK->ch); /* get to the quote */ } while (!((LINK->ch == kind) | BUFEOF(LINK->inst->f))); if (LINK->ch != kind) { printf("end of quote starting with %c not found\n", kind); halt(); } } /* end module findnonblank version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module align.align */ Static Void align(inst_, book, theline, pie, length, alignedbase) _TEXT *inst_, *book; long *theline; piece **pie; long *length, *alignedbase; { /* documentation on align is in module info.align and delman.use.aligned.books. 1996 Sep 12: The routine now uses the trigger functions found in prgmod. The bug in the oldalign routine (that it misses the end of comments that end in a series of astrisks) has been fixed. It now checks that the piece corresponds to the book. */ struct LOC_align V; long p; /* index to a piece name */ long p1; /* another index to a piece name */ boolean done = false; /* done finding an aligning get */ long thebase; /* the base read in */ boolean indefault = false; /* true when within a default statement. These can contain the word 'piece', which must be ignored. */ trigger gettrigger; /* trigger to find 'get' */ trigger defaulttrigger; /* trigger to find 'default' */ trigger nametrigger; /* trigger to find 'name' */ trigger piecetrigger; /* trigger to find 'piece' */ trigger settrigger; /* trigger to find 'set' */ trigger begincomment; /* trigger to find '(-*' (ignore the dash!) */ trigger begincurly; /* trigger to find comments: '{' */ trigger quote1trigger; /* trigger to find single quote ' */ trigger quote2trigger; /* trigger to find double quote " */ name *WITH; long FORLIM; V.inst = inst_; filltrigger(&defaulttrigger, "default "); filltrigger(&gettrigger, "get "); filltrigger(&nametrigger, "name "); filltrigger(&piecetrigger, "piece "); filltrigger(&settrigger, "set "); filltrigger(&begincomment, "(* "); filltrigger(&V.endcomment, "*) "); filltrigger(&begincurly, "{ "); filltrigger(&V.endcurly, "} "); filltrigger("e1trigger, "' "); filltrigger("e2trigger, "\" "); resettrigger(&defaulttrigger); resettrigger(&gettrigger); resettrigger(&nametrigger); resettrigger(&piecetrigger); resettrigger(&settrigger); resettrigger(&begincomment); resettrigger(&begincurly); resettrigger("e1trigger); resettrigger("e2trigger); if (BUFEOF(book->f)) /* if there is still more to the book ... */ return; getpiece(book, theline, pie); /* read in the piece */ if (BUFEOF(book->f)) /* if we found a piece ... */ return; *length = pietoint((*pie)->key.pieend, *pie); /* calculate piece length */ /* now find in inst the next occurance of 'get' */ while (!done) { if (BUFEOF(V.inst->f)) { /* no instructions? */ *alignedbase = 1; /* simply align by the first base */ done = true; break; } if (P_eoln(V.inst->f)) { fscanf(V.inst->f, "%*[^\n]"); getc(V.inst->f); continue; } /*then rdln(inst)*/ V.ch = getc(V.inst->f); if (V.ch == '\n') V.ch = ' '; testfortrigger(V.ch, &begincomment); testfortrigger(V.ch, &begincurly); if (begincomment.found || begincurly.found) { if (V.ch == '*') { skipcomment(V.inst, &V); resettrigger(&begincomment); } else { resettrigger(&begincurly); skipcurly(V.inst, &V); } continue; } testfortrigger(V.ch, &gettrigger); if (gettrigger.found) { findnonblank(V.inst, &V.ch); /* get to "from" */ findblank(V.inst); /* get past "from" */ fscanf(V.inst->f, "%ld", &thebase); /* read in the alignedbase */ /*writeln(output);writeln(output,'thebase = ',thebase:1);*/ *alignedbase = pietoint(thebase, *pie); /*writeln(output,'alignedbase=',alignedbase:1);*/ done = true; } testfortrigger(V.ch, "e1trigger); if (quote1trigger.found) skipquote(quote1trigger, &V); testfortrigger(V.ch, "e2trigger); if (quote2trigger.found) skipquote(quote2trigger, &V); testfortrigger(V.ch, &defaulttrigger); if (defaulttrigger.found) { indefault = true; resettrigger(&defaulttrigger); } if (V.ch == semicolon) indefault = false; testfortrigger(V.ch, &settrigger); if (settrigger.found) { indefault = true; resettrigger(&settrigger); } if (V.ch == semicolon) indefault = false; /* check that piece names are correct */ testfortrigger(V.ch, &piecetrigger); if (indefault) continue; if (!piecetrigger.found) continue; skipblanks(V.inst); /* get to name */ WITH = &(*pie)->key.hea.keynam; FORLIM = WITH->length; for (p = 1; p <= FORLIM; p++) { V.ch = getc(V.inst->f); if (V.ch == '\n') V.ch = ' '; if (WITH->letters[p-1] != V.ch) { printf("The piece name in the book: \n"); /* p2c: diana.p, line 1566: Note: * Format for packed-array-of-char will work only if width < length [321] */ printf("%.*s\n", WITH->length, WITH->letters); printf("does not match the inst file name:\n"); /* write the letters that matched: */ for (p1 = 1; p1 < p; p1++) putchar(WITH->letters[p-1]); /* write the offending letter: */ putchar(V.ch); /* get the rest of the name and show it: */ done = P_eoln(V.inst->f); while (!done) { done = P_eoln(V.inst->f); if (done) break; V.ch = getc(V.inst->f); if (V.ch == '\n') V.ch = ' '; if (V.ch == ' ' || V.ch == ';') done = true; if (!done) putchar(V.ch); } putchar('\n'); /* mark the first letter that does not match: */ for (p1 = 1; p1 < p; p1++) putchar(' '); printf("^\n"); halt(); /* we are not inside a comment */ } } } /*rd(inst,ch);*/ if (*alignedbase > -maximumrange && *alignedbase <= *length + maximumrange) return; printf(" In procedure align:\n"); printf(" read in base was %ld\n", thebase); printf(" in internal coordinates: %ld\n", *alignedbase); printf(" maximum range was %ld\n", (long)maximumrange); printf(" piece length was %ld\n", *length); WITH = &(*pie)->key.hea.keynam; /* p2c: diana.p, line 1607: Note: * Format for packed-array-of-char will work only if width < length [321] */ printf(" piece name: %.*s\n", WITH->length, WITH->letters); printf(" piece number: %ld\n", number); printf(" aligned base is too far away... see the code\n"); halt(); } #undef maximumrange #undef semicolon /* end module align.align version = 7.59; {of delmod.p 2002 Sep 5} */ /* ************************************************************************ */ /* end module package.align version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module writepiename */ Static Void writepiename(thefile, pie, namespace) _TEXT *thefile; piece *pie; long namespace; { /* write the piece name to the file, left justified in a field of namespace */ long index; /* index to the field */ for (index = 1; index <= namespace; index++) { if (index <= pie->key.hea.keynam.length) putc(pie->key.hea.keynam.letters[index-1], thefile->f); else putc(' ', thefile->f); } } /* Local variables for numberdigit: */ struct LOC_numberdigit { long number, place; /* the exponent of logplace */ long myabsolute; /* the absolute value of number */ Char acharacter; /* the character to be returned */ } ; Local Void digit(LINK) struct LOC_numberdigit *LINK; { /* extract a digit at the place position */ long tenplace; /* ten times place */ long z; /* an intermediate value */ long d; /* the digit extracted */ tenplace = LINK->place * 10; z = LINK->myabsolute - LINK->myabsolute / tenplace * tenplace; if (LINK->place == 1) d = z; else d = z / LINK->place; switch (d) { case 0: LINK->acharacter = '0'; break; case 1: LINK->acharacter = '1'; break; case 2: LINK->acharacter = '2'; break; case 3: LINK->acharacter = '3'; break; case 4: LINK->acharacter = '4'; break; case 5: LINK->acharacter = '5'; break; case 6: LINK->acharacter = '6'; break; case 7: LINK->acharacter = '7'; break; case 8: LINK->acharacter = '8'; break; case 9: LINK->acharacter = '9'; break; } } /* digit */ Local Void sign(LINK) struct LOC_numberdigit *LINK; { /* put a negative sign out or a positive sign */ if (LINK->number < 0) LINK->acharacter = '-'; else LINK->acharacter = '+'; } /* sign */ /* end module writepiename version = 5.01; (@ of cluster.p 1990 February 13 */ /* begin module package.numbar */ /* ************************************************************************ */ /* begin module numberdigit */ Static Char numberdigit(number_, logplace) long number_, logplace; { /* return the digit at the place value ('logplace') position of number. example: numberdigit(13625, 3) = 3 numberdigit(13625, 4) = 1 2000 July 30 'myabsolute' replaced 'absolute', which is apparently a keyword for GPC. The name is kept to keep the code looking similar to its origin. */ struct LOC_numberdigit V; long count; /* used to make place */ V.number = number_; V.place = 1; for (count = 1; count <= logplace; count++) V.place *= 10; if (V.number == 0) { if (V.place == 1) V.acharacter = '0'; else V.acharacter = ' '; return V.acharacter; } V.myabsolute = labs(V.number); if (V.myabsolute < V.place / 10) { V.acharacter = ' '; return V.acharacter; } if (V.myabsolute >= V.place) digit(&V); else sign(&V); return V.acharacter; } /* numberdigit */ #define ln10 2.30259 /* natural log of 10 - for conversion to log base 10 */ #define epsilon 0.00001 /* a small number to correct log base 10 errors */ /* end module numberdigit version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module numbersize */ Static long numbersize(n) long n; { /* calculate amount of space to be reserved for the integer n */ long size; /* intermediate result */ if (n == 0) return 1; else { /* account for minus sign */ size = (long)(log((double)labs(n)) / ln10 + epsilon) + 1; /* the 1 is for the last digit */ /* the epsilon assures that we do not lose a place due to roundoff. eg, sometimes log base 10 of 10 would be 0.9999 instead of 1, and we would not do it right... note: this will fail for very large numbers on the order of 1/epsilon. */ if (n < 0) size++; return size; } } /* numbersize */ #undef ln10 #undef epsilon /* end module numbersize version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module numberbar */ Static long firstlastmax(firstnumber, lastnumber) long firstnumber, lastnumber; { /* compute the sizes of firstnumber and lastnumber (including + or - sign) and then determine which number is larger */ long firstlines; /* number of lines needed for firstumber */ long lastlines; /* number of lines needed for lastnumber */ firstlines = numbersize(firstnumber); if (firstnumber > 0) /* add one more for + sign */ firstlines++; lastlines = numbersize(lastnumber); if (lastnumber > 0) /* add one more for + sign */ lastlines++; if (firstlines > lastlines) return firstlines; else return lastlines; } Static Void numberbar(afile, spaces, firstnumber, lastnumber, linesused) _TEXT *afile; long spaces, firstnumber, lastnumber, *linesused; { /* write a bar of numbers to a file, with several spaces before. the number of lines used is returned */ long logplace; /* the log of the digit being looked at */ long number; /* the current number being written */ long spacecount; /* count of spaces */ /* 2000 June 24: This code was not sufficient to deal with the sign correctly. The numbersize routine now does *not* give the + sign (which makes it useful for other purposes) so we have to account for it here now. if abs(firstnumber) > abs(lastnumber) then linesused:= numbersize(firstnumber) else linesused:= numbersize(lastnumber); */ *linesused = firstlastmax(firstnumber, lastnumber); /* writeln(output,'numberbar says linesused = ',linesused:1); for logplace:=linesused-1 downto 0 do begin 1999 July 15: this is changed to linesused since numbersize now accounts for the sign: for logplace := linesused downto 0 do begin 2000 June 24: back to the old code ... */ for (logplace = *linesused - 1; logplace >= 0; logplace--) { for (spacecount = 1; spacecount <= spaces; spacecount++) putc(' ', afile->f); for (number = firstnumber; number <= lastnumber; number++) fputc(numberdigit(number, logplace), afile->f); putc('\n', afile->f); } } /* end module numberbar version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* ************************************************************************ */ /* end module package.numbar version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module book.getbase */ Static base getbase(position, pie) long position; piece *pie; { /* Get a base from the position (internal coordinates) of the piece. Protection is made against positions outside the piece. In the case of circles it would be convenient to wrap around when requests are off the end. So the routine will do a modular wrap for positions outside the range 1 to the length. This is a new feature as of 2000 March 22. */ dnastring *workdna; /* pointer to the dna part of pie */ long p; /* current count of bases into the workdna */ long spot; /* the last base of the dna part */ long thelength; /* the length of the piece */ /* writeln(output,'NEW getbase: position=',position:1,'^^^^^^^^^^^^^^^^^^^^'); */ /* handle cases of position out of range by circular wrapping */ thelength = piecelength(pie); while (position < 1) position += thelength; while (position > thelength) position -= thelength; workdna = pie->dna; p = workdna->length; while (position > p) { /* writeln(output,' workdna^.length=',workdna^.length:1); */ workdna = workdna->next; if (workdna == NULL) { printf("error in function getbase!\n"); halt(); } p += workdna->length; } /* writeln(output,'p=',p:1); */ if (true) { spot = workdna->length - p + position; /* writeln(output,'spot=',spot:1); showdnasegment(output,workdna, spot); */ if (spot <= 0) { printf("error in getbase, spot (= %ld) must be positive\n", spot); halt(); } if (spot > workdna->length) { printf("error in getbase, spot (=%ld) must be less than length (=%d)\n", spot, workdna->length); halt(); } /* writeln(output,'base = ', workdna^.part[spot]); */ return ((base)P_getbits_UB(workdna->part, spot - 1, 1, 3)); } printf("error in getbase: request off end of piece\n"); halt(); } /* end module book.getbase version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module align.withinalignment */ Static boolean withinalignment(alignedposition, alignedbase, length) long alignedposition, alignedbase, length; { /* this function tells one if an aligned position, relative to an aligned base in a piece of some length is within the piece. */ long p; /* the position on the piece */ p = alignedposition + alignedbase; return (p > 0 && p <= length); } #define maximumrange 500 /* end module align.withinalignment version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module align.maxminalignment */ Static Void maxminalignment(inst, book, theline, fromparam, toparam, alignmenttype) _TEXT *inst, *book; long *theline, *fromparam, *toparam; Char alignmenttype; { /* prescan the book to find the range over which the pieces of the book are spread, relative to the aligned base. the procedure uses the same variables that align does (so it can call align itself), and it returns the range in fromparam and toparam. alignmenttype: 'f' means alignment by First internal coordinate base, 'b' means alignment by Book, 'i' means alignment by Instructions. */ /* the maximum size aligned piece; this will presumably catch the alignment bug */ long distance; /* a distance to the aligned base */ piece *pie; long length, alignedbase; pie = (piece *)Malloc(sizeof(piece)); /* set an initial range for the two bounds */ *fromparam = LONG_MAX; *toparam = -LONG_MAX; if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); if (*inst->name != '\0') { if (inst->f != NULL) inst->f = freopen(inst->name, "r", inst->f); else inst->f = fopen(inst->name, "r"); } else rewind(inst->f); if (inst->f == NULL) _EscIO2(FileNotFound, inst->name); RESETBUF(inst->f, Char); while (!BUFEOF(book->f)) { switch (alignmenttype) { case 'i': align(inst, book, theline, &pie, &length, &alignedbase); break; case 'b': case 'f': getpiece(book, theline, &pie); /* read in the piece */ length = piecelength(pie); break; } if (BUFEOF(book->f)) break; switch (alignmenttype) { case 'f': /* force alignment on first base */ alignedbase = 0; *fromparam = 1; distance = length - alignedbase; if (*toparam < distance) *toparam = distance; break; case 'i': /* use the alignedbase from the book */ distance = 1 - alignedbase; if (*fromparam > distance) *fromparam = distance; distance = length - alignedbase; if (*toparam < distance) *toparam = distance; break; case 'b': /* use the internal book */ alignedbase = pietoint(0L, pie); distance = 1 - alignedbase; if (*fromparam > distance) *fromparam = distance; distance = length - alignedbase; if (*toparam < distance) *toparam = distance; break; } clearpiece(&pie); } if (*toparam - *fromparam > maximumrange) { printf(" in procedure maxminalignment:\n"); printf(" alignedbase = %ld\n", alignedbase); printf(" fromparameter = %ld\n", *fromparam); printf(" toparameter = %ld\n", *toparam); printf(" this exceeds the maximum range allowed (%ld)\n", (long)maximumrange); printf(" see notes in the procedure. \n"); /* notes: if you desired this range, increase 'maximumrange'. otherwise, this may indicate a bug - either: 1) locate the bug (and tell tom schneider, please...) 2) reduce the size of the fragments, from one or the other end until the bombing is stopped. */ halt(); } /* make the book readable again */ if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); if (*inst->name != '\0') { if (inst->f != NULL) inst->f = freopen(inst->name, "r", inst->f); else inst->f = fopen(inst->name, "r"); } else rewind(inst->f); if (inst->f == NULL) _EscIO2(FileNotFound, inst->name); RESETBUF(inst->f, Char); Free(pie); } #undef maximumrange /* end module align.maxminalignment version = 7.59; {of delmod.p 2002 Sep 5} */ /* begin module package.triangle.arrays */ /* ************************************************************************ */ Static long tripos(tri, x, y) trianglearray tri; long x, y; { /* return the linear position in the triangle array given by (x,y) */ long hold; /* for reversing the order of x and y */ /* switch the two values */ if (x < y) { hold = x; x = y; y = hold; } if (x < tri.lower || y > tri.upper) { printf("array lower: %5ld upper: %5ld exceeded", tri.lower, tri.upper); printf(" out of bounds at (%ld,%ld)\n", x, y); halt(); } /* reset the coordinates so that they range from (0,0) up */ y -= tri.lower; x -= tri.lower; /* the distance out in a linear array is the rectangular distance side*y, minus the area of a triangle of side y-1, plus the x distance: */ return (tri.side * y - y * (y + 1) / 2 + x); } Static Void cleartriangle(triangle, fromspot, tospot) trianglearray *triangle; long fromspot, tospot; { /* clear the triangular array triangle and assign the lower and upper bounds to fromspot and tospot */ long loop; /* a loop control variable */ long FORLIM; triangle->lower = fromspot; triangle->upper = tospot; triangle->side = triangle->upper - triangle->lower + 1; if (triangle->side <= 0) { printf("illegal triangle size; lower: %5ld", triangle->lower); printf(" upper: %5ld\n", triangle->upper); halt(); } triangle->area = tripos(*triangle, triangle->upper, triangle->upper); if (triangle->area > trimax) { printf("requested triangle size too large for trimax:\n"); printf("trimax is: %ld request was %ld\n", (long)trimax, triangle->area); halt(); } FORLIM = triangle->area; for (loop = 0; loop <= FORLIM; loop++) triangle->data[loop] = 0; } Static Void puttriangle(tri, value, x, y) trianglearray *tri; long value, x, y; { /* place the value at coordinates (x,y) in tri */ tri->data[tripos(*tri, x, y)] = value; } Static Void gettriangle(tri, value, x, y) trianglearray *tri; long *value, x, y; { /* get the value at coordinates (x,y) in tri */ *value = tri->data[tripos(*tri, x, y)]; } Static Void writetriangle(fout, tri) _TEXT *fout; trianglearray tri; { /* write out the triangle array tri to fout */ long x; /* loop control for the horizontal */ long y; /* loop control for the vertical */ long value; /* the value of the array element (x,y) in tri */ long FORLIM, FORLIM1; FORLIM = tri.lower; for (y = tri.upper; y >= FORLIM; y--) { FORLIM1 = tri.upper; for (x = tri.lower; x <= FORLIM1; x++) { if (x >= y) { gettriangle(&tri, &value, x, y); fprintf(fout->f, " %5ld", value); } else fprintf(fout->f, " -----"); } putc('\n', fout->f); } } /* ************************************************************************ */ /* end module package.triangle.arrays */ /* begin module diana.protseqrange */ Static Void protseqrange(protseq, fromparam, toparam) _TEXT *protseq; long *fromparam, *toparam; { /* determine the range of the protseq data. */ *fromparam = 1; /* unfortunately */ while ((P_peek(protseq->f) == '*') | (P_peek(protseq->f) == '>')) { /* skip header */ fscanf(protseq->f, "%*[^\n]"); getc(protseq->f); } *toparam = 0; do { getc(protseq->f); (*toparam)++; } while (!((P_peek(protseq->f) == '.') | P_eoln(protseq->f))); /*writeln(output,'toparam: ',toparam:1);*/ } /* Local variables for filehead: */ struct LOC_filehead { long fromrange, torange, pairs; /* number of pairs to be analzyed */ long side; /* length of triangle side */ } ; Local Void showdata(tofile, c_, LINK) _TEXT *tofile; Char c_; struct LOC_filehead *LINK; { /* show the data to the file, start lines with character c */ fprintf(tofile->f, "%c %ld %ld is the from-to range used\n", c_, LINK->fromrange, LINK->torange); fprintf(tofile->f, "%c %ld bases is the side of the data triangle\n", c_, LINK->side); fprintf(tofile->f, "%c %ld pairs will be analyzed [=side*(side+1)/2]\n", c_, LINK->pairs); } /* end module diana.protseqrange */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module diana.filehead */ Static Void filehead(fin, fout, fromrange_, torange_) _TEXT *fin, *fout; long fromrange_, torange_; { /* place the heading lines on the fout file */ struct LOC_filehead V; long copynumber = 1; /* the number of lines being copied from the text */ _TEXT TEMP; V.fromrange = fromrange_; V.torange = torange_; if (*fout->name != '\0') { if (fout->f != NULL) fout->f = freopen(fout->name, "w", fout->f); else fout->f = fopen(fout->name, "w"); } else { if (fout->f != NULL) rewind(fout->f); else fout->f = tmpfile(); } if (fout->f == NULL) _EscIO2(FileNotFound, fout->name); SETUPBUF(fout->f, Char); /* write the name and version of the program */ fprintf(fout->f, "* diana %4.2f\n", version); /* copy the identification lines to the fout */ if (*fin->name != '\0') { if (fin->f != NULL) fin->f = freopen(fin->name, "r", fin->f); else fin->f = fopen(fin->name, "r"); } else rewind(fin->f); if (fin->f == NULL) _EscIO2(FileNotFound, fin->name); RESETBUF(fin->f, Char); fprintf(fout->f, "* "); if (copylines(fin, fout, copynumber) != copynumber) { fprintf(fout->f, "inst does not have enough header lines\n"); halt(); } V.side = V.torange - V.fromrange + 1; V.pairs = (long)floor(V.side * (V.side + 1.0) / 2 + 0.5); showdata(fout, '*', &V); TEMP.f = stdout; *TEMP.name = '\0'; showdata(&TEMP, ' ', &V); /* add on description lines about graphing */ /* old format: writeln(fout,'*information(i)| type of| | | |', ' freq. | 1 - freq. |3 blank|norm- |1 -'); writeln(fout,'*normal(n) | dinucl-| x | y |3 blank|', ' (information if | |alized|norm.'); writeln(fout,'*diagonal(d) | eotide |coord|coord|columns|', 'column is marked i)|columns|info. |info.'); */ fprintf(fout->f, "*\n"); fprintf(fout->f, "* Definition of columns:\n"); fprintf(fout->f, "* 1: n (normal), d (diagonal), i (information)\n"); fprintf(fout->f, "* 2: dinucleotide, in (info, normal), id (info, diagonal)\n"); fprintf(fout->f, "* 3: x coordinate\n"); fprintf(fout->f, "* 4: y coordinate\n"); fprintf(fout->f, "* 5: 1, use for xscolumn for xyplop\n"); fprintf(fout->f, "* 6: 1, use for yscolumn for xyplop\n"); fprintf(fout->f, "* 7: sum.data[position]\n"); fprintf(fout->f, "* 8: Rcorrelation (with small sample correction)\n"); fprintf(fout->f, "* 9: chisquare\n"); fprintf(fout->f, "* 10: degrees of freedom\n"); fprintf(fout->f, "* 11: 1, use for hue for xyplop\n"); fprintf(fout->f, "* 12: 0, use for saturation for xyplop\n"); fprintf(fout->f, "* 13: Rnormalized = Rcorrelation/4 or 1-(Rsequence/2)\n"); fprintf(fout->f, "* 14: 1-Rnormalized\n"); fprintf(fout->f, "* 15: correlation coefficient for x to y\n"); fprintf(fout->f, "* 16: 1, use for saturation for xyplop\n"); /* column 17: raw Rcorrelation/4 (no small sample correction) writeln(fout,'* 17: Rcorrelation/4 (with NO small sample correction)'); */ } /* end module diana.filehead */ /* ********************************************************************* */ /* ********************************************************************* */ /* begin module diana.addpair */ Static Void addpair(xbase, ybase, xcoord, ycoord, dataprism) base xbase, ybase; long xcoord, ycoord; trianglearray (*dataprism)[(long)t - (long)a + 1]; { /* add to the total at the triangle position at the correct dataprism spot */ long distance; /* linear value corresponding to xcoord and ycoord */ trianglearray *WITH; distance = tripos(dataprism[(long)xbase - (long)a] [(long)ybase - (long)a], xcoord, ycoord); WITH = &dataprism[(long)xbase - (long)a][(long)ybase - (long)a]; /*zzzaaa*/ /* writeln(output,'addpair (',xcoord:1,',',ycoord:1,') ', basetochar(xbase), basetochar(ybase)); */ WITH->data[distance]++; } /* end module diana.addpair */ /* begin module diana.readbook */ Static Void readbook(apiece, book, theline, dataprism, fromparam, toparam, fromrequest, torequest, alignmenttype) piece **apiece; _TEXT *book; long theline; trianglearray (*dataprism)[(long)t - (long)a + 1]; long fromparam, toparam, fromrequest, torequest; Char alignmenttype; { /* go through the book and create the dataprism. Pull out pieces (apiece) and put the data from the requested region (fromrequest to torequest) into the dataprism. The book minimum-maximum range must be fromparam to toparam. When there is only a partial sequence, nothing is recorded. If alignmenttype is book, give an empty file to the align routine. */ long alignedbase; /* the aligned base value of each piece in the book */ long length; /* the length of the sequence in the piece */ base xbase; /* the base corresponding to the x coordinate */ long x; /* the actual x coordinate of the base to get */ long xcoord = 0; /* the x coordinate loop control variable */ long y; /* the actual y coordinate of the base to get */ base ybase; /* the base corresponding to the y coordinate */ long ycoord = 0; /* the y coordinate loop control variable */ _TEXT empty; /* empty file for alignment */ long count = 0; /* count of sequences */ Char bk = '\b'; /* backspace character */ /* initialize the variables */ empty.f = NULL; *empty.name = '\0'; /* clear the dataprism */ for (xbase = a; (long)xbase <= (long)t; xbase = (base)((long)xbase + 1)) { for (ybase = a; (long)ybase <= (long)t; ybase = (base)((long)ybase + 1)) cleartriangle(&dataprism[(long)xbase - (long)a] [(long)ybase - (long)a], fromrequest, torequest); } /* go through the book and analyze the pieces */ /*writeln(output,'alignmenttype: ', alignmenttype);*/ if (*empty.name != '\0') { if (empty.f != NULL) empty.f = freopen(empty.name, "w", empty.f); else empty.f = fopen(empty.name, "w"); } else { if (empty.f != NULL) rewind(empty.f); else empty.f = tmpfile(); } if (empty.f == NULL) _EscIO2(FileNotFound, empty.name); SETUPBUF(empty.f, Char); if (*empty.name != '\0') { if (empty.f != NULL) empty.f = freopen(empty.name, "r", empty.f); else empty.f = fopen(empty.name, "r"); } else rewind(empty.f); if (empty.f == NULL) _EscIO2(FileNotFound, empty.name); RESETBUF(empty.f, Char); printf(" inserting sequence: "); /* backspace character */ while (!BUFEOF(book->f)) { /*yyy*/ switch (alignmenttype) { case 'i': align(&inst, book, &theline, apiece, &length, &alignedbase); break; case 'b': align(&empty, book, &theline, apiece, &length, &alignedbase); break; } if (!BUFEOF(book->f)) { count++; /* writeln(output,count:4, ' ', number:4); generally the same */ printf(" %7ld%c%c%c%c%c%c%c%c", count, bk, bk, bk, bk, bk, bk, bk, bk); /* if count = 10 then halt; (* use for testing backup counter *) writeln(output,bk,bk,bk,count:4); writeln(output,bk,bk,bk,bk,count:4); writeln(output,'readbook: alignedbase =',alignedbase:1); writeln(output,'readbook: fromrequest =',fromrequest:1); writeln(output,'readbook: torequest =', torequest:1); */ for (xcoord = fromrequest; xcoord <= torequest; xcoord++) { /*zzzrrr*/ x = xcoord + alignedbase; if (x >= 1 && x <= length) { xbase = getbase(x, *apiece); /* write(output,'(1) x=',x:2, ' xbase = ',basetochar(xbase),' '); */ addpair(xbase, xbase, xcoord, xcoord, dataprism); for (ycoord = xcoord + 1; ycoord <= torequest; ycoord++) { y = ycoord + alignedbase; if (y >= 1 && y <= length) { ybase = getbase(y, *apiece); /* write(output,'(2) y=',y:2, ' ybase = ',basetochar(ybase),' '); */ addpair(xbase, ybase, xcoord, ycoord, dataprism); } } } } } clearpiece(apiece); } if (empty.f != NULL) fclose(empty.f); } #define debugging false /* turn on sequence printing */ /* end module diana.readbook */ /* begin module diana.readprotseq */ Static Void readprotseq(protseq, dataprism, fromparam, toparam, fromrequest, torequest) _TEXT *protseq; trianglearray (*dataprism)[(long)t - (long)a + 1]; long fromparam, toparam, fromrequest, torequest; { /* go through the protseq and create the dataprism. Put the data from the requested region (fromrequest to torequest) into the dataprism. The sequence minimum-maximum range must be fromparam to toparam. */ Char letter; /* a letter from the sequence */ long alignedbase; /* the aligned base value of each piece in the book */ long length; /* the length of the sequence in the piece */ Char sequence[trimax]; /* the sequence */ base xbase; /* the base corresponding to the x coordinate */ long xcoord = 0; /* the x coordinate loop control variable */ base ybase; /* the base corresponding to the y coordinate */ long ycoord = 0; /* the y coordinate loop control variable */ boolean done; /* done reading a sequence */ /*writeln(output,'BUBBA 0 readprotseq');*/ /* initialize the variables */ alignedbase = fromrequest - fromparam + 1; /* clear the dataprism */ for (xbase = a; (long)xbase <= (long)t; xbase = (base)((long)xbase + 1)) { for (ybase = a; (long)ybase <= (long)t; ybase = (base)((long)ybase + 1)) cleartriangle(&dataprism[(long)xbase - (long)a] [(long)ybase - (long)a], fromrequest, torequest); } /* go through the book and analyze the pieces */ if (*protseq->name != '\0') { if (protseq->f != NULL) protseq->f = freopen(protseq->name, "r", protseq->f); else protseq->f = fopen(protseq->name, "r"); } else rewind(protseq->f); if (protseq->f == NULL) _EscIO2(FileNotFound, protseq->name); RESETBUF(protseq->f, Char); while (!BUFEOF(protseq->f)) { /* read the next sequence */ while (P_peek(protseq->f) == '*') { /* skip header */ fscanf(protseq->f, "%*[^\n]"); getc(protseq->f); } while (P_peek(protseq->f) == '>') { /* skip header */ fscanf(protseq->f, "%*[^\n]"); getc(protseq->f); } length = 0; done = false; do { if (BUFEOF(protseq->f)) { if (debugging) /* BUBBA 0.5 */ printf(" %ld\n", length); done = true; } else { letter = getc(protseq->f); if (letter == '\n') letter = ' '; /*write(output,'BUBBA 0.1 ',length:1,' ',letter);*/ if (P_peek(protseq->f) == '.' || P_peek(protseq->f) == '>' || P_peek(protseq->f) == '*') { done = true; if (debugging) /* BUBBA 0.5 */ printf(" %ld\n", length); } else { if (letter == 'A') letter = 'a'; if (letter == 'C') letter = 'c'; if (letter == 'G') letter = 'g'; if (letter == 'T') letter = 't'; if (letter == 'U') letter = 't'; if (letter == '-' || letter == 't' || letter == 'g' || letter == 'c' || letter == 'a') { sequence[length-1] = letter; length++; /*writeln(output,'BUBBA 0.5 ',length:1,' ',letter); (* BUBBA *)*/ if (debugging) /* BUBBA */ putchar(letter); } } if (P_eoln(protseq->f)) { fscanf(protseq->f, "%*[^\n]"); getc(protseq->f); } } } while (!done); /* until (protseq^ = '.') or eoln(protseq); */ /*writeln(output,'BUBBA 3 readprotseq');*/ if (BUFEOF(protseq->f)) break; for (xcoord = fromrequest; xcoord <= torequest; xcoord++) { letter = sequence[xcoord + alignedbase - 1]; /*lll*/ if (letter != '-') { xbase = chartobase(letter); addpair(xbase, xbase, xcoord, xcoord, dataprism); for (ycoord = xcoord + 1; ycoord <= torequest; ycoord++) { if (sequence[ycoord + alignedbase - 1] != '-') { ybase = chartobase(sequence[ycoord + alignedbase - 1]); addpair(xbase, ybase, xcoord, ycoord, dataprism); } } } /* writeln(output,'BUBBA 4 readprotseq'); writeln(output,'BUBBA 4.5 ',sequence[xcoord+alignedbase]); writeln(output,'BUBBA 4.5 ',letter); writeln(output,'BUBBA 4.5, xcoord+alignedbase:', xcoord+alignedbase: 5); writeln(output,'BUBBA 4.5, xcoord:', xcoord: 5); writeln(output,'BUBBA 4.5, alignedbase:', alignedbase: 5); writeln(output,'BUBBA 5 readprotseq'); */ } } /*writeln(output,'BUBBA 6 readprotseq');*/ } #undef debugging /* end module diana.readprotseq */ /* begin module diana.totalprism */ Static Void totalprism(dataprism_, totalsum, fromparam, toparam) trianglearray (*dataprism_)[(long)t - (long)a + 1]; trianglearray *totalsum; long fromparam, toparam; { /* count the total number of data elements at each position on the triangle */ trisquare dataprism; long position; /* index variable for the triangle data array */ base xindex; /* x index variable for the dataprism array */ base yindex; /* y index variable for the dataprism array */ long FORLIM; trianglearray *WITH; memcpy(dataprism, dataprism_, sizeof(trisquare)); /* clear the totalsum array */ cleartriangle(totalsum, 1L, toparam - fromparam + 1); FORLIM = dataprism[0][0].area; /* go through the dataprism and sum each position */ for (position = 0; position <= FORLIM; position++) { for (xindex = a; (long)xindex <= (long)t; xindex = (base)((long)xindex + 1)) { for (yindex = a; (long)yindex <= (long)t; yindex = (base)((long)yindex + 1)) { WITH = &dataprism[(long)xindex - (long)a][(long)yindex - (long)a]; totalsum->data[position] += WITH->data[position]; } } } } #define bigoutput false /* give a big output for testing */ /* end module diana.totalprism */ /* begin module diana.writeprism */ Static Void writeprism(dianalysis, dataprism_, fromparam, toparam, sum, dicontrol, colorslope, colorconstant) _TEXT *dianalysis; trianglearray (*dataprism_)[(long)t - (long)a + 1]; long fromparam, toparam; trianglearray sum; Char dicontrol; double colorslope, colorconstant; { /* write dataprism into file dianalysis in a form readable by xyplo.p. If dicontrol is 'b' do both info and data, if 'd' just data, and if 'i' just info. */ trisquare dataprism; base baseindex; /* a loop variable for computing Rx and Ry values */ double chisquare; /* the correlation coefficient at a position */ double correlation; /* correlation coefficient for x to y */ boolean dodata; /* print data */ boolean doinfo; /* print info */ double expected; /* calculated number of dinucleotides at a position */ long freedom; /* total number of degrees of freedom at a position*/ double Hx; /* the uncertainty of the single xbase at a positon */ double Hy; /* the uncertainty of the single ybase at a positon */ double Hxy; /* the uncertainty of the dinucleotides at a position */ realprism freq; /* a variable to hold the frequencies of a triangle */ double ln2 = log(2.0); /* ln(2) precaulated for speed */ double observed; /* actual number of dinucleotides at a position */ long position; /* the linear equivalent to triangular coordinates */ double Rcorrelation; /* the correlation information at (Rx,Ry) */ double Rcorrelationsum = 0.0; /* sum of Rcorrelation */ double Rc; /* Rcorrelation with small sample correction */ double Rnormalized; /* the normalized information */ double Rx; /* the information content of the single xbase */ double Ry; /* the information content of the single ybase */ double Rxy; /* the information content of the dinucleotides */ double ssc4 = 3 / (2 * log(2.0)); /* small sample correction factor, precomputed. See Schneider1986 p. 427, eqn A6. s = 4. */ double ssc16 = 15 / (2 * log(2.0)); /* small sample correction factor, precomputed. See Schneider1986 p. 427, eqn A6. s = 16. */ base xbase; /* the x square loop control variable */ long xfreedom; /* number of degrees of freedom of the x position */ basefreq xfreq; /* holds the frequency of a certain xbase */ long xindex; /* the x triangle loop control variable */ sumbase xsum; /* an array of the sums of elements on the x-axis */ base ybase; /* the y square loop control variable */ long yfreedom; /* number of degrees of freedom of the y position */ basefreq yfreq; /* holds the frequency of a certain ybase */ long yindex; /* the y triangle loop control variable */ sumbase ysum; /* an array of the sums of elements on the y-axis */ trianglearray *WITH; double TEMP; memcpy(dataprism, dataprism_, sizeof(trisquare)); if (dicontrol == 'b') { dodata = true; doinfo = true; } else if (dicontrol == 'd') { dodata = true; doinfo = false; } else if (dicontrol == 'i') { dodata = false; doinfo = true; } for (xindex = fromparam; xindex <= toparam; xindex++) { for (yindex = xindex; yindex <= toparam; yindex++) { /* set variables to zero */ Hxy = 0.0; Hx = 0.0; Hy = 0.0; Rxy = 0.0; Rx = 0.0; Ry = 0.0; for (baseindex = a; (long)baseindex <= (long)t; baseindex = (base)((long)baseindex + 1)) { xsum[(long)baseindex - (long)a] = 0; ysum[(long)baseindex - (long)a] = 0; } /* transverse the dinucleotide matrix at the given position */ for (xbase = a; (long)xbase <= (long)t; xbase = (base)((long)xbase + 1)) { for (ybase = a; (long)ybase <= (long)t; ybase = (base)((long)ybase + 1)) { /* find the proper position in the triarray */ position = tripos(dataprism[(long)xbase - (long)a] [(long)ybase - (long)a], xindex, yindex); /* calculate sums and frequencies to write out everything */ WITH = &dataprism[(long)xbase - (long)a][(long)ybase - (long)a]; xsum[(long)xbase - (long)a] += WITH->data[position]; ysum[(long)ybase - (long)a] += WITH->data[position]; /* write (output,'xindex=',xindex:3,', '); write (output,'yindex=',yindex:3,', '); writeln(output,'sum.data[',position:3,']=',sum.data[position]:1); */ /* if sum.data[position] = 0 then begin write (output,'-0-'); write (output,'xindex=',xindex:3,' '); write (output,'yindex=',yindex:3,' '); write (output,'sum.data[',position:3,']=0'); writeln(output,' ',basetochar(xbase), basetochar(ybase)); halt; end; */ /*zzzqqq*/ /* if statement check 2003 Jan 17 */ if (sum.data[position] > 0) { freq[(long)xbase - (long)a][(long)ybase - (long)a] = (double)WITH->data[position] / sum.data[position]; if (freq[(long)xbase - (long)a][(long)ybase - (long)a] != 0.0) Hxy -= freq[(long)xbase - (long)a] [(long)ybase - (long)a] * log(freq[(long)xbase - (long)a] [(long)ybase - (long)a]); } /* print the values to the output file */ if (dodata) { if (xindex != yindex) putc('n', dianalysis->f); else putc('d', dianalysis->f); fprintf(dianalysis->f, " %c%c", basetochar(xbase), basetochar(ybase)); fprintf(dianalysis->f, " %ld", xindex); fprintf(dianalysis->f, " %ld", yindex); fprintf(dianalysis->f, " 1"); /* xscolumn */ fprintf(dianalysis->f, " 1"); /* yscolumn */ fprintf(dianalysis->f, " %ld", dataprism[(long)xbase - (long)a] [(long)ybase - (long)a].data[position]); fprintf(dianalysis->f, " %12.10f", freq[(long)xbase - (long)a][(long)ybase - (long)a]); fprintf(dianalysis->f, " %12.10f", 1 - freq[(long)xbase - (long)a][(long)ybase - (long)a]); fprintf(dianalysis->f, " 1"); /* degrees of freedom */ fprintf(dianalysis->f, " 1"); /* hue */ fprintf(dianalysis->f, " 0"); /* saturation */ fprintf(dianalysis->f, " 0"); /* normalized information */ fprintf(dianalysis->f, " 0"); /* 1 - normalized information */ fprintf(dianalysis->f, " 0"); /* correlation */ fprintf(dianalysis->f, " 1\n"); /* saturation alternative */ } } } /* set some more variables to zero */ xfreedom = 0; yfreedom = 0; freedom = 0; /*How could Rcorrelation be a GOOD value here but bad later*/ /* if sum.data[position] = 0 then begin write (output,'-1-'); write (output,'Rcorrelation ',Rcorrelation:3,' '); write (output,'xindex ',xindex:3,' '); write (output,'yindex ',yindex:3,' '); writeln(output,'sum.data[',position:3,'] = 0'); end; */ /*zzzqqq*/ /* find the frequencies and uncertainties of the mononucleotides */ for (baseindex = a; (long)baseindex <= (long)t; baseindex = (base)((long)baseindex + 1)) { /* if statement check 2003 Jan 17 */ if (sum.data[position] > 0) { /* first coordinate */ xfreq[(long)baseindex - (long)a] = (double)xsum[(long)baseindex - (long)a] / sum.data[position]; if (xfreq[(long)baseindex - (long)a] > 0) { Hx -= xfreq[(long)baseindex - (long)a] * log(xfreq[(long)baseindex - (long)a]); xfreedom++; } /* second coordinate */ yfreq[(long)baseindex - (long)a] = (double)ysum[(long)baseindex - (long)a] / sum.data[position]; if (yfreq[(long)baseindex - (long)a] > 0) { Hy -= yfreq[(long)baseindex - (long)a] * log(yfreq[(long)baseindex - (long)a]); yfreedom++; } } } /* calulate the number of degrees of freedom at a position */ freedom = (xfreedom - 1) * (yfreedom - 1); /* calculate the information contents of mono- and di-nucleotides */ /* The calculation should be: Rx := 2 - (Hx / ln(2)); Ry := 2 - (Hy / ln(2)); Rxy := 4 - (Hxy / ln(2)); Rcorrelation := Rxy - Rx - Ry; but this reduces to: */ Rcorrelation = (Hx + Hy - Hxy) / ln2; /* if sum.data[position] = 0 then begin write (output,'-2-'); write (output,'Rcorrelation=',Rcorrelation:3,' '); write (output,'xindex=',xindex:3,' '); write (output,'yindex=',yindex:3,' '); write (output,'Hx=',Hx:5:3,' '); write (output,'Hy=',Hy:5:3,' '); write (output,'Hxy=',Hxy:5:3,' '); writeln(output,'sum.data[',position:3,'] = 0'); end; */ /*zzzqqq*/ /* calculate the chi-square and correlation coefficient */ chisquare = 0.0; for (xbase = a; (long)xbase <= (long)t; xbase = (base)((long)xbase + 1)) { for (ybase = a; (long)ybase <= (long)t; ybase = (base)((long)ybase + 1)) { expected = xfreq[(long)xbase - (long)a] * yfreq[(long)ybase - (long)a] * sum.data[position]; observed = dataprism[(long)xbase - (long)a] [(long)ybase - (long)a].data[position]; if (expected != 0) { TEMP = observed - expected; chisquare += TEMP * TEMP / expected; } } } correlation = 0.0; /* for now */ if (doinfo) { fprintf(dianalysis->f, "i "); putc('i', dianalysis->f); if (bigoutput) { printf("* n= %ld", sum.data[position]); printf("* Rcorrelation = %*.*f", infofield, infodecim, Rcorrelation); } if (xindex == yindex) { /* on the diagonal, Rcorrelation is equal to Hx, so normalize to give the information content (without small sample correction) */ Rnormalized = 1.0 - Rcorrelation / 2.0; putc('d', dianalysis->f); /* 2003 Jan 17 check added */ if (sum.data[position] > 0) { Rc = Rcorrelation - ssc4 / sum.data[position]; Rcorrelationsum += Rc; } else Rc = 0.0; if (bigoutput) { printf("* ssc4/n = %*.*f", infofield, infodecim, ssc4 / sum.data[position]); printf("* Rcorrelation - ssc4/n = %*.*f", infofield, infodecim, Rc); } } else { Rnormalized = Rcorrelation / 4.0; putc('n', dianalysis->f); /* 2003 Jan 17 check added */ if (sum.data[position] > 0) { Rc = Rcorrelation - ssc16 / sum.data[position]; Rcorrelationsum += Rc; } else Rc = 0.0; /* if sum.data[position] = 0 then begin write (output,'-3-'); write (output,'Rcorrelation ',Rcorrelation:3,' '); write (output,'xindex ',xindex:3,' '); write (output,'yindex ',yindex:3,' '); writeln(output,'sum.data[',position:3,'] = 0'); end; */ /*zzzqqq*/ if (bigoutput) { printf("* ssc16/n = %*.*f", infofield, infodecim, ssc16 / sum.data[position]); printf("* Rcorrelation - ssc16/n = %*.*f", infofield, infodecim, Rc); } } if (bigoutput) putchar('\n'); Rnormalized = colorslope * Rnormalized + colorconstant; fprintf(dianalysis->f, " %ld", xindex); /* 3 */ fprintf(dianalysis->f, " %ld", yindex); /* 4 */ fprintf(dianalysis->f, " 1"); /* 5 */ fprintf(dianalysis->f, " 1"); /* 6 */ fprintf(dianalysis->f, " %ld", sum.data[position]); /* 7 */ fprintf(dianalysis->f, " %*.*f", infofield, infodecim, Rc); /* 8 */ fprintf(dianalysis->f, " %8.5f", chisquare); /* 9 */ fprintf(dianalysis->f, " %2ld", freedom); /* 10 */ fprintf(dianalysis->f, " 1"); /* 11 */ fprintf(dianalysis->f, " 0"); /* 12 */ fprintf(dianalysis->f, " %*.*f", infofield, infodecim, Rnormalized); /* 13 */ fprintf(dianalysis->f, " %*.*f", infofield, infodecim, 1 - Rc); /* 14 */ fprintf(dianalysis->f, " %*.*f", infofield, infodecim, correlation); /* 15 */ fprintf(dianalysis->f, " 1\n"); /* saturation alternative */ /* 16 */ /* write(dianalysis,' ', Rcorrelation/4:infofield:infodecim); (* 17 *) */ } /* write out the information content for the current position */ } } printf("sum of Rcorrelation %10.8f bits\n", Rcorrelationsum); fprintf(dianalysis->f, "* sum of Rcorrelation %10.8f bits\n", Rcorrelationsum); } #undef bigoutput /* Local variables for checknumber: */ struct LOC_checknumber { _TEXT *afile; boolean ok; /* result of this check */ } ; Local Void conclude(LINK) struct LOC_checknumber *LINK; { _TEXT TEMP; printf("Including this character, the rest of the data line is:\n"); TEMP.f = stdout; *TEMP.name = '\0'; copyaline(LINK->afile, &TEMP); LINK->ok = false; } /* end module diana.writeprism */ /* ********************************************************************* */ /* ********************************************************************* */ /* begin module checknumber */ Static boolean checknumber(afile_) _TEXT *afile_; { /* check that there is a number next in the file. If not, return false. This is useful for protection when reading a parameter file. */ struct LOC_checknumber V; V.afile = afile_; V.ok = true; /* be optimistic */ if (BUFEOF(V.afile->f)) { V.ok = false; printf("A number was expected on a data line, but"); printf(" the end of the file was found instead.\n"); return false; } skipblanks(V.afile); if (P_eoln(V.afile->f)) { printf("A number was expected on a data line, but"); printf(" the end of the line was found instead.\n"); conclude(&V); } if (P_peek(V.afile->f) == '+' || P_peek(V.afile->f) == '-' || P_peek(V.afile->f) == '.' || P_peek(V.afile->f) == '9' || P_peek(V.afile->f) == '8' || P_peek(V.afile->f) == '7' || P_peek(V.afile->f) == '6' || P_peek(V.afile->f) == '5' || P_peek(V.afile->f) == '4' || P_peek(V.afile->f) == '3' || P_peek(V.afile->f) == '2' || P_peek(V.afile->f) == '1' || P_peek(V.afile->f) == '0') return V.ok; printf("A number was expected on a data line, but"); printf(" the character \"%c\" was found instead.\n", P_peek(V.afile->f)); conclude(&V); return V.ok; } /* end module checknumber version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module copyfile */ Static Void copyfile(fin, fout) _TEXT *fin, *fout; { /* copy the rest of file fin to fout */ while (!BUFEOF(fin->f)) copyaline(fin, fout); } #define copylines_ 4 /* the number of copied lines */ /* end module copyfile version = 4.57; (@ of prgmod.p 2002 Sep 5 */ /* begin module diana.upgradeto201 */ Static Void upgradeto201(dianap) _TEXT *dianap; { /* upgrade the dianap file to version 2.01: specify alignmenttype: i f: first base, i: inst, b: book alignment The parameters when the upgrade feature was installed (2002 Sep 5) were: ------ -10 10 fromdesired, todesired -10 10 fromparam, toparam (only for use with protseq) i dicontrol: 'd' just data, 'i' just info, 'b' both 0.84 0.16 colorslope and colorconstant gives yellow ranging up to red. diana version 1.88 and higher ------ */ _TEXT internal; /* a place to hold the old dianap */ long line_; /* a line to be worked with */ double parameterversion = 2.01; /* parameter version number */ internal.f = NULL; *internal.name = '\0'; printf("upgrading to version %4.2f ...\n", parameterversion); /* copy alist to internal */ if (*dianap->name != '\0') { if (dianap->f != NULL) dianap->f = freopen(dianap->name, "r", dianap->f); else dianap->f = fopen(dianap->name, "r"); } else rewind(dianap->f); if (dianap->f == NULL) _EscIO2(FileNotFound, dianap->name); RESETBUF(dianap->f, Char); /* Don't do this! There IS no previous parameter version line! readln(dianap); (* skip old parameter version line *) */ if (*internal.name != '\0') { if (internal.f != NULL) internal.f = freopen(internal.name, "w", internal.f); else internal.f = fopen(internal.name, "w"); } else { if (internal.f != NULL) rewind(internal.f); else internal.f = tmpfile(); } if (internal.f == NULL) _EscIO2(FileNotFound, internal.name); SETUPBUF(internal.f, Char); for (line_ = 1; line_ <= copylines_; line_++) copyaline(dianap, &internal); /* write the NEW PARAMETER LINE */ fprintf(internal.f, "i f: first base, i: inst, b: book alignment\n"); /* finish the copy of alist to internal */ /* Dump the previous stuff about copyfile(dianap, internal); */ /* copy internal to alist */ if (*internal.name != '\0') { if (internal.f != NULL) internal.f = freopen(internal.name, "r", internal.f); else internal.f = fopen(internal.name, "r"); } else rewind(internal.f); if (internal.f == NULL) _EscIO2(FileNotFound, internal.name); RESETBUF(internal.f, Char); if (*dianap->name != '\0') { if (dianap->f != NULL) dianap->f = freopen(dianap->name, "w", dianap->f); else dianap->f = fopen(dianap->name, "w"); } else { if (dianap->f != NULL) rewind(dianap->f); else dianap->f = tmpfile(); } if (dianap->f == NULL) _EscIO2(FileNotFound, dianap->name); SETUPBUF(dianap->f, Char); fprintf(dianap->f, "%4.2f version of dianap that this parameter file is designed for.\n", parameterversion); copyfile(&internal, dianap); /* add the new material at the end: */ /* none to add */ if (*dianap->name != '\0') { if (dianap->f != NULL) dianap->f = freopen(dianap->name, "r", dianap->f); else dianap->f = fopen(dianap->name, "r"); } else rewind(dianap->f); if (dianap->f == NULL) _EscIO2(FileNotFound, dianap->name); RESETBUF(dianap->f, Char); /* ready to start reading again */ if (internal.f != NULL) fclose(internal.f); } #undef copylines_ /* end module diana.upgradeto201 */ /* begin module dianap.upgradeparameters */ Static Void upgradeparameters(dianap) _TEXT *dianap; { /* make sure that the parameters are the latest spiffy version */ double parameterversion; /* parameter version number */ fscanf(dianap->f, "%lg%*[^\n]", ¶meterversion); getc(dianap->f); if ((long)floor(100 * parameterversion + 0.5) >= (long)floor(100 * updateversion + 0.5) && (long)floor(100 * parameterversion + 0.5) <= (long)floor(100 * versionupperbound + 0.5)) return; /*qqq*/ parameterversion = 1.88; /* assume this version */ printf("^GYou have an old parameter file!, version %4.2f!\n", parameterversion); printf(" version = %4.2f\n", version); printf(" updateversion = %4.2f\n", updateversion); printf(" parameterversion = %4.2f\n", parameterversion); printf("versionupperbound = %4.2f\n", versionupperbound); if (parameterversion == 1.88) upgradeto201(dianap); if (*dianap->name != '\0') { if (dianap->f != NULL) dianap->f = freopen(dianap->name, "r", dianap->f); else dianap->f = fopen(dianap->name, "r"); } else rewind(dianap->f); if (dianap->f == NULL) _EscIO2(FileNotFound, dianap->name); RESETBUF(dianap->f, Char); fscanf(dianap->f, "%lg%*[^\n]", ¶meterversion); getc(dianap->f); if (parameterversion < updateversion) { printf("Sorry! I am unable to fully upgrade your parameter file\n"); printf("from version %4.2f to version %4.2f!\n", parameterversion, updateversion); printf("Start from a fresh copy or edit this one.\n"); halt(); } else printf("... upgrade successful!\n"); printf("See this page for the new documentation:\n"); printf("http://www.lecb.ncifcrf.gov/~toms/delila/diana.html\n"); printf("----------\n"); } /* Local variables for themain: */ struct LOC_themain { _TEXT *dianap; Char alignmenttype; /* type of book alignment */ Char bookorprotseq; /* 'b' means sequence data comes from the book 'p' means sequence data comes from the protseq */ double colorslope; /* value to multiply normalized information */ double colorconstant; /* add to normalized information after colorslope */ Char dicontrol; /* 'd': just data, 'i': just info, 'b': both printed */ long fromparam; /* the farthest aligned position back from 0 */ long fromrequest; /* requested coordinate to analyze from */ long fromprotseq; /* protseq first base */ long toparam; /* the farthest aligned position forward from 0 */ long torequest; /* requested coordinate to analyze to */ long toprotseq; /* protseq last base */ } ; /* Local variables for readparameters: */ struct LOC_readparameters { struct LOC_themain *LINK; boolean checkout; /* if true, all variable values are ok */ } ; Local Void cn(LINK) struct LOC_readparameters *LINK; { /* short version of call to check number */ LINK->checkout = checknumber(LINK->LINK->dianap); if (!LINK->checkout) /* avoid snowballing */ halt(); } Local Void readparameters(LINK) struct LOC_themain *LINK; { /* read in the parameters. 2002 Sep 5: install auto upgrade, lister.p is example */ struct LOC_readparameters V; V.LINK = LINK; V.checkout = true; /* be optimistic */ if (*LINK->dianap->name != '\0') { if (LINK->dianap->f != NULL) LINK->dianap->f = freopen(LINK->dianap->name, "r", LINK->dianap->f); else LINK->dianap->f = fopen(LINK->dianap->name, "r"); } else rewind(LINK->dianap->f); if (LINK->dianap->f == NULL) _EscIO2(FileNotFound, LINK->dianap->name); RESETBUF(LINK->dianap->f, Char); if (!BUFEOF(LINK->dianap->f)) { cn(&V); upgradeparameters(LINK->dianap); cn(&V); fscanf(LINK->dianap->f, "%ld%ld%*[^\n]", &LINK->fromrequest, &LINK->torequest); getc(LINK->dianap->f); cn(&V); fscanf(LINK->dianap->f, "%ld%ld%*[^\n]", &LINK->fromprotseq, &LINK->toprotseq); getc(LINK->dianap->f); fscanf(LINK->dianap->f, "%c%*[^\n]", &LINK->dicontrol); getc(LINK->dianap->f); if (LINK->dicontrol == '\n') LINK->dicontrol = ' '; cn(&V); fscanf(LINK->dianap->f, "%lg%lg%*[^\n]", &LINK->colorslope, &LINK->colorconstant); getc(LINK->dianap->f); fscanf(LINK->dianap->f, "%c%*[^\n]", &LINK->alignmenttype); getc(LINK->dianap->f); if (LINK->alignmenttype == '\n') LINK->alignmenttype = ' '; } else { /* initialize the variables */ LINK->fromrequest = LINK->fromparam; LINK->torequest = LINK->toparam; LINK->fromprotseq = LINK->fromparam; LINK->toprotseq = LINK->toparam; LINK->dicontrol = 'b'; LINK->colorslope = 0.84; LINK->colorconstant = 0.16; /* gives yellow ranging up to red */ LINK->alignmenttype = 'i'; /* read from inst to get alignment */ } if (LINK->fromrequest > LINK->torequest) { printf("fromrequest must be <= torequest\n"); halt(); } if (LINK->bookorprotseq == 'p') { LINK->fromparam = LINK->fromprotseq; LINK->toparam = LINK->toprotseq; } } /* readparameters */ /* end module dianap.upgradeparameters */ /* begin module diana.themain */ Static Void themain(book, inst, protseq, dianap_, da) _TEXT *book, *inst, *protseq, *dianap_, *da; { /* the main procedure of diana */ struct LOC_themain V; piece *apiece; /* the piece pointer used throughout the program */ trisquare dataprism; /* the data structure for diana */ long theline = 0; /* current line in book */ trianglearray totalsum; /* the total number of elements at each position */ /* announce the program */ V.dianap = dianap_; printf(" diana %4.2f\n", version); readparameters(&V); /* determine range of sequences available */ if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); if (*protseq->name != '\0') { if (protseq->f != NULL) protseq->f = freopen(protseq->name, "r", protseq->f); else protseq->f = fopen(protseq->name, "r"); } else rewind(protseq->f); if (protseq->f == NULL) _EscIO2(FileNotFound, protseq->name); RESETBUF(protseq->f, Char); /* without initial readln, eof is false for an empty file for the GPC compiler! */ fscanf(book->f, "%*[^\n]"); getc(book->f); fscanf(protseq->f, "%*[^\n]"); getc(protseq->f); if (BUFEOF(book->f) & BUFEOF(protseq->f)) { printf("either book or protseq must contain sequence data\n"); halt(); } if ((!BUFEOF(book->f)) & (!BUFEOF(protseq->f))) { printf("One of book or protseq must be empty\n"); printf("eof(book)%s\n", BUFEOF(book->f) ? " TRUE" : "FALSE"); printf("eof(protseq)%s\n", BUFEOF(protseq->f) ? " TRUE" : "FALSE"); halt(); } if (!BUFEOF(book->f)) { /* reset the files and pointers */ brinit(book, &theline); if (*inst->name != '\0') { if (inst->f != NULL) inst->f = freopen(inst->name, "r", inst->f); else inst->f = fopen(inst->name, "r"); } else rewind(inst->f); if (inst->f == NULL) _EscIO2(FileNotFound, inst->name); RESETBUF(inst->f, Char); apiece = (piece *)Malloc(sizeof(piece)); /* find the minimum and maximum values needed to analyze the book */ printf(" Using book\n"); printf(" getting the range from the book.\n"); maxminalignment(inst, book, &theline, &V.fromparam, &V.toparam, V.alignmenttype); printf(" Book is from: %ld to: %ld\n", V.fromparam, V.toparam); V.bookorprotseq = 'b'; } if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); if (*protseq->name != '\0') { if (protseq->f != NULL) protseq->f = freopen(protseq->name, "r", protseq->f); else protseq->f = fopen(protseq->name, "r"); } else rewind(protseq->f); if (protseq->f == NULL) _EscIO2(FileNotFound, protseq->name); RESETBUF(protseq->f, Char); fscanf(protseq->f, "%*[^\n]"); getc(protseq->f); /* required to test protseq eof by GPC! 2009 Apr 09 */ if (!BUFEOF(protseq->f)) { printf(" Using protseq\n"); printf(" Getting the range from the protseq.\n"); protseqrange(protseq, &V.fromparam, &V.toparam); /* writeln(output,' protseq is from: ',fromparam:1,' to: ',toparam:1); */ V.bookorprotseq = 'p'; } /* force the requested range into that available */ if (V.fromrequest < V.fromparam) { printf("************** WARNING **************\n"); printf("Requested FROM parameter (%ld)\n", V.fromrequest); printf( "cannot be lower than the sequence FROM limit (%ld). Using sequence limit: %ld\n", V.fromparam, V.fromparam); V.fromrequest = V.fromparam; } if (V.torequest > V.toparam) { printf("requested TO parameter (%ld)\n", V.torequest); printf( "cannot be lower than the sequence TO limit (%ld). Using sequence limit: %ld\n", V.toparam, V.toparam); V.torequest = V.toparam; } /* halt; {zzz} */ /* head the output file */ if (*da->name != '\0') { if (da->f != NULL) da->f = freopen(da->name, "w", da->f); else da->f = fopen(da->name, "w"); } else { if (da->f != NULL) rewind(da->f); else da->f = tmpfile(); } if (da->f == NULL) _EscIO2(FileNotFound, da->name); SETUPBUF(da->f, Char); switch (V.bookorprotseq) { case 'b': filehead(book, da, V.fromrequest, V.torequest); break; case 'p': filehead(protseq, da, V.fromrequest, V.torequest); break; } printf(" creating the dataprism\n"); /* go through the book and place each sequence on the prism */ switch (V.bookorprotseq) { case 'b': readbook(&apiece, book, theline, dataprism, V.fromparam, V.toparam, V.fromrequest, V.torequest, V.alignmenttype); break; case 'p': readprotseq(protseq, dataprism, V.fromparam, V.toparam, V.fromrequest, V.torequest); break; } /* count the number of data points in each space of the dataprism */ printf(" totaling the dataprism\n"); totalprism(dataprism, &totalsum, V.fromrequest, V.torequest); /* print the dataprism into da in a form readable to xyplo.p */ printf(" calculating and writing the dataprism to da\n"); writeprism(da, dataprism, V.fromrequest, V.torequest, totalsum, V.dicontrol, V.colorslope, V.colorconstant); } /* end module diana.themain */ main(argc, argv) int argc; Char *argv[]; { PASCAL_MAIN(argc, argv); if (setjmp(_JL1)) goto _L1; da.f = NULL; strcpy(da.name, "da"); dianap.f = NULL; strcpy(dianap.name, "dianap"); protseq.f = NULL; strcpy(protseq.name, "protseq"); inst.f = NULL; strcpy(inst.name, "inst"); book.f = NULL; strcpy(book.name, "book"); themain(&book, &inst, &protseq, &dianap, &da); _L1: if (book.f != NULL) fclose(book.f); if (inst.f != NULL) fclose(inst.f); if (protseq.f != NULL) fclose(protseq.f); if (dianap.f != NULL) fclose(dianap.f); if (da.f != NULL) fclose(da.f); exit(EXIT_SUCCESS); } /* End. */