/* Output from p2c 1.21alpha-07.Dec.93, the Pascal-to-C translator */ /* From input file "malign.p" */ #include /* optimal alignment of sequences in a book, to minimize uncertainty by David Mastronarde and Dr. Thomas D. Schneider National Institutes of Health National Cancer Institute Center for Cancer Research Nanobiology Program Molecular Information Theory Group Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu (use only if first address fails) http://www.ccrnp.ncifcrf.gov/~toms/ module libraries: delman, delmod */ /* end of program */ /* begin module version */ #define version 2.56 /* of malign.p 2005 Sep 28 2005 Sep 28: 2.56: insert malign.xyplop 2005 Feb 7: 2.55: cleanup 2005 Feb 7: 2.54: crash: "second operand of `mod' is <= 0" 2005 Jan 27: 2.53: increase maxnseq from 5000 to 10000 2001 Jul 10: 2.52: upgrade documentation 2001 Apr 18: 2.51: document file output 2000 Dec 13: 2.47: upgrade alignment for comments 2000 Jun 29: 2.47: upgrade documentation 2000 Feb 23: 2.44: add alignmenttype; upgrade delmod modules 1999 August 26: 2.44: add spaces to optinst so big numbers don't get fused together. STUPID. 1999 July 22: 2.43: change xyin to malignxyin to avoid conflicts 1999 June 17: 2.42: optinst has Delila 'same' instruction. 1998 Oct 19: range set to 1000 origin 1986 apr 22 */ /* end module version */ /* begin module describe.malign */ /* name malign: optimal alignment of a book, based on minimum uncertainty synopsis malign(inst: in, book: in, malignp: in, uncert: out, newalign: out, optalign: out, optinst: out, malignxyin: out, output: out) files inst: delila instructions of the form 'get from 56 -5 to 56 +10;' book: the book generated by delila using inst malignp: parameter file with the following parameters: winleft, winright: left and right ends of window for calculating uncertainty, relative to aligned base shiftmin, shiftmax: minimum and maximum shift of aligned base iseed: integer random seed nranseq: number of random sequences, or 0 to use sequences in book nshuffle: number of times to redo alignment after random shuffle ifpaired: 1 to treat each pair of sequences as complementary strands, 0 not to standout: output run #, pass # and H to standard output every pass if 1, every run if 0, or not at all if -1 npassout: output H and alignment every npassout passes to file newalign, or only at end of runs if zero, or not at all if -1 nshiftout: output L and H(L) every nshiftout sequence shifts (to file uncert), or only at end of passes if zero, or not at all if -1 tolerance: tolerance in change of H ntolpass: maximum number of passes with change below tolerance new parameter allowed but not required (default is i): alignmenttype: char; 'f' means alignment by First internal coordinate base, 'b' means alignment by Book, 'i' means alignment by Instructions. See the alist program for more information. Normally one will align by delila instructions. If this parameter is 'f', then the first base of the book is considered the zero base and if it is 'b' then the zero base is given by the coordinates of each piece in the book. uncert: uncertainty as function of position, for the last run, at the end of each pass or after selected number of sequence shifts Controlled by variable nshiftout. newalign: values of H and the relative alignments; starting, final, and intermediate if selected. Controlled by variable npassout. optalign: user-readable listing of unique optimal relative alignments and number of times each was achieved optinst: list of unique optimal alignments in absolute coordinates, to be used to make inst file for selected alignment This file is like optalign, but the coordinates are for the original sequence. malignxyin: a list of the number of occurrences of alignments and their H values in bits. This may be plotted with xyplo, as described in the paper. Each line contains these numbers: rank: from 1 to the number of alignment classes occurrences: how many times the class was found H: the uncertainty of the alignment, in bits R: the information content of the alignment, in bits with small sample correction. description Given a book of aligned sequences, this program searches for the alignment of the sequences that has the lowest uncertainty, i.e. the highest value of Rsequence. The user specifies the "window" of bases within which uncertainty is calculated, and the maximum number of bases that each sequence is allowed to shift from the original alignment. The program considers each sequence in turn, shifting it to an alignment with minimum uncertainty while holding the other sequences fixed. A "pass" is complete when all sequences have been considered. A "run" is complete when no alignments have changed in the preceding pass, and the alignment is then considered "optimal". The first run starts with the original alignment; every run after that starts with a "shuffled" alignment obtained by shifting each sequence independently by a random amount between the allowed limits. The program maintains a list of all of the unique optimal alignments achieved from these starting alignments, and it outputs them in order of increasing uncertainty. In version 2.33 and earlier, the program did not keep track of the organism and chromosome names in bestinst. This file is now superceeded by the malin program which copies the inst file to cinst and modifies it according to one of the alignments in optinst. ******************************************************************************** Summary of file output: Malign produces: uncert, newalign, optalign, optinst, malignxyin, output -- output -- Line 7 of "malignp" controls output Parameter: 1 - every run, pass, and uncertainty will be outputed to the screen 0 - only the lowest uncertainty run will be outputed -1 - nothing will be outputed *NOTE* no file will be produced regardless of the parameter -- newalign -- Line 8 of "malignp" controls newalign Parameter: 1 - produces a full newalign file 0 - produces a smaller newalign file (about half the size) -1 - produces no newalign file *NOTE* this is the largest file produced and is unnecessary -- uncert -- Line 9 of "malignp" controls uncert Parameter: 1 - produces a full uncert file 0 - produces a smaller uncert file (about half the size) -1 - produces an empty uncert file *NOTE* file will be produced regardless of the parameter, however this file is large and unnecessary -- optalign -- *NOTE* this file will always be produced and is needed to run malin -- optinst -- *NOTE* this file will always be produced and is needed to run malin -- malignxyin -- *NOTE* this file will always be produced and can be used to plot data using xyplo If you set our parameters so newalign and uncert are not created, this can save some space. (Thanks to Brent M. Jewett for compiling this information on 2001 Feb 7.) ******************************************************************************** documentation A paper describing the algorithm in detail is available from ftp://ftp.ncifcrf.gov/pub/delila/malign.ps @article{Schneider.Mastronarde.malign, author = "T. D. Schneider and D. Mastronarde", title = "{Fast} multiple alignment of ungapped {DNA} sequences using information theory and a relaxation method", journal = "Discrete Applied Mathematics", note = "ftp://ftp.ncifcrf.gov/pub/delila/malign.ps", volume = "71", pages = "259-268", year = "1996"} The use of malign to align sequences with a subtle pattern is described in: @article{Toledano1994, author = "M. B. Toledano and I. Kullik and F. Trinh and P. T. Baird and T. D. Schneider and G. Storz", title = "Redox-Dependent Shift of {OxyR-DNA} Contacts Along An Extended {DNA} Binding Site: A Mechanism for Differential Promoter Selection", journal = "Cell", volume = "78", pages = "897-909", year = "1994"} For how the information content and small sample correction are computed: @article{Schneider1986, author = "T. D. Schneider and G. D. Stormo and L. Gold and A. Ehrenfeucht", title = "Information content of binding sites on nucleotide sequences", journal = "J. Mol. Biol.", volume = "188", pages = "415-431", year = "1986"} see also {Paper Schneider.Mastronarde.malign:} http://www.lecb.ncifcrf.gov/~toms/paper/malign/ {Program to graph the malignxyin file:} xyplo.p {You can use the} malign.xyplop { file for the xyplop and the malignxyin for the xyin. Set the xyplom to be empty. I ALWAYS make this graph to see what is going on.} {Program to make delila instructions from nth alignment of malign:} malin.p {Example parameter file, malignp:} malignp {A COMPLETE SET FOR DEMONSTRATING THE PROGRAM} malign.inst {instructions for grabbing the first 6 EcoRI sites on the E. coli genome, but messed up by setting the last digit to zero} malign.book {The Delila book corresponding to malign.inst} malign.malignp {Parameter file for malign to realign the inst and book.} {If one uses malin to pick the first alignment, one finds that they are correctly realigned: - + 1--------- +++++++++1 098765432101234567890 ..................... EcoRI U00096 3842 + 1 cgacctgccggaattcagcct U00096 12889 + 2 tctggttgaagaattcaagaa U00096 32545 + 3 tcagggtatcgaattcgacta U00096 50237 + 4 ggtattcagcgaattccacga U00096 56282 + 5 agaggtagcggaattcgttct U00096 96860 + 6 gctacgtcaggaattcctgct } {Program for listing aligned sequences, as above:} alist.p author David Mastronarde and Tom Schneider bugs The realignment algorithm, which shifts all sequences by the same amount to attempt to keep the window near its original position, is somewhat ad hoc in nature and the effects of different settings for it parameters have not been explored. If the window spans two real sites with competing alignments, many optimal but meaningless alignments with similar uncertainties may be obtained. The random sequences can't be examined. For the computation of Rsequence, composition is assumed to be equiprobable, there is no provision for reading in a cmp file yet. technical notes The malignxyin file Rsequence has the small sample correction. The choice between the estimate and the more exact computation is determined by constant "kickover". The constant maxlen is one longer than the longest sequence. The constant maxnseq is the maximum number of sequences. */ /* end module describe.malign */ #define maxlen 2002 /* 1 + maximum length of sequences */ #define maxnseq 10000 /* maximum number of sequences */ /* constants for the ad hoc realignment procedure; see "realign" */ #define maxchange 50 /* 2 times maximum change in alignments allowed during realignment */ #define minmaxforchange 3 /* minimum number of sequences that must have the same shift from their original alignments before all sequences will get realigned so as to restore that subset of sequences to original alignment */ #define realignlimit 10 /* maximum pass number on which to do realignment */ #define kickover 50 /* e(hnb) for n values above this are estimated from ae(hnb) = hg - 3/(2 ln(2) n) while those below are calculated exactly */ /* begin module info.const */ #define infofield 8 /* field size for bits */ #define infodecim 5 /* decimal places for bits */ #define posfield 4 /* field size for aligned sequence positions */ #define nfield 4 /* size of field for printing n, the number of sites */ /* end module info.const */ /* begin module book.const */ /* constants needed for book manipulations */ #define dnamax 1024 /* length of dna arrays */ #define namelength 100 /* maximum key name length */ #define linelength 80 /* maximum line readable in book */ /* end module book.const version = 7.56; {of delmod.p 2000 Nov 21} */ /* trigger definitions follow to make program compilable. The prgmod.p program has the triggers */ /* module filler.const */ #define fillermax 50 /* the size of the filler array for a string */ /* module filler.const from prgmod.p 4.20 */ /* begin module interact.const */ /* begin module string.const */ #define maxstring 150 /* the maximum string */ /* end module string.const version = 4.39; (@ of prgmod.p 1999 November 28 */ /* end module interact.const version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.type */ /* types needed for book manipulations */ typedef long chset[5]; /* types defined in book definition */ typedef Char alpha[namelength]; /* this is not alfa */ /* name is a left justified string with blanks following the characters */ typedef struct name { alpha letters; /* zero means an unspecified structure */ char length; } name; typedef struct line { /* a line of characters */ Char letters[linelength]; char length; struct line *next; } line; typedef enum { plus, minus, dircomplement, dirhomologous } direction; typedef enum { linear, circular } configuration; typedef enum { on, off } state; typedef struct header { /* header of key */ name keynam; /* key name of structure */ line *fulnam; /* full name of structure */ /* note key */ line *note; } header; /* begin module base.type */ /* define the four nucleotide bases */ typedef enum { a, c, g, t } base; /* end module base.type version = 7.56; {of delmod.p 2000 Nov 21} */ /* sequence types */ typedef short dnarange; /* p2c: malign.p, line 393: * Note: Field width for seq assumes enum base has 4 elements [105] */ typedef uchar seq[(dnamax + 3) / 4]; typedef struct dnastring { seq part; dnarange length; struct dnastring *next; } dnastring; typedef struct orgkey { /* organism key */ header hea; /* genetic map units */ line *mapunit; } orgkey; typedef struct chrkey { /* chromosome key */ header hea; double mapbeg; /* number of genetic map beginning */ /* number of genetic map ending */ double mapend; } chrkey; typedef struct piekey { /* piece key */ header hea; double mapbeg; /* genetic map beginning */ configuration coocon; /* configruation (circular/linear) */ direction coodir; /* direction (+/-) relative to genetic map */ long coobeg; /* beginning nucleotide */ long cooend; /* ending nucleotide */ configuration piecon; /* configruation (circular/linear) */ direction piedir; /* direction (+/-) relative to coordinates */ long piebeg; /* beginning nucleotide */ long pieend; /* ending nucleotide */ } piekey; typedef struct piece { piekey key; dnastring *dna; } piece; typedef struct reference { name pienam; /* name of piece referred to */ double mapbeg; /* genetic map beginning */ direction refdir; /* direction relative to coordinates */ long refbeg; /* beginning nucleotide */ long refend; /* ending nucleotide */ } reference; typedef struct genkey { /* gene key */ header hea; reference ref; } genkey; typedef struct trakey { /* transcript key */ header hea; reference ref; } trakey; typedef struct markey { /* marker key */ header hea; reference ref; state sta; line *phenotype; struct marker *next; } markey; typedef struct marker { markey key; dnastring *dna; } marker; /* end module book.type version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module interact.type */ /* begin module string.type */ /* pointer to a string */ typedef struct string { /* a string of characters */ Char letters[maxstring]; /* the letters in the string */ long length; /* the number of characters in the string */ long current; /* the letter we are working on */ Char *next; /* the next string in a series */ } string; /* end module string.type version = 4.39; (@ of prgmod.p 1999 November 28 */ /* end module interact.type version = 7.56; {of delmod.p 2000 Nov 21} */ /* module filler.type */ /* the following is an array used to fill a string. it is convenient to have it much shorter than the maxstring, so that it is easy to fill the string using procedure fillstring. the user must declare the value of constant fillermax. */ typedef Char filler[fillermax]; /* module filler.type from prgmod.p 4.20 */ /* module trigger.type */ typedef struct trigger { /* an object to be searched for */ string seek; /* the characters looked for */ long state_; /* how close to triggering we are */ boolean skip; /* trigger not found- skip the line */ /* the trigger was found */ boolean found; } trigger; /* module trigger.type from prgmod.p 4.20 */ /* pointer to a seqrec record */ typedef struct seqrec { /* sequences and alignment info */ base alseq[maxlen]; /* the sequence */ long length; /* number of bases */ name piecename; /* name of the piece */ long alignread; /* alignment read in from file */ long alignstart; /* alignment at start of run */ long aligncurrent; /* current alignment */ long alignmin; /* minimum alignment */ long alignmax; /* maximum alignment */ /* pointer to next record */ struct seqrec *next; } seqrec; typedef long matrix[4][maxlen]; /* array for n(B,L) table */ typedef double poslist[maxlen]; /* array for H(L) values */ typedef double fltable[maxnseq + 1]; /* array for f*log(f) and diffs */ /* pointer to optlist record */ typedef struct optlist { /* storage for unique optimal alignments */ long albest[maxnseq]; /* the alignments */ double hbest; /* the h value */ long num; /* number of times achieved */ /* pointer to next record */ struct optlist *next; } optlist; Static _TEXT inst; /* the delila instructions required by the align procedures */ Static _TEXT book; /* the book to be aligned */ Static _TEXT malignp; /* parameter file */ Static _TEXT uncert; /* uncertainty as function of position */ Static _TEXT newalign; /* values of H and relative alignments */ Static _TEXT optalign; /* set of unique optimum relative alignments */ Static _TEXT optinst; /* set of unique optimum absolute alignments */ Static _TEXT malignxyin; /* ranking, occurrences and H */ Static long winleft, winright; /* left and right ends of window for calculating uncertainty, relative to aligned base */ Static long shiftmin, shiftmax; /* minimum and maximum shift of aligned base */ Static long iseed; /* integer random seed */ Static long nranseq; /* number of random sequences, or 0 to use sequences in book */ Static long nshuffle; /* number of times to redo alignment after random shuffle */ Static long ifpaired; /* flag to treat pairs of sequences as complementary strand */ Static long standout; /* standard output very pass, run, or not at all */ Static long npassout; /* period at which to output H and alignment */ Static long nshiftout; /* period at which to output L and H(L) */ Static long ntolpass; /* maximum number of passes below tolerance */ Static double tolerance; /* tolerance in change of H */ Static matrix nbl; /* the n(B,L) matrix */ Static poslist hl; /* the H(L) array */ Static fltable flogf, fldiff; /* tables of f*log(f) and differences of this */ Static fltable fldoubl; /* table of flogf(i+2)-flogf(i) */ Static seqrec *sepfirst, *sep, *seplast; /* pointers to sequence records */ Static optlist *oapfirst, *oap, *oaplast; /* pointers to alignment records */ Static double rand; /* random number returned by random */ Static double horiginal; /* value of h at starting alignment */ Static double hcurrent, hpass; /* values of H currently and at end of pass */ Static long numseq; /* number of sequences */ Static long window; /* length of window */ Static long shuffle; /* index for shuffling loop */ Static long numopt; /* number of optimal alignments on list */ Static long globshift; /* global shift when realigning all sequences */ Static long iseedparam; /* random seed as read in */ Static long iseed2; /* secondary seed used to resolve ties in minimum H */ Static long iseedsave; /* value of seed before shuffling for current run */ Static long notchanged; /* number of sequences over which no change has been made */ Static long intolerance; /* number of passes where H changed < tolerance */ Static boolean paired; /* boolean value for whether sequences paired */ Static Char alignmenttype; /* 'f' means alignment by First internal coordinate base, 'b' means alignment by Book, 'i' means alignment by Instructions */ Static long theline; /* current line in the book */ /* begin module book.var */ /* ************************************************************************ */ /* global variables needed for book manipulations */ /* free storage: */ Static line *freeline; /* unused lines */ Static dnastring *freedna; /* unused dnas */ Static boolean readnumber; /* whether to read a number from the notes, or to read in the notes */ Static long number; /* the number of the item just read */ Static boolean numbered; /* true when the item just read is numbered */ Static boolean skipunnum; Static jmp_buf _JL1; /* a control variable to allow skipping of un-numbered items in the book */ /* ************************************************************************ */ /* end module book.var version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module halt */ Static Void halt() { /* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. */ printf(" program halt.\n"); longjmp(_JL1, 1); } /* end module halt version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module copyaline */ Static Void copyaline(fin, fout) _TEXT *fin, *fout; { /* copy a line from file fin to file fout */ while (!P_eoln(fin->f)) { putc(P_peek(fin->f), fout->f); getc(fin->f); } fscanf(fin->f, "%*[^\n]"); getc(fin->f); putc('\n', fout->f); } /* copyaline */ /* end module copyaline version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module skipblanks */ Static Void skipblanks(thefile) _TEXT *thefile; { /* skip over blanks until a non-blank, or end of line, is found */ while ((P_peek(thefile->f) == ' ') & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipnonblanks(thefile) _TEXT *thefile; { /* skip over nonblanks until a blank, or end of line, is found */ while ((P_peek(thefile->f) != ' ') & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipcolumn(thefile) _TEXT *thefile; { /* skip over a data column */ skipblanks(thefile); skipnonblanks(thefile); } /* end module skipblanks version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module interact.clearstring */ /* begin module clearstring */ Static Void clearstring(ribbon) string *ribbon; { /* empty the string */ long index; /* to the ribbon */ for (index = 0; index < maxstring; index++) ribbon->letters[index] = ' '; ribbon->length = 0; ribbon->current = 0; } /* clearstring */ Static Void initializestring(ribbon) string *ribbon; { /* start the string with a nil pointer. This routine should be called before doing linked list work. This allows the standard string routines to clear the string without killing the pointer. */ clearstring(ribbon); ribbon->next = NULL; } /* initializestring */ /* end module clearstring version = 4.39; (@ of prgmod.p 1999 November 28 */ /* end module interact.clearstring version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module filler.fillstring */ Static Void fillstring(s, a_) string *s; Char *a_; { /* this procedure makes it reasonably easy to fill the string s with characters. one calls the procedure as: */ /* 1 2 3 4 5 */ /* 12345678901234567890123456789012345678901234567890 */ /* fillstring(s, 'this-is-the-string '); the two comments make it easy to line the characters up. also, for this example, it was assumed that the length of filler as defined by the constant fillermax was 50. */ long length = fillermax; /* of the string without trailing blanks */ long index; /* of s */ clearstring(s); while (length > 1 && a_[length-1] == ' ') length--; if (length == 1 && a_[length-1] == ' ') { printf("fillstring: the string is empty\n"); halt(); } for (index = 0; index < length; index++) s->letters[index] = a_[index]; s->length = length; s->current = 1; } /* fillstring */ /* end module filler.fillstring version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module filler.filltrigger */ Static Void filltrigger(t_, a_) trigger *t_; Char *a_; { /* fill the trigger t */ fillstring(&t_->seek, a_); } /* fillstring */ /* end module filler.filltrigger version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module trigger.proc */ /* this module allows one to scan a series of characters, as from an array or a file, and to "trigger" or detect a simple string in the series. the advantage of the trigger is that several triggers can "observe" a stream of characters at once, each looking for a different thing. some other modules required: interact.const, interact.type */ Static Void resettrigger(t_) trigger *t_; { /* reset the trigger to ground state */ t_->state_ = 0; t_->skip = false; t_->found = false; } /* resettrigger */ Static Void testfortrigger(ch, t_) Char ch; trigger *t_; { /* look at the character ch. if it is part of the trigger (at the current trigger state), then the trigger state goes higher. if it is not part of the trigger then the trigger state is reset, skip is true and one should skip onward to find the trigger. if the trigger is found, found is true. 1996 Sep 12: Bug found! In the case of a trigger "ab", the program used to miss it for situations like "aab". This was because at the first a it would step up. Then it would see the second a and recognize that was not part of ab. It would fail to realize that it could be the start of a new one. The code now accounts for that possibility. */ t_->state_++; /* writestring(list,seek); writeln(list,'testfortrigger seek.letters[',state:1,']:', seek.letters[state],' ch:',ch); */ if (t_->seek.letters[t_->state_ - 1] == ch) { t_->skip = false; if (t_->state_ == t_->seek.length) t_->found = true; else t_->found = false; return; } /* it failed. But wait! It could be the beginning of a NEW trigger string! */ if (t_->seek.letters[0] == ch) { t_->state_ = 1; t_->skip = false; t_->found = false; return; } t_->state_ = 0; t_->skip = true; t_->found = false; /* reset trigger */ } /* testfortrigger */ /* end module trigger.proc version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module package.align */ /* ************************************************************************ */ /* begin module package.getpiece */ /* ************************************************************************ */ /* begin module package.brpiece */ /* ************************************************************************ */ /* begin module book.basis */ /* procedures needed for book manipulations */ /* get procedures should be used for all linked lists of records */ Static Void getline(l) line **l; { /* obtain a line from the free line list or by making a new one */ if (freeline != NULL) { *l = freeline; freeline = freeline->next; } else *l = (line *)Malloc(sizeof(line)); (*l)->length = 0; (*l)->next = NULL; } Static Void getdna(l) dnastring **l; { if (freedna != NULL) { *l = freedna; freedna = freedna->next; } else *l = (dnastring *)Malloc(sizeof(dnastring)); (*l)->length = 0; (*l)->next = NULL; } /* clear procedures should be called each time the records are no longer needed failure to do this may result in a stack overflow. */ Static Void clearline(l) line **l; { /* return a line to the free line list */ line *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = freeline; freeline = lptr; } Static Void writeline(afile, l, carriagereturn) _TEXT *afile; line *l; boolean carriagereturn; { /* write a line to a file, with carriage return if carriagereturn is true. */ long index; /* index to characters in l */ long FORLIM; FORLIM = l->length; for (index = 0; index < FORLIM; index++) putc(l->letters[index], afile->f); if (carriagereturn) putc('\n', afile->f); } Static Void showfreedna() { /* show the freedna list */ long counter = 0; /* count of freedna list */ dnastring *l; /* pointer into freedna list */ l = freedna; while (l != NULL) { counter++; printf("%ld", counter); printf(", length = %d\n", l->length); /* This is illegal according to gpc because one cannot write a pointer to a text file. It can be unearthed for debugging. write(output, ', pointer id: ',l:1); */ l = l->next; } } Static Void cleardna(l) dnastring **l; { dnastring *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = freedna; freedna = lptr; } Static Void clearheader(h) header *h; { /* clear the header h (remove lines to free storage) */ clearline(&h->fulnam); while (h->note != NULL) clearline(&h->note); } Static Void clearpiece(p) piece **p; { /* clear the dna of the piece */ while ((*p)->dna != NULL) cleardna(&(*p)->dna); clearheader(&(*p)->key.hea); } Static base chartobase(ch) Char ch; { /* convert a character into a base */ base Result; switch (ch) { case 'a': Result = a; break; case 'c': Result = c; break; case 'g': Result = g; break; case 't': Result = t; break; } return Result; } Static Char basetochar(ba) base ba; { /* convert a base into a character */ Char Result; switch (ba) { case a: Result = 'a'; break; case c: Result = 'c'; break; case g: Result = 'g'; break; case t: Result = 't'; break; } return Result; } Static base complement(ba) base ba; { /* take the complement of ba */ base Result; switch (ba) { case a: Result = t; break; case c: Result = g; break; case g: Result = c; break; case t: Result = a; break; } return Result; } Static Char chomplement(b) Char b; { /* create the character complement of base b. I must be getting hungry! */ return (basetochar(complement(chartobase(b)))); } Static long pietoint(p, pie) long p; piece *pie; { /* p is a coordinate on the piece. we want to transform p into a number from 1 to n: an internal coordinate system for easy manipulation of piece coordinates */ long i; /* an intermediate value */ piekey *WITH; WITH = &pie->key; switch (WITH->piedir) { case plus: if (p >= WITH->piebeg) i = p - WITH->piebeg + 1; else i = p - WITH->coobeg + WITH->cooend - WITH->piebeg + 2; break; case minus: if (p <= WITH->piebeg) i = WITH->piebeg - p + 1; else i = WITH->cooend - p + WITH->piebeg - WITH->coobeg + 2; break; } return i; } Static long inttopie(i, pie) long i; piece *pie; { /* i is in the range 1 to some maximum. it is an internal coordinate system for the program. we want to do a coordinate transformation to obtain a value in the range of the piece called pie: i=1 corresponds to piebeg and i=its maximum corresponds to pieend */ long p; /* an intermediate value */ piekey *WITH; WITH = &pie->key; switch (WITH->piedir) { case plus: p = WITH->piebeg + i - 1; if (p > WITH->cooend) { if (WITH->coocon == circular) p += WITH->coobeg - WITH->cooend - 1; } break; case minus: p = WITH->piebeg - i + 1; if (p < WITH->coobeg) { if (WITH->coocon == circular) p += WITH->cooend - WITH->coobeg + 1; } break; } return p; } Static long piecelength(pie) piece *pie; { /* return the length of the dna in pie */ return (pietoint(pie->key.pieend, pie)); } /* end module book.basis version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.getto */ Static Char getto(thefile, theline, ch) _TEXT *thefile; long *theline; long *ch; { /* search the file for a character in the first line which is a member of the set ch. Note: on 1999 March 10 the definition of this function was cleaned up. Instead of putting thefile on the line AFTER the charcter ch has been found, it puts thefile ON the line. Other routines like brdna and brpiece have to move to the next line themselves. This makes getto give the OBJECT. */ Char achar = ' '; /* a character in thefile */ boolean done = false; /* done finding achar */ while (!done) { if (BUFEOF(thefile->f)) { done = true; break; } achar = P_peek(thefile->f); if (P_inset(achar, ch)) { done = true; break; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } if (P_inset(achar, ch)) return achar; else { return ' '; /* The old method - while (not(achar in ch)) and (not eof(thefile)) do begin readln(thefile,achar); theline := succ(theline) end; if (achar in ch) then getto:=achar else getto:=' ' */ } } /* end module book.getto version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.skipstar */ Static Void skipstar(thefile) _TEXT *thefile; { /* skip start of line (or star = '*'). */ if (BUFEOF(thefile->f)) { printf(" procedure skipstar: end of book found\n"); halt(); return; } if (P_peek(thefile->f) != '*') { printf(" procedure skipstar: bad book\n"); printf(" \"*\" expected as first character on the line, but \"%c\" was found\n", P_peek(thefile->f)); halt(); } getc(thefile->f); /* skip the star */ if (P_peek(thefile->f) != ' ') { /* skip the blank */ printf(" procedure skipstar: bad book\n"); printf(" \"* \" expected on a line but \"*%c\" was found\n", P_peek(thefile->f)); halt(); } getc(thefile->f); } /* skipstar */ /* end module book.skipstar version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.brreanum */ Static Void brreanum(thefile, theline, reanum) _TEXT *thefile; long *theline; double *reanum; { /* read a real number from the file */ skipstar(thefile); fscanf(thefile->f, "%lg%*[^\n]", reanum); getc(thefile->f); (*theline)++; } /* end module book.brreanum version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.brnumber */ Static Void brnumber(thefile, theline, num) _TEXT *thefile; long *theline, *num; { /* read a number from the file */ skipstar(thefile); fscanf(thefile->f, "%ld%*[^\n]", num); getc(thefile->f); (*theline)++; } /* end module book.brnumber version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.brname */ Static Void brname(thefile, theline, nam) _TEXT *thefile; long *theline; name *nam; { /* read a name from the file */ long i; /* an index to the name */ Char c_; /* a character read */ skipstar(thefile); nam->length = 0; do { nam->length++; c_ = getc(thefile->f); if (c_ == '\n') c_ = ' '; nam->letters[nam->length - 1] = c_; } while (!(P_eoln(thefile->f) || nam->length >= namelength || nam->letters[nam->length - 1] == ' ')); if (nam->letters[nam->length - 1] == ' ') nam->length--; if (nam->length < namelength) { for (i = nam->length; i < namelength; i++) nam->letters[i] = ' '; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } /* brname */ /* end module book.brname version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.brline */ Static Void brline(thefile, theline, l) _TEXT *thefile; long *theline; line **l; { /* read a line from the file */ long i = 0; Char acharacter; skipstar(thefile); while (!P_eoln(thefile->f)) { i++; acharacter = getc(thefile->f); if (acharacter == '\n') acharacter = ' '; (*l)->letters[i-1] = acharacter; } (*l)->length = i; (*l)->next = NULL; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } /* end module book.brline version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.brdirect */ Static Void brdirect(thefile, theline, direct) _TEXT *thefile; long *theline; direction *direct; { /* read a direction */ Char ch; skipstar(thefile); fscanf(thefile->f, "%c%*[^\n]", &ch); getc(thefile->f); if (ch == '\n') ch = ' '; (*theline)++; if (ch == '+') *direct = plus; else *direct = minus; } /* end module book.brdirect version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.brconfig */ Static Void brconfig(thefile, theline, config) _TEXT *thefile; long *theline; configuration *config; { /* read a configuration */ Char ch; skipstar(thefile); fscanf(thefile->f, "%c%*[^\n]", &ch); getc(thefile->f); if (ch == '\n') ch = ' '; (*theline)++; if (ch == 'l') *config = linear; else *config = circular; } /* end module book.brconfig version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.brnotenumber */ Static Void brnotenumber(thefile, theline, note) _TEXT *thefile; long *theline; line **note; { /* book note reading to obtain the number of the object. the procedure returns the value of the number as a global. (this is not such a good practice, but we are stuck with it for now.) */ *note = NULL; numbered = false; number = 0; /* force number to zero if there is no number at all */ /* the next character is n or * depending on whether there are notes */ if (P_peek(thefile->f) != 'n') return; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; if (P_peek(thefile->f) != 'n') { skipstar(thefile); if (!P_eoln(thefile->f)) { if (P_peek(thefile->f) == '#') { numbered = true; getc(thefile->f); /* move past the number symbol */ fscanf(thefile->f, "%ld", &number); } } do { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } while (P_peek(thefile->f) != 'n'); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; return; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } /* brnotenumber */ /* end module book.brnotenumber version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.brnote */ Static Void brnote(thefile, theline, note) _TEXT *thefile; long *theline; line **note; { /* read note key */ line *newnote; /* the new note */ line *previousnote; /* the last line of the notes */ *note = NULL; if (P_peek(thefile->f) != 'n') /* enter note */ return; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; if (P_peek(thefile->f) != 'n') { /* abort null note (n/n) */ getline(note); newnote = *note; while (P_peek(thefile->f) != 'n') { /* wait until end of note */ brline(thefile, theline, &newnote); previousnote = newnote; /* get next note */ getline(&newnote->next); newnote = newnote->next; } /* last note was not used, so: */ clearline(&newnote); previousnote->next = NULL; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; return; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } /* brnote */ /* end module book.brnote version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.brheader */ Static Void brheader(thefile, theline, hea) _TEXT *thefile; long *theline; header *hea; { /* read the header of a key. */ fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* move past the object name - new definition 1999 Mar 13 */ (*theline)++; /*bbb*/ /* read key name */ brname(thefile, theline, &hea->keynam); /* read full name */ getline(&hea->fulnam); brline(thefile, theline, &hea->fulnam); /* read note key */ if (readnumber) brnotenumber(thefile, theline, &hea->note); else brnote(thefile, theline, &hea->note); } /* end module book.brheader version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.copyheader */ Static Void copyheader(fromhea, tohea) header fromhea, *tohea; { /* copy the header fromhea into tohea. Note that the linked objects are NOT copied, but merely pointed to. */ memcpy(tohea->keynam.letters, fromhea.keynam.letters, sizeof(alpha)); tohea->keynam.length = fromhea.keynam.length; tohea->note = fromhea.note; tohea->fulnam = fromhea.fulnam; } /* end module book.copyheader version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.brpiekey */ Static Void brpiekey(thefile, theline, pie) _TEXT *thefile; long *theline; piekey *pie; { /* read piece key, track the line number */ brheader(thefile, theline, &pie->hea); brreanum(thefile, theline, &pie->mapbeg); brconfig(thefile, theline, &pie->coocon); brdirect(thefile, theline, &pie->coodir); brnumber(thefile, theline, &pie->coobeg); brnumber(thefile, theline, &pie->cooend); brconfig(thefile, theline, &pie->piecon); brdirect(thefile, theline, &pie->piedir); brnumber(thefile, theline, &pie->piebeg); brnumber(thefile, theline, &pie->pieend); } /* end module book.brpiekey version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.brdna */ Static Void brdna(thefile, theline, dna) _TEXT *thefile; long *theline; dnastring **dna; { /* read in dna from thefile, track the line */ /* note: if the dna were circularized, by linking the last dnastring to the first, then the cleardna routine could not clear properly, and would loop forever... there is no reason to do that, since a simple mod function will allow one to access the circle. */ Char ch; dnastring *workdna; long SET[5]; long TEMP; getdna(dna); workdna = *dna; ch = getto(thefile, theline, P_addset(P_expset(SET, 0L), 'd')); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; ch = getc(thefile->f); /* skipstar */ if (ch == '\n') ch = ' '; while (ch == '*') { ch = getc(thefile->f); /* skip blank */ if (ch == '\n') ch = ' '; do { ch = getc(thefile->f); if (ch == '\n') ch = ' '; if (ch == 't' || ch == 'g' || ch == 'c' || ch == 'a') { if (workdna->length == dnamax) { getdna(&workdna->next); workdna = workdna->next; } workdna->length++; TEMP = workdna->length - 1; P_clrbits_B(workdna->part, TEMP, 1, 3); P_putbits_UB(workdna->part, TEMP, (int)chartobase(ch), 1, 3); } } while (!P_eoln(thefile->f)); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* go to next line */ (*theline)++; ch = getc(thefile->f); /* ch is either '*' or 'd' */ if (ch == '\n') ch = ' '; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* read past the d */ (*theline)++; } /* end module book.brdna version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.brpiece */ Static Void brpiece(thefile, theline, pie) _TEXT *thefile; long *theline; piece **pie; { /* read in a piece, change theline to reflect the lines traversed */ /* readln(thefile); (* move past the word 'piece' - new definition 1999 Mar 13 *) theline := succ(theline); (* BUG: was below! *) bbb*/ brpiekey(thefile, theline, &(*pie)->key); if (numbered || !skipunnum) brdna(thefile, theline, &(*pie)->dna); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* move past the word 'piece' - new definition 1999 Mar 13 */ (*theline)++; } /* end module book.brpiece version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.brinit */ Static Void brinit(book, theline) _TEXT *book; long *theline; { /* check that the book is ok to read, and set up the global variables for br routines */ /* halt if the book is bad (first word is 'halt') or the first character is not * */ if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); if (!BUFEOF(book->f)) { /* check for the date line */ if (P_peek(book->f) != '*') { if (P_peek(book->f) != 'h') printf(" this is not the first line of a book:\n"); else printf(" bad book:\n"); putchar(' '); while (!(P_eoln(book->f) | BUFEOF(book->f))) { putchar(P_peek(book->f)); getc(book->f); } putchar('\n'); halt(); } } else { printf(" book is empty\n"); halt(); } /* initialize free storage */ freeline = NULL; freedna = NULL; readnumber = true; /* usually we read in numbers for items */ number = 0; /* arbitrary value */ numbered = false; /* the piece has no number (none yet read in) */ skipunnum = false; *theline = 1; } /* brinit */ /* end module book.brinit version = 7.56; {of delmod.p 2000 Nov 21} */ /* ************************************************************************ */ /* end module package.brpiece version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module book.getpiece */ Static Void getpiece(thefile, theline, pie) _TEXT *thefile; long *theline; piece **pie; { /* move to and read in the next piece in the book */ Char ch; long SET[5]; ch = getto(thefile, theline, P_addset(P_expset(SET, 0L), 'p')); /* get to the next p(iece) in the book */ if (ch != ' ') { brpiece(thefile, theline, pie); /* 1999 june 2: removed this: ch:=getto(thefile,theline,['p']); (* read to end of p *) */ /* bbb - now done in brpiece readln(thefile); (* read past piece *) theline := succ(theline); */ } else clearpiece(pie); } /* end module book.getpiece version = 7.56; {of delmod.p 2000 Nov 21} */ /* ************************************************************************ */ /* end module package.getpiece version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module findblank */ Static Void findblank(afile) _TEXT *afile; { /* read a file to find the next blank character */ Char ch; do { ch = getc(afile->f); if (ch == '\n') ch = ' '; } while (ch != ' '); } /* end module findblank version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module findnonblank */ Static Void findnonblank(afile, ch) _TEXT *afile; Char *ch; { /* find the next non blank character in a file, return it in ch. */ *ch = ' '; while (!BUFEOF(afile->f) && *ch == ' ') { *ch = getc(afile->f); if (*ch == '\n') *ch = ' '; if (P_eoln(afile->f)) { fscanf(afile->f, "%*[^\n]"); getc(afile->f); } } } #define maximumrange 10000 /* if the alignment point is more than this distance from the piece ends, the program halts in an attempt to catch the alignment bug... 1991 Jan 11 It appears that the rewrite of the code has removed the bug, but the check will be kept. */ #define semicolon ';' /* end of delila instruction */ /* Local variables for align: */ struct LOC_align { _TEXT *inst; Char ch; /* a character in inst */ trigger endcomment; /* trigger to find '*-)' (ignore the dash!) */ trigger endcurly; /* trigger to find comments: '}' */ } ; /* procedure rd(var f: text; var ch: char); (* read ch from f allowing inspection of the result *) begin read(f,ch); write(output,ch); write(list,ch); write(output,'<',ch,'>'); end; procedure rdln(var f: text); (* readln f allowing inspection of the result *) begin readln(f); writeln(output); writeln(list); end; */ Local Void skipcomment(f, LINK) _TEXT *f; struct LOC_align *LINK; { /* skip an entire comment */ boolean comment = true; /* true means we are inside a comment */ /* skip to end of comment */ resettrigger(&LINK->endcomment); while (comment) { if (BUFEOF(f->f)) { printf("A comment does not end!\n"); halt(); } if (P_eoln(f->f)) { fscanf(f->f, "%*[^\n]"); getc(f->f); continue; } /* rdln(f) */ LINK->ch = getc(f->f); if (LINK->ch == '\n') LINK->ch = ' '; testfortrigger(LINK->ch, &LINK->endcomment); if (LINK->endcomment.found) { comment = false; /*write(output,'<'); rd(f,ch); write(output,'>');*/ } } } Local Void skipcurly(f, LINK) _TEXT *f; struct LOC_align *LINK; { /* skip an entire comment made by {}*/ boolean comment = true; /* true means we are inside a comment */ /* skip to end of comment */ resettrigger(&LINK->endcurly); while (comment) { if (BUFEOF(f->f)) { printf("A comment does not end!\n"); halt(); } if (P_eoln(f->f)) { fscanf(f->f, "%*[^\n]"); getc(f->f); continue; } /* rdln(f) */ LINK->ch = getc(f->f); if (LINK->ch == '\n') LINK->ch = ' '; testfortrigger(LINK->ch, &LINK->endcurly); if (LINK->endcurly.found) { comment = false; /*write(output,'<'); rd(f,ch); write(output,'>');*/ } } } Local Void skipquote(quote, LINK) trigger quote; struct LOC_align *LINK; { /* skip an entire quote of either the ' or " persuasion */ Char kind; /* the kind of quote, ' or " */ kind = quote.seek.letters[0]; /*writeln(output,'skipquote ',kind);*/ do { findnonblank(LINK->inst, &LINK->ch); /* get to the quote */ } while (!((LINK->ch == kind) | BUFEOF(LINK->inst->f))); if (LINK->ch != kind) { printf("end of quote starting with %c not found\n", kind); halt(); } } /* end module findnonblank version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module align.align */ Static Void align(inst_, book, theline, pie, length, alignedbase) _TEXT *inst_, *book; long *theline; piece **pie; long *length, *alignedbase; { /* documentation on align is in module info.align and delman.use.aligned.books. 1996 Sep 12: The routine now uses the trigger functions found in prgmod. The bug in the oldalign routine (that it misses the end of comments that end in a series of astrisks) has been fixed. It now checks that the piece corresponds to the book. */ struct LOC_align V; long p; /* index to a piece name */ long p1; /* another index to a piece name */ boolean done = false; /* done finding an aligning get */ long thebase; /* the base read in */ boolean indefault = false; /* true when within a default statement. These can contain the word 'piece', which must be ignored. */ trigger gettrigger; /* trigger to find 'get' */ trigger defaulttrigger; /* trigger to find 'default' */ trigger nametrigger; /* trigger to find 'name' */ trigger piecetrigger; /* trigger to find 'piece' */ trigger settrigger; /* trigger to find 'set' */ trigger begincomment; /* trigger to find '(-*' (ignore the dash!) */ trigger begincurly; /* trigger to find comments: '{' */ trigger quote1trigger; /* trigger to find single quote ' */ trigger quote2trigger; /* trigger to find double quote " */ name *WITH; long FORLIM; V.inst = inst_; filltrigger(&defaulttrigger, "default "); filltrigger(&gettrigger, "get "); filltrigger(&nametrigger, "name "); filltrigger(&piecetrigger, "piece "); filltrigger(&settrigger, "set "); filltrigger(&begincomment, "(* "); filltrigger(&V.endcomment, "*) "); filltrigger(&begincurly, "{ "); filltrigger(&V.endcurly, "} "); filltrigger("e1trigger, "' "); filltrigger("e2trigger, "\" "); resettrigger(&defaulttrigger); resettrigger(&gettrigger); resettrigger(&nametrigger); resettrigger(&piecetrigger); resettrigger(&settrigger); resettrigger(&begincomment); resettrigger(&begincurly); resettrigger("e1trigger); resettrigger("e2trigger); if (BUFEOF(book->f)) /* if there is still more to the book ... */ return; getpiece(book, theline, pie); /* read in the piece */ if (BUFEOF(book->f)) /* if we found a piece ... */ return; *length = pietoint((*pie)->key.pieend, *pie); /* calculate piece length */ /* now find in inst the next occurance of 'get' */ while (!done) { if (BUFEOF(V.inst->f)) { /* no instructions? */ *alignedbase = 1; /* simply align by the first base */ done = true; break; } if (P_eoln(V.inst->f)) { fscanf(V.inst->f, "%*[^\n]"); getc(V.inst->f); continue; } /*then rdln(inst)*/ V.ch = getc(V.inst->f); if (V.ch == '\n') V.ch = ' '; testfortrigger(V.ch, &begincomment); testfortrigger(V.ch, &begincurly); if (begincomment.found || begincurly.found) { if (V.ch == '*') { skipcomment(V.inst, &V); resettrigger(&begincomment); } else { resettrigger(&begincurly); skipcurly(V.inst, &V); } continue; } testfortrigger(V.ch, &gettrigger); if (gettrigger.found) { findnonblank(V.inst, &V.ch); /* get to "from" */ findblank(V.inst); /* get past "from" */ fscanf(V.inst->f, "%ld", &thebase); /* read in the alignedbase */ /*writeln(output);writeln(output,'thebase = ',thebase:1);*/ *alignedbase = pietoint(thebase, *pie); /*writeln(output,'alignedbase=',alignedbase:1);*/ done = true; } testfortrigger(V.ch, "e1trigger); if (quote1trigger.found) skipquote(quote1trigger, &V); testfortrigger(V.ch, "e2trigger); if (quote2trigger.found) skipquote(quote2trigger, &V); testfortrigger(V.ch, &defaulttrigger); if (defaulttrigger.found) { indefault = true; resettrigger(&defaulttrigger); } if (V.ch == semicolon) indefault = false; testfortrigger(V.ch, &settrigger); if (settrigger.found) { indefault = true; resettrigger(&settrigger); } if (V.ch == semicolon) indefault = false; /* check that piece names are correct */ testfortrigger(V.ch, &piecetrigger); if (indefault) continue; if (!piecetrigger.found) continue; skipblanks(V.inst); /* get to name */ WITH = &(*pie)->key.hea.keynam; FORLIM = WITH->length; for (p = 1; p <= FORLIM; p++) { V.ch = getc(V.inst->f); if (V.ch == '\n') V.ch = ' '; if (WITH->letters[p-1] != V.ch) { printf("The piece name in the book: \n"); /* p2c: malign.p, line 1637: Note: * Format for packed-array-of-char will work only if width < length [321] */ printf("%.*s\n", WITH->length, WITH->letters); printf("does not match the inst file name:\n"); /* write the letters that matched: */ for (p1 = 1; p1 < p; p1++) putchar(WITH->letters[p-1]); /* write the offending letter: */ putchar(V.ch); /* get the rest of the name and show it: */ done = P_eoln(V.inst->f); while (!done) { done = P_eoln(V.inst->f); if (done) break; V.ch = getc(V.inst->f); if (V.ch == '\n') V.ch = ' '; if (V.ch == ' ' || V.ch == ';') done = true; if (!done) putchar(V.ch); } putchar('\n'); /* mark the first letter that does not match: */ for (p1 = 1; p1 < p; p1++) putchar(' '); printf("^\n"); halt(); /* we are not inside a comment */ } } } /*rd(inst,ch);*/ if (*alignedbase > -maximumrange && *alignedbase <= *length + maximumrange) return; printf(" In procedure align:\n"); printf(" read in base was %ld\n", thebase); printf(" in internal coordinates: %ld\n", *alignedbase); printf(" maximum range was %ld\n", (long)maximumrange); printf(" piece length was %ld\n", *length); WITH = &(*pie)->key.hea.keynam; /* p2c: malign.p, line 1678: Note: * Format for packed-array-of-char will work only if width < length [321] */ printf(" piece name: %.*s\n", WITH->length, WITH->letters); printf(" piece number: %ld\n", number); printf(" aligned base is too far away... see the code\n"); halt(); } #undef maximumrange #undef semicolon /* end module align.align version = 7.56; {of delmod.p 2000 Nov 21} */ /* ************************************************************************ */ /* end module package.align version = 7.56; {of delmod.p 2000 Nov 21} */ /* begin module flfill */ Static Void flfill(h, dh, ddh, n) double *h, *dh, *ddh; long n; { /* fills the array h with -f*log(f) for f = i/n, i running from 0 to n; fills the array dh with h[i+1]-h[i], ddh with h[i+2]-h[i] */ double freq; /* frequency */ double nreal; /* real value of n */ double ln2 = log(2.0); /* ln(2) */ long i; /* loop counter */ h[0] = 0.0; nreal = n; for (i = 1; i <= n; i++) { freq = i / nreal; h[i] = -(freq * log(freq) / ln2); dh[i-1] = h[i] - h[i-1]; if (i > 1) ddh[i-2] = h[i] - h[i-2]; } } #define pow18 262144L #define pow19 524288L #define pow30 1073741824L /* powers of 2 */ #define pow31m 2147483647.0 /* 2**31-1 */ /* end module flfill */ /* begin module shift.random.31 */ Static Void random(rand, iseed) double *rand; long *iseed; { /* This random number generator is based on a shift register with a single bit of feedback, as described in Electronics for Neurobiologists, by Brown, Maxfield and Moraff, MIT press 1973, referencing Random Process Simulation and Measurement by Korn, McGraw-Hill 1966. A 31-bit shift register is maintained in the integer iseed, which should initially be given any positive value. This 31-bit number is shifted right one bit and the output of the last (31st) bit and the 13th bit are added modulo 2 (exclusive orred) and fed back into the new first bit. This is done between 4 and 11 times, depending on the last 3 bits of the original number. The result is converted to a real number between 0 and 1 and returned as the value of rand. The 31-bit shift register goes through all 2**31-1 values before repeating; the repetition frequency of this algorithm could be less or greater depending on the seed, because of the random number of multiple shifts per call. */ long i, nrep; /* index, number of times to do shift */ nrep = ((*iseed) & 7) + 4; /* do it 4 to 11 times based on last 3 bits */ for (i = 1; i <= nrep; i++) { /* if last bit and 9th bit are equal, feed back a 0, otherwise a 1 */ if (((*iseed) & 1) == (((*iseed) & (pow19 - 1)) >= pow18)) *iseed /= 2; else *iseed = *iseed / 2 + pow30; } *rand = *iseed / pow31m; } /* random */ #undef pow18 #undef pow19 #undef pow30 #undef pow31m /* end module shift.random.31 */ /* begin module randbase */ Static Void randbase(nuc, iseed) base *nuc; long *iseed; { /* generate a random base, with p=0.25 for each base */ random(&rand, iseed); if (rand < 0.25) { *nuc = a; return; } if (rand < 0.5) { *nuc = c; return; } if (rand < 0.75) *nuc = g; else *nuc = t; } /* end module randbase */ /* begin module readparam */ Static Void readparam(pin) _TEXT *pin; { /* read and check parameters from parameter file pin */ if (*pin->name != '\0') { if (pin->f != NULL) pin->f = freopen(pin->name, "r", pin->f); else pin->f = fopen(pin->name, "r"); } else rewind(pin->f); if (pin->f == NULL) _EscIO2(FileNotFound, pin->name); RESETBUF(pin->f, Char); fscanf(pin->f, "%ld%ld%*[^\n]", &winleft, &winright); getc(pin->f); fscanf(pin->f, "%ld%ld%*[^\n]", &shiftmin, &shiftmax); getc(pin->f); fscanf(pin->f, "%ld%*[^\n]", &iseedparam); getc(pin->f); fscanf(pin->f, "%ld%*[^\n]", &nranseq); getc(pin->f); fscanf(pin->f, "%ld%*[^\n]", &nshuffle); getc(pin->f); fscanf(pin->f, "%ld%*[^\n]", &ifpaired); getc(pin->f); fscanf(pin->f, "%ld%*[^\n]", &standout); getc(pin->f); fscanf(pin->f, "%ld%*[^\n]", &npassout); getc(pin->f); fscanf(pin->f, "%ld%*[^\n]", &nshiftout); getc(pin->f); fscanf(pin->f, "%lg%*[^\n]", &tolerance); getc(pin->f); fscanf(pin->f, "%ld%*[^\n]", &ntolpass); getc(pin->f); if (!BUFEOF(pin->f)) { fscanf(pin->f, "%c%*[^\n]", &alignmenttype); getc(pin->f); if (alignmenttype == '\n') alignmenttype = ' '; if (alignmenttype != 'b' && alignmenttype != 'i' && alignmenttype != 'f') { printf("alignment type must be f, b, or i\n"); halt(); } } else alignmenttype = 'i'; window = winright - winleft + 1; /* window must be >0 and < maxlen but can be anywhere */ if (window <= 0 || window >= maxlen) { printf("parameter error: "); printf("window is%12ld, must be between 1 and%12ld\n", window, maxlen - 1L); halt(); } if (shiftmin > 0 || shiftmax < 0) { printf("parameter error: "); printf("shiftmin must be < or = 0, shiftmax must be > or = 0\n"); halt(); } iseed = iseedparam; iseed2 = iseed; if (iseed <= 0) { printf("parameter error: "); printf("random seed must be > 0\n"); halt(); } paired = (ifpaired > 0); /* convert ifpaired to boolean */ if (tolerance > 0.0 && ntolpass > 0) return; printf("parameter error: "); printf("tolerance and # of passes below tolerance must be > 0\n"); halt(); } /* end module readparam */ /* begin module writeparam */ Static Void writeparam(fout) _TEXT *fout; { /* writes standard header of user parameters to an output file */ if (*fout->name != '\0') { if (fout->f != NULL) fout->f = freopen(fout->name, "w", fout->f); else fout->f = fopen(fout->name, "w"); } else { if (fout->f != NULL) rewind(fout->f); else fout->f = tmpfile(); } if (fout->f == NULL) _EscIO2(FileNotFound, fout->name); SETUPBUF(fout->f, Char); /* initialize file */ fprintf(fout->f, "* malign%5.2f\n", version); /* program name and version */ fprintf(fout->f, "*\n"); if (*book.name != '\0') { if (book.f != NULL) book.f = freopen(book.name, "r", book.f); else book.f = fopen(book.name, "r"); } else rewind(book.f); if (book.f == NULL) _EscIO2(FileNotFound, book.name); RESETBUF(book.f, Char); /* copy first line of book */ copyaline(&book, fout); if (*book.name != '\0') { if (book.f != NULL) book.f = freopen(book.name, "r", book.f); else book.f = fopen(book.name, "r"); } else rewind(book.f); if (book.f == NULL) _EscIO2(FileNotFound, book.name); RESETBUF(book.f, Char); fprintf(fout->f, "*\n"); fprintf(fout->f, "* user specified parameters:\n"); fprintf(fout->f, "* %5ld%5ld window for calculating uncertainty, relative to aligned base\n", winleft, winright); fprintf(fout->f, "* %5ld%5ld minimum and maximum shift of aligned base\n", shiftmin, shiftmax); fprintf(fout->f, "* %12ld random seed\n", iseedparam); fprintf(fout->f, "* %6ld number of random sequences (0 for sequences in book)\n", nranseq); fprintf(fout->f, "* %6ld number of runs after random shuffle\n", nshuffle); fprintf(fout->f, "* %6ld independent sequences (0), or pairs of complementary sequences (1)\n", ifpaired); fprintf(fout->f, "* %6ld no (-1), limited (0) or full (1) standard output\n", standout); fprintf(fout->f, "* %6ld period at which to output H and alignment\n", npassout); fprintf(fout->f, "* %6ld period at which to output L and H(L)\n", nshiftout); fprintf(fout->f, "* %10.6f tolerance in change of H\n", tolerance); fprintf(fout->f, "* %6ld maximum number of passes below tolerance\n", ntolpass); fprintf(fout->f, "*\n"); } /* end module writeparam */ /* begin module readaligned */ Static Void readaligned(inst, book) _TEXT *inst, *book; { /* read aligned sequences from book and inst files, place in linked list */ piece *pie; /* standard pointer to piece */ long commonshift; /* common limit on shifting of paired sequences */ long length; /* length of piece */ long alignedbase; /* relative number of aligned base */ long almax; /* a maximum alignment */ long i; /* array index */ long FORLIM; if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); /* reset files */ if (*inst->name != '\0') { if (inst->f != NULL) inst->f = freopen(inst->name, "r", inst->f); else inst->f = fopen(inst->name, "r"); } else rewind(inst->f); if (inst->f == NULL) _EscIO2(FileNotFound, inst->name); RESETBUF(inst->f, Char); pie = (piece *)Malloc(sizeof(piece)); /* create space for pieces */ numseq = 0; /* zero # of sequences */ while (!BUFEOF(book->f)) { switch (alignmenttype) { case 'i': align(inst, book, &theline, &pie, &length, &alignedbase); break; case 'b': case 'f': getpiece(book, &theline, &pie); /* read in the piece */ length = piecelength(pie); alignedbase = -inttopie(1L, pie); break; } if (BUFEOF(book->f)) /* I assume there's no sequence if eof */ break; numseq++; /* increment sequence count */ if (numseq > maxnseq) { printf("procedure readaligned error:\n"); printf("number of sequences greater than limit of %ld\n", (long)maxnseq); halt(); } if (length > maxlen) { printf("procedure readaligned error:\n"); printf("sequence %12ld, length%12ld, greater than limit of%12ld\n", numseq, length, (long)maxlen); halt(); } sep = (seqrec *)Malloc(sizeof(seqrec)); /* create new space */ if (numseq == 1) sepfirst = sep; /* set up first pointer */ else seplast->next = sep; /* or link from last record */ for (i = 0; i < length; i++) /* transfer bases */ sep->alseq[i] = (base)P_getbits_UB(pie->dna->part, i, 1, 3); sep->length = length; /* transfer the name from pie into the data structure */ sep->piecename.length = pie->key.hea.keynam.length; FORLIM = sep->piecename.length; for (i = 0; i < FORLIM; i++) sep->piecename.letters[i] = pie->key.hea.keynam.letters[i]; sep->alignread = alignedbase; sep->alignstart = alignedbase; sep->aligncurrent = alignedbase; sep->alignmin = alignedbase + shiftmin; sep->alignmax = alignedbase + shiftmax; if (alignmenttype != 'b') { almax = length - winright; /* if sep^.alignmax > almax then writeln(output,'modify alignmax!') else writeln(output,'max nope!'); if sep^.alignmin < (1-winleft) then writeln(output,'modify alignmin!') else writeln(output,'min nope!'); */ /* desired minimum alignment; may not be lower than 1-winleft */ if (sep->alignmin <= -winleft) sep->alignmin = 1 - winleft; /* desired maximum alignment; may not be higher than almax */ almax = length - winright; if (sep->alignmax > almax) sep->alignmax = almax; } if (sep->alignmax < alignedbase || sep->alignmin > alignedbase) { printf("procedure readaligned error:\n"); printf("In sequence %ld, the aligned base is too close to one end:\n", numseq); if (sep->alignmax < alignedbase) printf("sep^.alignmaxalignmin > alignedbase) printf("sep^.alignmin>alignedbase\n"); printf("alignedbase = %ld\n", alignedbase); printf("shiftmin = %3ld\n", shiftmin); printf("shiftmax = %3ld\n", shiftmax); printf("winleft = %3ld\n", winleft); printf("winright = %3ld\n", winright); printf("sep^.alignmin = alignedbase+shiftmin = %ld\n", sep->alignmin); printf("sep^.alignmax = alignedbase+shiftmax = %ld\n", sep->alignmax); if (sep->alignmin != alignedbase + shiftmin) { printf("EXCEPT that sep^.alignmin < (1-winleft)\n"); printf("SO sep^.alignmin = 1-winleft\n"); printf("WHERE winleft = %ld\n", winleft); } halt(); } /* if reading second sequence of pair, make shifts mutually limiting */ if (paired && !(numseq & 1)) { /* calculate minimum of allowed shift of last to left and this one to right, impose that as limit on both shifts */ commonshift = seplast->alignread - seplast->alignmin; if (commonshift > sep->alignmax - alignedbase) commonshift = sep->alignmax - alignedbase; seplast->alignmin = seplast->alignread - commonshift; sep->alignmax = alignedbase + commonshift; /* calculate minimum of allowed shift of last to right and this one to left, impose that as limit on both shifts */ commonshift = seplast->alignmax - seplast->alignread; if (commonshift > alignedbase - sep->alignmin) commonshift = alignedbase - sep->alignmin; seplast->alignmax = seplast->alignread + commonshift; sep->alignmin = alignedbase - commonshift; } seplast = sep; /* save pointer to put in link on next round */ } /* end of while loop searching through book */ sep->next = NULL; if (numseq == 0) { printf("no sequences found in the book\n"); halt(); } if (paired && (numseq & 1)) { printf("since the sequences should be paired, there should\n"); printf("be an odd number of them. There are %ld.\n", numseq); halt(); } Free(pie); } /* end module readaligned */ /* begin module genrandseqs */ Static Void genrandseqs() { /* generates random sequences and puts them into linked list */ long seqno, i; /* indices */ long len; /* length of sequences */ long FORLIM; numseq = nranseq; /* number of sequences */ len = winright - winleft + shiftmax - shiftmin + 1; FORLIM = numseq; for (seqno = 1; seqno <= FORLIM; seqno++) { sep = (seqrec *)Malloc(sizeof(seqrec)); /* create new space */ if (seqno == 1) sepfirst = sep; /* set up first pointer */ else seplast->next = sep; /* or link from last record */ for (i = 0; i < len; i++) randbase(&sep->alseq[i], &iseed); sep->length = len; sep->alignmin = 1 - winleft; sep->alignread = sep->alignmin - shiftmin; sep->alignmax = sep->alignread + shiftmax; sep->alignstart = sep->alignread; sep->aligncurrent = sep->alignread; seplast = sep; /* save pointer to put in link on next round */ } sep->next = NULL; } /* end module genrandseqs */ /* begin module fillnbl */ Static Void fillnbl() { /* computes the n(B,L) array from the linked list of sequences */ base b; /* temporary for base */ long i; /* array index */ long seqno; /* sequence counter */ long offset; /* offset into sequence */ long FORLIM, FORLIM1; sep = sepfirst; /* initialize pointer */ for (b = a; (long)b <= (long)t; b = (base)((long)b + 1)) { /* zero the matrix */ FORLIM1 = window; for (i = 0; i < FORLIM1; i++) nbl[(long)b][i] = 0; } FORLIM1 = numseq; for (seqno = 1; seqno <= FORLIM1; seqno++) { offset = sep->aligncurrent + winleft - 1; FORLIM = window; /* offset to get this sequence in the window */ for (i = 0; i < FORLIM; i++) { b = sep->alseq[i + offset]; /* base at this position */ nbl[(long)b][i]++; /* increment matrix entry */ } sep = sep->next; /* point to next sequence */ } } /* end module fillnbl */ /* begin module hcalc */ Static Void hcalc(nbl, flogf, hl, htot, window) long (*nbl)[maxlen]; double *flogf; double *hl; double *htot; long window; { /* calculates H(L) and total H from n(B,L) matrix, using flogf table */ long i; /* index to positions */ base b; /* index to bases */ double temp; /* a temporary real (what else?) */ *htot = 0.0; /* zero total */ for (i = 0; i < window; i++) { temp = 0.0; /* add up the flogf's in the ith column */ for (b = a; (long)b <= (long)t; b = (base)((long)b + 1)) temp += flogf[nbl[(long)b][i]]; *htot += temp; /* add to grand total */ hl[i] = temp; } } /* end module hcalc */ /* begin module changenbl */ Static Void changenbl(sep, winlen, increment) seqrec **sep; long winlen, increment; { /* subtracts or adds sequence pointed to by sep to the nbl array, up to the length specified by winlen */ long i, offset; /* index, offset into sequence for given alignment */ base b; /* temporary base */ seqrec *WITH; WITH = *sep; offset = WITH->aligncurrent + winleft - 1; /* offset into sequence */ for (i = 0; i < winlen; i++) { b = WITH->alseq[i + offset]; /* base at this window position */ nbl[(long)b][i] += increment; /* change nbl for this base */ } } /* end module changenbl */ /* begin module shiftseq */ Static Void shiftseq() { /* shift sequence between allowed alignments and find alignment with minimum H value */ long alatmin[maxlen]; /* alignments with minimum dh */ double dh; /* current value of dh for particular alignment */ double dhcur; /* dh for current alignment */ double dhmin = 1.0e10; /* running minimum dh value */ long numbermin; /* number of minimum dh's hit */ long alcur; /* working alignment */ long i, offset; /* index, offset into sequence for given alignment */ seqrec *WITH; long FORLIM, FORLIM1; /* b: base; (@ temporary base */ WITH = sep; /* subtract off the sequence from the nbl array */ changenbl(&sep, window, -1L); FORLIM = WITH->alignmax; /* run through each allowed alignment calculating dh */ for (alcur = WITH->alignmin; alcur <= FORLIM; alcur++) { dh = 0.0; /* accumulate dh here */ offset = alcur + winleft - 1; FORLIM1 = window; /* this is the core of the entire program, determine if the uncertainty h would be lower with this alignment, by summing the differences from the current h. That is, calculate dh. */ for (i = 0; i < FORLIM1; i++) dh += fldiff[nbl[(long)WITH->alseq[i + offset]][i]]; /* save dh if we are at the current alignment this saves us the trouble of calculating it separately */ if (alcur == WITH->aligncurrent) dhcur = dh; if (dh <= dhmin) { /* if at or below minimum */ if (dh < dhmin) /* zero list if new minimum */ numbermin = 0; dhmin = dh; /* save it */ numbermin++; /* put alignment on list */ alatmin[numbermin-1] = alcur; } } if (numbermin > 1) { /* resolve ties for minimum if they exist */ random(&rand, &iseed2); /* ie make numbermin be index to randomly selected alignment on list */ numbermin = (long)(0.99999 + numbermin * rand); if (numbermin == 0) numbermin = 1; /* writeln(output,'See there really are ties! (shiftseq)'); */ } if (alatmin[numbermin-1] == WITH->aligncurrent) /* if no change in alignment */ notchanged++; /* it's true of one more sequence */ else { /* adjust current h value */ notchanged = 0; /* otherwise zero count of unchanged sequences */ hcurrent += dhmin - dhcur; } WITH->aligncurrent = alatmin[numbermin-1]; /* add the sequence back to the nbl array at new alignment*/ changenbl(&sep, window, 1L); } /* end module shiftseq */ /* begin module doubleshift */ Static Void doubleshift() { /* shift sequence between allowed alignments and find alignment with minimum H value, for paired complementary sequences */ long alatmin[maxlen]; /* alignments with minimum dh */ double dh; /* current value of dh for particular alignment */ double dhcol; /* dh for one column */ double dhcur; /* dh for current alignment */ double dhmin = 1.0e10; /* running minimum dh value */ long numbermin; /* number of minimum dh's hit */ long alcur; /* working alignment */ long halfwindow; /* size of window actually being calculated for */ long offsetnex; /* offset into next sequence */ long i, offset; /* index, offset into sequence for given alignment */ base b, bn; /* base in this and next sequence */ seqrec *nxp; /* pointer to paired sequence */ long FORLIM; halfwindow = (window + 1) / 2; /* length of unique part of window */ nxp = sep->next; /* point to next sequence */ /* subtract off the sequences from the nbl array */ /* NOTE WELL: the right half of nbl goes to pot during this process */ changenbl(&sep, halfwindow, -1L); changenbl(&nxp, halfwindow, -1L); FORLIM = sep->alignmax; /* run through each allowed alignment calculating dh */ for (alcur = sep->alignmin; alcur <= FORLIM; alcur++) { dh = 0.0; /* accumulate dh here */ offset = alcur + winleft - 1; offsetnex = nxp->aligncurrent + sep->aligncurrent - alcur + winleft - 1; for (i = 0; i < halfwindow; i++) { b = sep->alseq[i + offset]; bn = nxp->alseq[i + offsetnex]; /* if bases are equal dh for the column is fldoubl value, otherwise it's the sum of fldiff's for the two bases */ if (b == bn) dhcol = fldoubl[nbl[(long)b][i]]; else dhcol = fldiff[nbl[(long)b][i]] + fldiff[nbl[(long)bn][i]]; dh += dhcol; } /* dh is twice this for the whole window, minus dh for the last column for an odd window */ if (window & 1) dh += dh - dhcol; else dh += dh; /* save dh if it's the current alignment */ if (alcur == sep->aligncurrent) dhcur = dh; if (dh <= dhmin) { /* if at or below minimum */ if (dh < dhmin) /* zero list if new minimum */ numbermin = 0; dhmin = dh; /* save it */ numbermin++; /* put alignment on list */ alatmin[numbermin-1] = alcur; } } if (numbermin > 1) { /* resolve ties for minimum if they exist */ random(&rand, &iseed2); /* ie make numbermin be index to randomly selected alignment on list */ numbermin = (long)(0.99999 + numbermin * rand); if (numbermin == 0) numbermin = 1; /* writeln(output,'See there really are ties! (doubleshift)'); */ } if (alatmin[numbermin-1] == sep->aligncurrent) /* if no change in alignment */ notchanged += 2; /* it's true of two more sequences */ else { /* adjust current h value */ notchanged = 0; /* otherwise zero count of unchanged sequences */ hcurrent += dhmin - dhcur; } sep->aligncurrent = alatmin[numbermin-1]; nxp->aligncurrent = nxp->alignread + sep->alignread - sep->aligncurrent; /* add the sequences back to the nbl array at new alignment*/ changenbl(&sep, halfwindow, 1L); changenbl(&nxp, halfwindow, 1L); } /* end module doubleshift */ /* begin module hloutput */ Static Void hloutput(fout, npass, shiftno) _TEXT *fout; long npass, shiftno; { /* output uncertainty as a function of position, H(L), for pass npass and shift number shiftno */ /* modificiation for xyplo program format: all data are in columns */ long i; /* index to positions */ long FORLIM; FORLIM = window; for (i = 0; i < FORLIM; i++) { fprintf(fout->f, " %5ld %5ld", npass, shiftno); /* position L, Hs(L) */ fprintf(fout->f, " %3ld %10.6f\n", winleft + i, hl[i]); } /* pass number, shift number */ } /* end module hloutput */ /* begin module alignoutput */ Static Void alignoutput(fout, npass) _TEXT *fout; long npass; { /* outputs current H, pass number npass, and alignments */ long seqno; /* index to sequences */ long FORLIM; sep = sepfirst; fprintf(fout->f, "pass%6ld, H =%12.7f relative alignments:\n", npass, hcurrent); FORLIM = numseq; for (seqno = 1; seqno <= FORLIM; seqno++) { fprintf(fout->f, "%4ld", sep->aligncurrent - sep->alignread); sep = sep->next; if (seqno % 20 == 0) putc('\n', fout->f); /* p2c: malign.p, line 2247: * Note: Using % for possibly-negative arguments [317] */ } if (numseq % 20 != 0) putc('\n', fout->f); /* p2c: malign.p, line 2249: * Note: Using % for possibly-negative arguments [317] */ } /* end module alignoutput */ /* begin module albestout */ Static Void albestout(fout, oap, numperline, field) _TEXT *fout; optlist *oap; long numperline, field; { /* outputs the array oap^.albest, numperline per line, field width = field */ long seqno; /* index to sequences */ long FORLIM; FORLIM = numseq; for (seqno = 1; seqno <= FORLIM; seqno++) { fprintf(fout->f, "%*ld", (int)field, oap->albest[seqno-1]); if (seqno % numperline == 0) putc('\n', fout->f); /* p2c: malign.p, line 2261: * Note: Using % for possibly-negative arguments [317] */ } if (numseq % numperline != 0) putc('\n', fout->f); /* p2c: malign.p, line 2263: * Note: Using % for possibly-negative arguments [317] */ } /* -1 or +1, defining the orientation of the Delila instruction */ Local Char sign(i) long i; { if (i >= 0) return '+'; else return '-'; } /* end module albestout */ /* begin module instoutput */ Static Void instoutput(fout, oap) _TEXT *fout; optlist *oap; { /* outputs a new inst file for alignment pointed to by oap*/ long seqno; /* index to sequences */ long i; /* index to each piece name */ long thesign = 1; long FORLIM, FORLIM1; if (*fout->name != '\0') { if (fout->f != NULL) fout->f = freopen(fout->name, "w", fout->f); else fout->f = fopen(fout->name, "w"); } else { if (fout->f != NULL) rewind(fout->f); else fout->f = tmpfile(); } if (fout->f == NULL) _EscIO2(FileNotFound, fout->name); SETUPBUF(fout->f, Char); fprintf(fout->f, "title \"malign %4.2f\";\n", version); fprintf(fout->f, "(* begin module numbering *)\n"); fprintf(fout->f, "(* number only the pieces, starting at 1 *)\n"); fprintf(fout->f, "default numbering piece;\n"); fprintf(fout->f, "default numbering 1;\n"); fprintf(fout->f, "default out-of-range reduce-range;\n"); fprintf(fout->f, "(* end module numbering *)\n"); fprintf(fout->f, "(* data on this alignment:\n"); fprintf(fout->f, "The H value: %*.*f\n", infofield, infodecim, oap->hbest); fprintf(fout->f, "Number of times achieved: %ld\n", oap->num); fprintf(fout->f, "*)\n\n"); fprintf(fout->f, "organism org;\n"); fprintf(fout->f, "chromosome chr;\n"); sep = sepfirst; FORLIM = numseq; /* if not paired, sign is always +1 */ for (seqno = 1; seqno <= FORLIM; seqno++) { fprintf(fout->f, "piece "); FORLIM1 = sep->piecename.length; for (i = 0; i < FORLIM1; i++) putc(sep->piecename.letters[i], fout->f); sep = sep->next; putc(';', fout->f); fprintf(fout->f, " (* piece %ld *)\n", seqno); /* writeln(fout,'get from', oap^.albest[seqno]:10,' -x', ' to ', oap^.albest[seqno]:10,' +y;'); */ if (paired && !(seqno & 1)) thesign = -1; else thesign = 1; fprintf(fout->f, "get from%10ld %c%ld to same %c%ld;\n", oap->albest[seqno-1], sign(thesign * winleft), labs(winleft), sign(thesign * winright), labs(winright)); /* old method: before 'same' command: ' to ', oap^.albest[seqno]:10,' ', sign(thesign*winright),abs(winright):1,';') */ } } /* end module instoutput */ /* begin module optalignout */ Static Void optalignout(fout) _TEXT *fout; { /* outputs relative alignments of the set of optimal alignments in a format usable to the user */ long optno; /* counter of optimal alignments */ long seqno; /* index to sequences */ long FORLIM, FORLIM1; writeparam(fout); /* initialize, write params to file */ fprintf(fout->f, "* relative aligned bases for the set of optimal alignments\n\n"); fprintf(fout->f, "%5ld sequences,%5ld optimal alignments in%6ld runs\n", numseq, numopt, shuffle); fprintf(fout->f, "%12.7f = H for original alignment\n", horiginal); oap = oapfirst; FORLIM = numopt; for (optno = 1; optno <= FORLIM; optno++) { /* do each optimal alignment in turn */ fprintf(fout->f, "\n%4ld)%6ld occurrences, H =%12.7f, relative aligned bases:\n", optno, oap->num, oap->hbest); /* write blank line then n and H */ sep = sepfirst; FORLIM1 = numseq; /* run through the sequences computing relative alignments as optimal alignment minus original alignment */ for (seqno = 1; seqno <= FORLIM1; seqno++) { fprintf(fout->f, "%4ld", oap->albest[seqno-1] - sep->alignread); if (seqno % 20 == 0) putc('\n', fout->f); /* p2c: malign.p, line 2359: * Note: Using % for possibly-negative arguments [317] */ sep = sep->next; } if (numseq % 20 != 0) putc('\n', fout->f); /* p2c: malign.p, line 2362: * Note: Using % for possibly-negative arguments [317] */ oap = oap->next; } } /* end module optalignout */ /* begin module optinstout */ Static Void optinstout(fout) _TEXT *fout; { /* outputs optimal alignments in absolute DNA coordinates for conversion to new inst files later */ /* should use inttopie routines: does it? */ long seqno; /* index to sequences */ long FORLIM; if (*fout->name != '\0') { if (fout->f != NULL) fout->f = freopen(fout->name, "w", fout->f); else fout->f = fopen(fout->name, "w"); } else { if (fout->f != NULL) rewind(fout->f); else fout->f = tmpfile(); } if (fout->f == NULL) _EscIO2(FileNotFound, fout->name); SETUPBUF(fout->f, Char); /* initialize file */ fprintf(fout->f, "%12ld %12ld\n", numseq, numopt); /* first line: # of sequences, aligns */ oap = oapfirst; do { /* for each optimal alignment */ fprintf(fout->f, "%5ld %12.7f\n", oap->num, oap->hbest); /* write num and H */ FORLIM = numseq; for (seqno = 1; seqno <= FORLIM; seqno++) { fprintf(fout->f, " %8ld", oap->albest[seqno-1]); /* these are absolute now! */ if (seqno % 10 == 0) putc('\n', fout->f); /* p2c: malign.p, line 2382: * Note: Using % for possibly-negative arguments [317] */ } if (numseq % 10 != 0) putc('\n', fout->f); /* p2c: malign.p, line 2384: * Note: Using % for possibly-negative arguments [317] */ oap = oap->next; } while (oap != NULL); } /* end module optinstout */ /* begin module optoutput */ Static Void optoutput() { /* calls procedure to output optimal alignments in readable relative form, then converts all optimal alignments to absolute DNA coordinates and calls procedures to output them */ long seqno; /* index to sequences */ piece *pie; /* pointer for piece */ long length, alignedbase; /* for call to align only */ long FORLIM; /* oapprevious: optlistptr; (* the previous alignment *)*/ optalignout(&optalign); /* output readable relative optimal alignments */ if (*book.name != '\0') { if (book.f != NULL) book.f = freopen(book.name, "r", book.f); else book.f = fopen(book.name, "r"); } else rewind(book.f); if (book.f == NULL) _EscIO2(FileNotFound, book.name); RESETBUF(book.f, Char); /* prepare to read book and inst again */ if (*inst.name != '\0') { if (inst.f != NULL) inst.f = freopen(inst.name, "r", inst.f); else inst.f = fopen(inst.name, "r"); } else rewind(inst.f); if (inst.f == NULL) _EscIO2(FileNotFound, inst.name); RESETBUF(inst.f, Char); pie = (piece *)Malloc(sizeof(piece)); FORLIM = numseq; for (seqno = 0; seqno < FORLIM; seqno++) { /* read each sequence in turn */ switch (alignmenttype) { case 'i': align(&inst, &book, &theline, &pie, &length, &alignedbase); break; case 'b': case 'f': getpiece(&book, &theline, &pie); /* read in the piece */ length = piecelength(pie); alignedbase = -inttopie(1L, pie); break; } oap = oapfirst; /* then for each optimal alignment, convert the internal alignment for that sequence to absolute DNA coordinate */ do { oap->albest[seqno] = inttopie(oap->albest[seqno], pie); oap = oap->next; } while (oap != NULL); } optinstout(&optinst); /* output file of absolute alignments */ /* instoutput(bestinst, oapfirst); (* file of new inst for best alignment *) */ } #define maxsize 200 /* the largest n allowed */ #define accuracy 10000 /* end module optoutput */ /* begin module calehnb */ Static Void calehnb(n, gna, gnc, gng, gnt, hg, ehnb, varhnb) long n, gna, gnc, gng, gnt; double *hg, *ehnb, *varhnb; { /* ; debugging: boolean */ /* calculate e(hnb) in bits/bp (ehnb) for a number (n) of example sequence sites. gna to gnt are the composition to use for the genome probabilities of a to t. the genomic entropy hg and the variance var(hnb) (=varhnb) are also calculated. if the variable debugging is passed to the procedure then the individual combinations of hnb are displayed. note: this procedure should not be broken into smaller procedures so that it remains efficient. version = 3.02; of procedure calehnb 1983 nov 23 */ /* less than (1/accuracy) bits error is demanded for the sum of pnb (see variable 'total') at the end of the procedure */ double log2 = log(2.0); /* natural log of 2, used to find log base 2 */ double logn; /* log of n */ double nlog2; /* n * log2 */ long gn; /* sum of gna..gnt */ double logpa, logpc, logpg, logpt; /* logs of genome probabilities */ /* log of n factorial is the sum of i=1 to n of log(i). the array below represents these logs up to n */ double logfact[maxsize + 1]; /* precalculated values of -p*log2(p), where p=nb/n for nb = 0 .. n. m stands for minus */ double mplog2p[maxsize + 1]; long i; /* index for logfact and mplog2p */ double logi; /* natural log of i */ long na; long nc = 0, ng = 0, nt = 0; /* numbers of bases in a site */ boolean done = false; /* true when the loop is completed */ double pnb; /* multinomial probability of a combination of na, nc, ng, nt */ double hnb; /* entropy for a combination of na..nt */ double pnbhnb; /* pnb*hnb, an intermediate result */ double sshnb = 0.0; /* sum of squares of hnb */ /* variables for testing program correctness: */ double total = 0.0; /* sum of pnb over all combinations of na..nt if this is not 1.00, the program is in error */ long counter = 0; /* counts the number of times through the loop */ /* prevent access to outside the arrays: */ if (n > maxsize) { printf(" procedure calehnb: n > maxsize (%ld>%ld)\n", n, (long)maxsize); halt(); } logn = log((double)n); nlog2 = n * log2; /* get logs of genome probabilities */ gn = gna + gnc + gng + gnt; logpa = log((double)gna / gn); logpc = log((double)gnc / gn); logpg = log((double)gng / gn); logpt = log((double)gnt / gn); /* find genomic entropy */ *hg = -((gna * logpa + gnc * logpc + gng * logpg + gnt * logpt) / (gn * log2)); *ehnb = 0.0; /* start error entropy at zero */ /* make table of log of n factorial up to n and entropies for nb/n */ logfact[0] = 0.0; /* factorial(0) = 0 */ mplog2p[0] = 0.0; for (i = 1; i <= n; i++) { logi = log((double)i); logfact[i] = logfact[i-1] + logi; mplog2p[i] = i * (logn - logi) / nlog2; } /* begin by looking at the combination with all a: na = n */ na = n; /* the following loop simulates a number of nested loops of the form: for b1=a to t do for b2=b1 to t do for b3=b2 to t do ... for bn=b(n-1) to t do ... the resulting set of variables increase in alphabetic order since no inner loop variable can have a value less than any outer loop. the number of times through the inner-most loop is given by: o = (n + 1)*(n + 2)*(n + 3)/6 in the case where there are four symbols (a,c,g,t) and n is the number of nested loops. a recursive set of loops would be possible, but it would use up too much memory in practical cases (up to n=150 or higher). a second algorithm sequests the loop variables into an array and increments them there. however, the goal is to get all possible combinations for na, nc, ng, nt, where the sum of these is n. the nested loops provide all the combinations in alphabetic order, assuring that there can not be any duplicates. to find nb (one of na..nt) one would look at which of the variables b1 to bn were of value b. this is a wasteful operation. the loop below simulates the array of control variables by changing each nb directly. */ do { /* pnb is calculated by taking the log of the expression fact(n) na nc ng nt pnb = ------------------- pa * pc * pg * pt . fact(na).. fact(nt) log(pnb) generates a series of sums, allowing the calculation to proceed by addition and multiplication rather than multiplication and exponentiation. the factorials become tractable in this way */ pnb = exp(logfact[n] - logfact[na] - logfact[nc] - logfact[ng] - logfact[nt] + na * logpa + nc * logpc + ng * logpg + nt * logpt); /* n factorial */ hnb = mplog2p[na] + mplog2p[nc] + mplog2p[ng] + mplog2p[nt]; pnbhnb = pnb * hnb; *ehnb += pnbhnb; sshnb += pnbhnb * hnb; /* sum of squares of hnb */ /* the following section keeps track of the calculation and writes out the current set of nb. */ counter++; /* if debugging then begin write(output,' ',counter:2,' '); for i := 1 to na do write(output,'a'); for i := 1 to nc do write(output,'c'); for i := 1 to ng do write(output,'g'); for i := 1 to nt do write(output,'t'); write(output,' ',na:3,nc:3,ng:3,nt:3); writeln(output,' pnb = ',pnb:10:5); end; */ total += pnb; /* the remaining portion of this repeat loop generates the values of na, nc, ng and nt. notice that there are 7 possibilities at each loop increment. other than the stop, in each case the sum of na+nc+ng+nt remains constant (=n). */ if (nt > 0) { /* ending on a t - do outer loops */ if (ng > 0) { /* turn g into t */ ng--; nt++; } else if (nc > 0) { /* turn one c into g, and all t to g (note ng = 0 initially) */ nc--; ng = nt + 1; nt = 0; } else if (na > 0) { /* turn one a into c and all g and t to c. (note ng=nc=0 initially) */ na--; nc = nt + 1; nt = 0; } else done = true; /* since nt = n */ } else { if (ng > 0) { /* turn g into t */ ng--; nt++; } else if (nc > 0) { /* turn c into g */ nc--; ng++; } else { na--; nc++; /* na > 0; turn a into c */ } } } while (!done); /* no t - increment innermost loop */ /* final adjustment: we only have the sum of squares so far */ *varhnb = sshnb - *ehnb * *ehnb; /* if this message appears, there is either a bug in the code or the computer cannot be as accurate as requested */ if (accuracy != (long)floor(accuracy * total + 0.5)) { printf(" procedure calehnb: the sum of probabilities is\n"); printf(" not accurate to one part in %ld\n", (long)accuracy); printf(" the sum of the probabilities is %10.8f\n", total); } /* if this message appear, then there is an error in the repeat-until loop: it did not repeat as many times as is expected from the algorithm */ if (counter == (long)floor((n + 1.0) * (n + 2) * (n + 3) / 6 + 0.5)) return; /* writeln(output, ' total: ',total:10:5); writeln(output,' count = ',counter:1); writeln(output,' (n+1)*(n+2)*(n+3)/6 = ', round((n+1)*(n+2)*(n+3)/6):1); */ printf(" procedure calehnb: program error, the number of\n"); printf(" calculations is in error\n"); halt(); } /* calehnb */ #undef maxsize #undef accuracy /* end module calehnb version = 5.35; (@ of rseq.p 1995 Feb 21 */ /* begin module calaehnb */ Static Void calaehnb(n, gna, gnc, gng, gnt, hg, aehnb, avarhnb) long n, gna, gnc, gng, gnt; double *hg, *aehnb, *avarhnb; { /* calculate ae(hnb) in bits/bp (=aehnb) for a number (n) of example sequence sites. gna to gnt are the composition to use for the genome probabilities of a to t. the genomic entropy (=hg) and the variance avar(hnb) (=avarhnb) are also calculated. this procedure gives approximations for e(hnb) and var(hnb). it is based on a formula derived by jeff haemer. see also: g. p. basharin theory probability appl. 4(3): 333-336 (1959) 'on a statistical estimate for the entropy of a sequence of independent random variables' version = 3.01; of procedure calaehnb 1983 nov 23 */ double log2 = log(2.0); /* natural log of 2 */ long gn; /* sum of gna..gnt */ double pa, pc, pg, pt; /* genomic probabilities */ double e; /* the approximate sampling error */ gn = gna + gnc + gng + gnt; pa = (double)gna / gn; pc = (double)gnc / gn; pg = (double)gng / gn; pt = (double)gnt / gn; *hg = -((pa * log(pa) + pc * log(pc) + pg * log(pg) + pt * log(pt)) / log2); e = 3 / (2 * log2 * n); *aehnb = *hg - e; *avarhnb = e * e; } /* calaehnb */ /* end module calaehnb version = 5.35; (@ of rseq.p 1995 Feb 21 */ /* begin module xyoutput */ Static Void xyoutput(malignxyin) _TEXT *malignxyin; { /* produce the malignxyin file */ double Hbefore; /* Hbefore for calculating information content */ long rank = 0; /* ranking of the alignment */ double Ehnb; /* small sample correction: 2 - e(n) */ long gna = 1000, gnc = 1000, gng = 1000, gnt = 1000; /* genomic composition */ double hg; /* genomic uncertainty */ double varhnb; /* var(hnb) */ if (*malignxyin->name != '\0') { if (malignxyin->f != NULL) malignxyin->f = freopen(malignxyin->name, "w", malignxyin->f); else malignxyin->f = fopen(malignxyin->name, "w"); } else { if (malignxyin->f != NULL) rewind(malignxyin->f); else malignxyin->f = tmpfile(); } if (malignxyin->f == NULL) _EscIO2(FileNotFound, malignxyin->name); SETUPBUF(malignxyin->f, Char); fprintf(malignxyin->f, "* malign%5.2f\n", version); fprintf(malignxyin->f, "* columns definitions:\n"); fprintf(malignxyin->f, "* 1: rank: from 1 to the number of alignment classes\n"); fprintf(malignxyin->f, "* 2: occurrences: how many times the class was found\n"); fprintf(malignxyin->f, "* 3: H: the uncertainty of the alignment, in bits\n"); fprintf(malignxyin->f, "* 4: R: the information content of the alignment, in bits\n"); oap = oapfirst; /* compute sample size correction for numseq */ if (numseq <= kickover) /* not using approximation */ calehnb(numseq, gna, gnc, gng, gnt, &hg, &Ehnb, &varhnb); else calaehnb(numseq, gna, gnc, gng, gnt, &hg, &Ehnb, &varhnb); /* using approximation */ Hbefore = Ehnb * (winright - winleft + 1); do { rank++; fprintf(malignxyin->f, " %10ld", rank); fprintf(malignxyin->f, " %10ld", oap->num); fprintf(malignxyin->f, " %*.*f", infofield, infodecim, oap->hbest); fprintf(malignxyin->f, " %*.*f\n", infofield, infodecim, Hbefore - oap->hbest); oap = oap->next; } while (oap != NULL); } /* end module xyoutput */ /* begin module realign */ Static Void realign() { /* shifts the window on all sequences so that as many sequences as possible keep their original alignment */ long alhist[maxchange]; /* histogram of align changes */ long histmax = 0; /* maximum value in histogram */ long binmax; /* bin in histogram with max value */ long bin; /* index to bins */ long alshift; /* alignment shift temporary value */ long alshiftmin = -10000, alshiftmax = 10000; /* min and max allowed grand shifts */ for (bin = 0; bin < maxchange; bin++) { alhist[bin] = 0; /* get min and max possible changes */ } sep = sepfirst; do { /* form a histogram of changes from original alignment */ bin = maxchange / 2 + sep->alignread - sep->aligncurrent; if (bin > 0 && bin <= maxchange) alhist[bin-1]++; alshift = sep->alignmin - sep->aligncurrent; if (alshift > alshiftmin) alshiftmin = alshift; alshift = sep->alignmax - sep->aligncurrent; if (alshift < alshiftmax) alshiftmax = alshift; sep = sep->next; } while (sep != NULL); /* find maximum of histogram of changes */ for (bin = 1; bin <= maxchange; bin++) { if (alhist[bin-1] > histmax) { histmax = alhist[bin-1]; binmax = bin; } } if (alhist[binmax-1] <= minmaxforchange) return; globshift = binmax - maxchange / 2; /* convert to alignment shift */ if (globshift > alshiftmax) globshift = alshiftmax; if (globshift < alshiftmin) globshift = alshiftmin; sep = sepfirst; while (globshift != 0 && sep != NULL) { sep->aligncurrent += globshift; sep = sep->next; } } /* end module realign */ /* begin module findbestalignment */ Static Void findbestalignment() { /* does repeated passes through the set of sequences until "convergence" */ long shiftno; /* shift number */ long npass = 0; /* number of passes */ double htemp; /* temporary h value */ boolean ifhlout; /* flag to do H(L) output this run */ boolean dohlout; /* flag to do H(L) output this shift */ ifhlout = (shuffle > nshuffle && nshiftout >= 0); /* do H(L) output if this is the last run and nshiftout >= 0 */ if (ifhlout) { fprintf(uncert.f, "* Position, L, and uncertainty, H(L), as the alignment improves\n"); fprintf(uncert.f, "*\n"); fprintf(uncert.f, "* %12ld positions\n", window); fprintf(uncert.f, "* pass number, shift number, position L, H(L)\n"); hloutput(&uncert, 0L, 0L); /* output starting H(L) */ } /* writeparam(uncert); (@ initialize and write params to file */ if (npassout >= 0) { /* if outputing new alignments */ fprintf(newalign.f, "\nrun number:%6ld, original random seed:%15ld\n", shuffle, iseedsave); /* blank line */ alignoutput(&newalign, 0L); /* output starting alignments */ } notchanged = 0; /* # of consecutive sequences with no change */ intolerance = 0; /* # of passes with change in H less than tolerance */ do { npass++; /* increment pass number */ hpass = hcurrent; /* save h at beginning of pass */ sep = sepfirst; shiftno = 0; do { /* shift sequences until all done or no change in one pass */ shiftno++; /* increment shift number */ if (paired) { /* for pairs, do shift every other time */ if (shiftno & 1) doubleshift(); } else shiftseq(); sep = sep->next; /* if doing H(L) output, do it if at the end of a pass of (or?) if nshiftout shifts have been done */ if (ifhlout) { dohlout = (shiftno == numseq); if (nshiftout > 0) { dohlout = (dohlout || shiftno % nshiftout == 0); /* p2c: malign.p, line 2862: * Note: Using % for possibly-negative arguments [317] */ } if (dohlout) { if (paired) /* fix nbl if doing paired */ fillnbl(); hcalc(nbl, flogf, hl, &htemp, window); /* get H and H(L) */ hloutput(&uncert, npass, shiftno); } } } while (sep != NULL && (notchanged <= numseq || globshift != 0)); /* terminate in the middle of a pass only if the last realignment attempt made no global shift of window on sequences */ if (hpass - hcurrent < tolerance) /* if change by less than tolerance */ intolerance++; /* increment count of this */ else intolerance = 0; /* otherwise zero the count */ /* shift window on all sequences if necessary, but don't do it past a certain # of pass to avoid falling into infinite loop */ if (!paired && npass < realignlimit) realign(); fillnbl(); /* fill the nbl array for the current alignment */ hcalc(nbl, flogf, hl, &hcurrent, window); /* calculate H and H(L) */ if (standout > 0) printf("%6ld%6ld%13.7f\n", shuffle, npass, hcurrent); if (npassout > 0) { if (npass % npassout == 0) { /* p2c: malign.p, line 2884: * Note: Using % for possibly-negative arguments [317] */ alignoutput(&newalign, npass); } } } while (intolerance <= ntolpass && notchanged < numseq); if (standout == 0) printf("%6ld%6ld%13.7f\n", shuffle, npass, hcurrent); /* 2005 Feb 7: TDS. The following code finally crashed because mod of a negative value is undefined. Duh. Follow the definition!!! Geepers. if npassout=0 then alignoutput(newalign, npass) else if (npass mod npassout) <> 0 then alignoutput(newalign, npass); */ if (npassout == 0) { alignoutput(&newalign, npass); return; } if (npassout > 0) { if (npass % npassout != 0) { /* p2c: malign.p, line 2900: * Note: Using % for possibly-negative arguments [317] */ alignoutput(&newalign, npass); } } } /* end module findbestalignment */ /* begin module randomalign */ Static Void randomalign() { /* generates a new random starting alignment */ long altrunc; /* randomly selected value with range of alignments */ long alrange; /* range of allowed alignments */ sep = sepfirst; do { random(&rand, &iseed); alrange = sep->alignmax - sep->alignmin; /* range of alignments */ altrunc = (long)(rand * (alrange + 1)); if (altrunc > alrange) altrunc = alrange; sep->alignstart = sep->alignmin + altrunc; sep->aligncurrent = sep->alignstart; sep = sep->next; if (paired) { /* if paired adjust next sequence by reverse */ sep->alignstart = sep->alignmax - altrunc; sep->aligncurrent = sep->alignstart; sep = sep->next; } } while (sep != NULL); } /* end module randomalign */ /* begin module optcheckadd */ Static Void optcheckadd() { /* checks if current best alignment is on list of optimal alignments; if so, increments count for that alignment; if not, adds it to list */ boolean match = false; /* flag if match existing one on list */ boolean termloop = false; /* flag to terminate loop */ long seqno; /* index to sequences */ long FORLIM; /* flag for match to existing ones on list */ /* flag to terminate loop */ oap = oapfirst; /* initialize to go through list */ oaplast = NULL; /* keep this pointing to last record */ /* go through list until there's a match or until oap points to first alignment with h greater than hcurrent */ if (oap != NULL) { if (hcurrent >= oap->hbest) { do { if (hcurrent == oap->hbest) { /* if h's match then */ match = true; /* it's a provisional match */ sep = sepfirst; seqno = 1; /* run through alignments until one doesn't match */ while (match && seqno <= numseq) { if (oap->albest[seqno-1] != sep->aligncurrent) match = false; seqno++; sep = sep->next; } /* if still match after check, increment number */ if (match) oap->num++; } oaplast = oap; oap = oap->next; if (oap == NULL) termloop = true; else { if (hcurrent < oap->hbest) termloop = true; } } while (!(termloop || match)); } } if (match) /* if no match, add to list */ return; oap = (optlist *)Malloc(sizeof(optlist)); /* create new entry */ /* if pointing to beginning of list (which may be nil anyway), then make this record link to beginning and point oapfirst to this record */ if (oaplast == NULL) { oap->next = oapfirst; oapfirst = oap; } else { /* otherwise link this record to the next one (which may be nil) and link last one to this one */ oap->next = oaplast->next; oaplast->next = oap; } numopt++; oap->num = 1; /* initialize # of this alignment */ oap->hbest = hcurrent; /* save H */ sep = sepfirst; /* store alignments */ FORLIM = numseq; for (seqno = 0; seqno < FORLIM; seqno++) { oap->albest[seqno] = sep->aligncurrent; sep = sep->next; } } /* end module optcheckadd */ main(argc, argv) int argc; Char *argv[]; { /* this is it at last */ PASCAL_MAIN(argc, argv); if (setjmp(_JL1)) goto _L1; malignxyin.f = NULL; strcpy(malignxyin.name, "malignxyin"); optinst.f = NULL; strcpy(optinst.name, "optinst"); optalign.f = NULL; strcpy(optalign.name, "optalign"); newalign.f = NULL; strcpy(newalign.name, "newalign"); uncert.f = NULL; strcpy(uncert.name, "uncert"); malignp.f = NULL; strcpy(malignp.name, "malignp"); book.f = NULL; strcpy(book.name, "book"); inst.f = NULL; strcpy(inst.name, "inst"); printf("malign%5.2f\n", version); if (*uncert.name != '\0') uncert.f = fopen(uncert.name, "w"); else uncert.f = tmpfile(); if (uncert.f == NULL) _EscIO2(FileNotFound, uncert.name); SETUPBUF(uncert.f, Char); readparam(&malignp); /* read the parameter file */ if (nranseq == 0) /* read the book into (something) */ readaligned(&inst, &book); else genrandseqs(); /* or generate random sequences */ flfill(flogf, fldiff, fldoubl, numseq); /* fill flogf and diff arrays */ if (npassout >= 0) { writeparam(&newalign); /* write params to alignment file */ fprintf(newalign.f, "* relative alignments at start, end of each run and during run if npassout>0\n"); } if (standout >= 0) printf(" run pass uncertainty\n"); shuffle = 0; oapfirst = NULL; /* pointer to first entry of list of optimal aligns */ numopt = 0; /* number of optimal alignments */ iseedsave = iseed; do { shuffle++; fillnbl(); /* fill the nbl array for the current alignment */ hcalc(nbl, flogf, hl, &hcurrent, window); /* calculate H and H(L) */ if (shuffle == 1) /* save original h */ horiginal = hcurrent; findbestalignment(); optcheckadd(); /* add alignment to list if it's unique new one */ iseedsave = iseed; randomalign(); } while (shuffle <= nshuffle); optoutput(); /* output files associated with optimum alignments */ xyoutput(&malignxyin); _L1: if (inst.f != NULL) fclose(inst.f); if (book.f != NULL) fclose(book.f); if (malignp.f != NULL) fclose(malignp.f); if (uncert.f != NULL) fclose(uncert.f); if (newalign.f != NULL) fclose(newalign.f); if (optalign.f != NULL) fclose(optalign.f); if (optinst.f != NULL) fclose(optinst.f); if (malignxyin.f != NULL) fclose(malignxyin.f); exit(EXIT_SUCCESS); } /* End. */