/* Output from p2c 1.21alpha-07.Dec.93, the Pascal-to-C translator */ /* From input file "helix.p" */ #include /* helix: find helices between sequences in two books by thomas schneider modules from: delmods */ /* end of program */ /* begin module version */ #define version 3.24 /* of helix 1994 sep 5 origin before 1990 December 21 surely before 1984 ... */ /* end module version */ /* begin module describe.helix */ /* name helix: find helices between sequences in two books synopsis helix(xbook: in, ybook: in, hlist: out, helixp: in, output: out) files xbook: a book from the delila system ybook: a book from the delila system hlist: a list of helices between pieces in xbook and ybook. the first line is the program identification the second two lines are the x and y book titles the third line gives the minimum length or the maximum energy of helixes recorded the fourth line states whether or not g-u pairs are allowed the fifth line states whether or not energies are printed the following lines are the helices breaks between sequences are indicated. helixp: parameters that control the helix list. if helixp is empty, default values are used. otherwise, the file must contain three lines: 1. if this number is a positive integer, it specifies the minimum length in base pairs of the helixes written in hlist. if it is a negative real number, it specifies the maximum energy in kcal of the helixes written. 2. if the first character is a "g" then g-u pairs are allowed, otherwise not. 3. if the first character is an "e" then the energy of each helix will be written in hlist. output: messages to the user description All sequences in xbook are compared to all sequences in ybook. The complementary helices (of some minimum length and longer or of some maximum energy or less) are listed in hlist by the 5' ends of the helix on both sequences. This information, along with the length of the helix, determines the location of the helix. One can allow g-u pairing if desired. If the helix lengths desired are very short, it is better to use dotmat (see "technical notes" below). The new Rules are now used to calculate the helix. documentation delman.use.comparison J. V. Maizel, Jr. and R. P. Lenk PNAS 78: 7665-7609 (1981) Tinoco et al. Nature New Biology vol 246 pp 40-41, 1973. S. M. Freier, R. Kierzek, J. A. Jaeger, N. Sugimoto, M. H. Caruthers, T. Nelson, and D. H. Turner, "Improved free-energy paramters for predictions of RNA duplex stability" PNAS 83: 9373-9377 (1986) see also matrix.p, dotmat.p, keymat.p author Thomas D. Schneider bugs GU pairs and bulges are not done using the new data. An option for pair-wise (rather than multiplicative) comparisons of sequences would be nice. technical notes The shortest length helix ever recorded in hlist is determined by the constant absminlength. This overrides the parameters. */ /* end module describe.helix */ /* technical note: lines of hlist are in the format made by procedure hrecord. breaks between sequences are in the format made by procedure nextsequences */ /* more constants: */ #define absminlength 2 /* the minimum length below which one should not use helix, and should use the dotmat program */ #define defminlenmaxene 5 /* default for minlenmaxene */ #define defguallowed false /* default for guallowed */ #define defgivefreeenergy false /* default for givefreeenergy */ #define prime '\'' /* single quote mark */ #define debugging true /* for debugging purposes */ /* begin module book.const */ /* constants needed for book manipulations */ #define dnamax 3000 /* length of dna arrays */ #define namelength 20 /* maximum key name length */ #define linelength 80 /* maximum line readable in book */ /* end module book.const version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.type */ /* types needed for book manipulations */ typedef long chset[5]; /* types defined in book definition */ typedef Char alpha[namelength]; /* this is not alfa */ /* name is a left justified string with blanks following the characters */ typedef struct name { alpha letters; /* zero means an unspecified structure */ char length; } name; typedef struct line { /* a line of characters */ Char letters[linelength]; char length; struct line *next; } line; typedef enum { plus, minus, dircomplement, dirhomologous } direction; typedef enum { linear, circular } configuration; typedef enum { on, off } state; typedef struct header { /* header of key */ name keynam; /* key name of structure */ line *fulnam; /* full name of structure */ /* note key */ line *note; } header; /* base types */ typedef enum { a, c, g, t } base; typedef short dnarange; /* p2c: helix.p, line 155: * Note: Field width for seq assumes enum base has 4 elements [105] */ typedef uchar seq[(dnamax + 3) / 4]; typedef struct dnastring { seq part; dnarange length; struct dnastring *next; } dnastring; typedef struct orgkey { /* organism key */ header hea; /* genetic map units */ line *mapunit; } orgkey; typedef struct chrkey { /* chromosome key */ header hea; double mapbeg; /* number of genetic map beginning */ /* number of genetic map ending */ double mapend; } chrkey; typedef struct piekey { /* piece key */ header hea; double mapbeg; /* genetic map beginning */ configuration coocon; /* configruation (circular/linear) */ direction coodir; /* direction (+/-) relative to genetic map */ long coobeg; /* beginning nucleotide */ long cooend; /* ending nucleotide */ configuration piecon; /* configruation (circular/linear) */ direction piedir; /* direction (+/-) relative to coordinates */ long piebeg; /* beginning nucleotide */ long pieend; /* ending nucleotide */ } piekey; typedef struct piece { piekey key; dnastring *dna; } piece; typedef struct reference { name pienam; /* name of piece referred to */ double mapbeg; /* genetic map beginning */ direction refdir; /* direction relative to coordinates */ long refbeg; /* beginning nucleotide */ long refend; /* ending nucleotide */ } reference; typedef struct genkey { /* gene key */ header hea; reference ref; } genkey; typedef struct trakey { /* transcript key */ header hea; reference ref; } trakey; typedef struct markey { /* marker key */ header hea; reference ref; state sta; line *phenotype; struct marker *next; } markey; typedef struct marker { markey key; dnastring *dna; } marker; /* end module book.type version = 'delmod 6.51 85 apr 17 tds/gds' */ Static _TEXT xbook; /* the primary sequences */ Static _TEXT ybook; /* the secondaries compared to xbook */ Static _TEXT hlist; /* list of exact helices */ Static _TEXT helixp; /* parameters for the program */ Static piece *xseq, *yseq; /* sequences from xbook and ybook */ Static long xnumber, ynumber; /* numbers of each piece */ Static double minlenmaxene; /* minimum helix length recorded in hlist or the highest energy when negative */ Static boolean guallowed; /* gu pairs are allowed */ Static boolean givefreeenergy; /* calculate the free energy of a helix and add it to the hlist */ /* begin module book.var */ /* ************************************************************************ */ /* global variables needed for book manipulations */ /* free storage: */ Static line *freeline; /* unused lines */ Static dnastring *freedna; /* unused dnas */ Static boolean readnumber; /* whether to read a number from the notes, or to read in the notes */ Static long number; /* the number of the item just read */ Static boolean numbered; /* true when the item just read is numbered */ Static boolean skipunnum; Static jmp_buf _JL1; /* a control variable to allow skipping of un-numbered items in the book */ /* ************************************************************************ */ /* end module book.var version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module halt */ Static Void halt() { /* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. */ printf(" program halt.\n"); longjmp(_JL1, 1); } /* end module halt version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module package.getpiece */ /* ************************************************************************ */ /* begin module package.brpiece */ /* ************************************************************************ */ /* begin module book.basis */ /* procedures needed for book manipulations */ /* get procedures should be used for all linked lists of records */ Static Void getline(l) line **l; { /* obtain a line from the free line list or by making a new one */ if (freeline != NULL) { *l = freeline; freeline = freeline->next; } else *l = (line *)Malloc(sizeof(line)); (*l)->length = 0; (*l)->next = NULL; } Static Void getdna(l) dnastring **l; { if (freedna != NULL) { *l = freedna; freedna = freedna->next; } else *l = (dnastring *)Malloc(sizeof(dnastring)); (*l)->length = 0; (*l)->next = NULL; } /* clear procedures should be called each time the records are no longer needed failure to do this may result in a stack overflow. */ Static Void clearline(l) line **l; { /* return a line to the free line list */ line *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = freeline; freeline = lptr; } Static Void cleardna(l) dnastring **l; { dnastring *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = freedna; freedna = lptr; } Static Void clearheader(h) header *h; { /* clear the header h (remove lines to free storage) */ clearline(&h->fulnam); while (h->note != NULL) clearline(&h->note); } Static Void clearpiece(p) piece **p; { /* clear the dna of the piece */ while ((*p)->dna != NULL) cleardna(&(*p)->dna); clearheader(&(*p)->key.hea); } Static base chartobase(ch) Char ch; { /* convert a character into a base */ base Result; switch (ch) { case 'a': Result = a; break; case 'c': Result = c; break; case 'g': Result = g; break; case 't': Result = t; break; } return Result; } Static Char basetochar(ba) base ba; { /* convert a base into a character */ Char Result; switch (ba) { case a: Result = 'a'; break; case c: Result = 'c'; break; case g: Result = 'g'; break; case t: Result = 't'; break; } return Result; } Static base complement(ba) base ba; { /* take the complement of ba */ base Result; switch (ba) { case a: Result = t; break; case c: Result = g; break; case g: Result = c; break; case t: Result = a; break; } return Result; } Static long pietoint(p, pie) long p; piece *pie; { /* p is a coordinate on the piece. we want to transform p into a number from 1 to n: an internal coordinate system for easy manipulation of piece coordinates */ long i; /* an intermediate value */ piekey *WITH; WITH = &pie->key; switch (WITH->piedir) { case plus: if (p >= WITH->piebeg) i = p - WITH->piebeg + 1; else i = p - WITH->coobeg + WITH->cooend - WITH->piebeg + 2; break; case minus: if (p <= WITH->piebeg) i = WITH->piebeg - p + 1; else i = WITH->cooend - p + WITH->piebeg - WITH->coobeg + 2; break; } return i; } Static long inttopie(i, pie) long i; piece *pie; { /* i is in the range 1 to some maximum. it is an internal coordinate system for the program. we want to do a coordinate transformation to obtain a value in the range of the piece called pie: i=1 corresponds to piebeg and i=its maximum corresponds to pieend */ long p; /* an intermediate value */ piekey *WITH; WITH = &pie->key; switch (WITH->piedir) { case plus: p = WITH->piebeg + i - 1; if (p > WITH->cooend) { if (WITH->coocon == circular) p += WITH->coobeg - WITH->cooend - 1; } break; case minus: p = WITH->piebeg - i + 1; if (p < WITH->coobeg) { if (WITH->coocon == circular) p += WITH->cooend - WITH->coobeg + 1; } break; } return p; } Static long piecelength(pie) piece *pie; { /* return the length of the dna in pie */ return (pietoint(pie->key.pieend, pie)); } /* end module book.basis version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.getto */ Static Char getto(thefile, ch) _TEXT *thefile; long *ch; { /* search the file for a character in the first line which is a member of the set ch. */ Char achar = ' '; while ((!P_inset(achar, ch)) & (!BUFEOF(thefile->f))) { fscanf(thefile->f, "%c%*[^\n]", &achar); getc(thefile->f); if (achar == '\n') achar = ' '; } if (P_inset(achar, ch)) return achar; else return ' '; } /* end module book.getto version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.skipstar */ Static Void skipstar(thefile) _TEXT *thefile; { /* skip start of line (or star = '*'). */ if (P_peek(thefile->f) != '*') { printf(" procedure skipstar: bad book\n"); printf(" \"*\" expected as first character on the line, but \"%c\" was found\n", P_peek(thefile->f)); halt(); } getc(thefile->f); /* skip the star */ if (P_peek(thefile->f) != ' ') { /* skip the blank */ printf(" procedure skipstar: bad book\n"); printf(" \"* \" expected on a line but \"*%c\" was found\n", P_peek(thefile->f)); halt(); } getc(thefile->f); } /* skipstar */ /* end module book.skipstar version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brreanum */ Static Void brreanum(thefile, reanum) _TEXT *thefile; double *reanum; { /* read a real number from the file */ skipstar(thefile); fscanf(thefile->f, "%lg%*[^\n]", reanum); getc(thefile->f); } /* end module book.brreanum version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brnumber */ Static Void brnumber(thefile, num) _TEXT *thefile; long *num; { /* read a number from the file */ skipstar(thefile); fscanf(thefile->f, "%ld%*[^\n]", num); getc(thefile->f); } /* end module book.brnumber version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brname */ Static Void brname(thefile, nam) _TEXT *thefile; name *nam; { /* read a name from the file */ long i; /* an index to the name */ Char c_; /* a character read */ skipstar(thefile); nam->length = 0; do { nam->length++; c_ = getc(thefile->f); if (c_ == '\n') c_ = ' '; nam->letters[nam->length - 1] = c_; } while (!(P_eoln(thefile->f) || nam->length >= namelength || nam->letters[nam->length - 1] == ' ')); if (nam->letters[nam->length - 1] == ' ') nam->length--; if (nam->length < namelength) { for (i = nam->length; i < namelength; i++) nam->letters[i] = ' '; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* brname */ /* end module book.brname version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brline */ Static Void brline(thefile, l) _TEXT *thefile; line **l; { /* read a line from the file */ long i = 0; long j; Char acharacter; long FORLIM; skipstar(thefile); while (!P_eoln(thefile->f)) { i++; acharacter = getc(thefile->f); if (acharacter == '\n') acharacter = ' '; (*l)->letters[i-1] = acharacter; } if (i < (*l)->length) { FORLIM = (*l)->length; for (j = i; j < FORLIM; j++) (*l)->letters[j] = ' '; } (*l)->length = i; (*l)->next = NULL; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* end module book.brline version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brdirect */ Static Void brdirect(thefile, direct) _TEXT *thefile; direction *direct; { /* read a direction */ Char ch; skipstar(thefile); fscanf(thefile->f, "%c%*[^\n]", &ch); getc(thefile->f); if (ch == '\n') ch = ' '; if (ch == '+') *direct = plus; else *direct = minus; } /* end module book.brdirect version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brconfig */ Static Void brconfig(thefile, config) _TEXT *thefile; configuration *config; { /* read a configuration */ Char ch; skipstar(thefile); fscanf(thefile->f, "%c%*[^\n]", &ch); getc(thefile->f); if (ch == '\n') ch = ' '; if (ch == 'l') *config = linear; else *config = circular; } /* end module book.brconfig version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brnotenumber */ Static Void brnotenumber(thefile, note) _TEXT *thefile; line **note; { /* book note reading to obtain the number of the object. the procedure returns the value of the number as a global. (this is not such a good practice, but we are stuck with it for now.) */ *note = NULL; numbered = false; number = 0; /* force number to zero if there is no number at all */ /* the next character is n or * depending on whether there are notes */ if (P_peek(thefile->f) != 'n') return; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); if (P_peek(thefile->f) == 'n') { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); return; } skipstar(thefile); if (!P_eoln(thefile->f)) { if (P_peek(thefile->f) == '#') { numbered = true; getc(thefile->f); /* move past the number symbol */ fscanf(thefile->f, "%ld", &number); } } do { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } while (P_peek(thefile->f) != 'n'); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* brnotenumber */ /* end module book.brnotenumber version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brnote */ Static Void brnote(thefile, note) _TEXT *thefile; line **note; { /* read note key */ line *newnote; /* the new note */ line *previousnote; /* the last line of the notes */ *note = NULL; if (P_peek(thefile->f) != 'n') /* enter note */ return; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); if (P_peek(thefile->f) == 'n') { /* abort null note (n/n) */ fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); return; } getline(note); newnote = *note; while (P_peek(thefile->f) != 'n') { /* wait until end of note */ brline(thefile, &newnote); previousnote = newnote; /* get next note */ getline(&newnote->next); newnote = newnote->next; } /* last note was not used, so: */ clearline(&newnote); previousnote->next = NULL; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* brnote */ /* end module book.brnote version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brheader */ Static Void brheader(thefile, hea) _TEXT *thefile; header *hea; { /* read the header of a key. */ /* read key name */ brname(thefile, &hea->keynam); /* read full name */ getline(&hea->fulnam); brline(thefile, &hea->fulnam); /* read note key */ if (readnumber) brnotenumber(thefile, &hea->note); else brnote(thefile, &hea->note); } /* end module book.brheader version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brpiekey */ Static Void brpiekey(thefile, pie) _TEXT *thefile; piekey *pie; { /* read piece key */ brheader(thefile, &pie->hea); brreanum(thefile, &pie->mapbeg); brconfig(thefile, &pie->coocon); brdirect(thefile, &pie->coodir); brnumber(thefile, &pie->coobeg); brnumber(thefile, &pie->cooend); brconfig(thefile, &pie->piecon); brdirect(thefile, &pie->piedir); brnumber(thefile, &pie->piebeg); brnumber(thefile, &pie->pieend); } /* end module book.brpiekey version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brdna */ Static Void brdna(thefile, dna) _TEXT *thefile; dnastring **dna; { /* read in dna from thefile */ /* note: if the dna were circularized, by linking the last dnastring to the first, then the cleardna routine could not clear properly, and would loop forever... there is no reason to do that, since a simple mod function will allow one to access the circle. */ Char ch; dnastring *workdna; long SET[5]; long TEMP; getdna(dna); workdna = *dna; ch = getto(thefile, P_addset(P_expset(SET, 0L), 'd')); ch = getc(thefile->f); /* skipstar */ if (ch == '\n') ch = ' '; while (ch == '*') { ch = getc(thefile->f); /* skip blank */ if (ch == '\n') ch = ' '; do { ch = getc(thefile->f); if (ch == '\n') ch = ' '; if (ch == 't' || ch == 'g' || ch == 'c' || ch == 'a') { if (workdna->length == dnamax) { getdna(&workdna->next); workdna = workdna->next; } workdna->length++; TEMP = workdna->length - 1; P_clrbits_B(workdna->part, TEMP, 1, 3); P_putbits_UB(workdna->part, TEMP, (int)chartobase(ch), 1, 3); } } while (!P_eoln(thefile->f)); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* go to next line */ ch = getc(thefile->f); /* ch is either '*' or 'd' */ if (ch == '\n') ch = ' '; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* end module book.brdna version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brpiece */ Static Void brpiece(thefile, pie) _TEXT *thefile; piece **pie; { /* read in a piece */ brpiekey(thefile, &(*pie)->key); if (numbered || !skipunnum) brdna(thefile, &(*pie)->dna); } /* end module book.brpiece version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brinit */ Static Void brinit(book) _TEXT *book; { /* check that the book is ok to read, and set up the global variables for br routines */ /* halt if the book is bad (first word is 'halt') or the first character is not * */ if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); if (!BUFEOF(book->f)) { /* check for the date line */ if (P_peek(book->f) != '*') { if (P_peek(book->f) != 'h') printf(" this is not the first line of a book:\n"); else printf(" bad book:\n"); putchar(' '); while (!(P_eoln(book->f) | BUFEOF(book->f))) { putchar(P_peek(book->f)); getc(book->f); } putchar('\n'); halt(); } } else { printf(" book is empty\n"); halt(); } /* initialize free storage */ freeline = NULL; freedna = NULL; readnumber = true; /* usually we read in numbers for items */ number = 0; /* arbitrary value */ numbered = false; /* the piece has no number (none yet read in) */ skipunnum = false; } /* brinit */ /* end module book.brinit version = 'delmod 6.51 85 apr 17 tds/gds' */ /* ************************************************************************ */ /* end module package.brpiece version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.getpiece */ Static Void getpiece(thefile, pie) _TEXT *thefile; piece **pie; { /* move to and read in the next piece in the book */ Char ch; long SET[5]; ch = getto(thefile, P_addset(P_expset(SET, 0L), 'p')); /* get to the next p(iece) in the book */ if (ch != ' ') { brpiece(thefile, pie); ch = getto(thefile, P_addset(P_expset(SET, 0L), 'p')); /* read past closing p */ } } /* end module book.getpiece version = 'delmod 6.51 85 apr 17 tds/gds' */ /* ************************************************************************ */ /* end module package.getpiece version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.getbase */ Static base getbase(position, pie) long position; piece *pie; { /* get a base from the nth position (internal coordinates) of the piece. no protection is made against positions outside the piece */ dnastring *workdna; long p = dnamax; /* the last base of the dna part */ workdna = pie->dna; while (position > p) { p += dnamax; workdna = workdna->next; } return ((base)P_getbits_UB(workdna->part, position - p + dnamax - 1, 1, 3)); } /* end module book.getbase version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module copyaline */ Static Void copyaline(fin, fout) _TEXT *fin, *fout; { /* copy a line from file fin to file fout */ while (!P_eoln(fin->f)) { putc(P_peek(fin->f), fout->f); getc(fin->f); } fscanf(fin->f, "%*[^\n]"); getc(fin->f); putc('\n', fout->f); } /* copyaline */ /* end module copyaline version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module basepair */ Static boolean basepair(xseq, yseq, x, y, guallowed) piece *xseq, *yseq; long x, y; boolean guallowed; { /* does xseq basepair to yseq at x to y? allow gu pair if guallowed is true. a series of if-then statements are used for speed. */ base bx, by; /* bases in the sequences */ bx = getbase(x, xseq); by = getbase(y, yseq); if (bx == complement(by)) return true; else if (guallowed) { if (bx == g && by == t) return true; else if (bx == t && by == g) return true; else return false; } else return false; } /* end module basepair version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module freeenergy */ /* this module contains: moveendsin: a procedure to find the core of a helix, where the core is the part that contributes the energy. gu pairs and single base pairs are removed from both ends. freeenergy: a function that uses moveendsin and then calculates the free energy of the core in kcal. */ Static Void moveendsin(x5bound, x3bound, xpiece, y5bound, y3bound, ypiece) long *x5bound, *x3bound; piece *xpiece; long *y5bound, *y3bound; piece *ypiece; { /* move the ends of the helix in. gu pairs contribute no energy, nor do single base pairs surrounded by gu pairs. these are removed. */ /* move the bounds inward to the first occurance of base pairing. since a single base pair provides no energy, the successive position is also required to pair. the energy beyond these bounds is zero. */ /* move one end in, decrease x3bound, increase y5bound note 1: x3bound cannot go below succ(x5bound) because the call to getbase would object to looking before the x5" end. the x5 bound is brought up in the second while loop. */ while (((getbase(*x3bound, xpiece) != complement(getbase(*y5bound, ypiece))) | (getbase(*x3bound - 1, xpiece) != complement(getbase(*y5bound + 1, ypiece)))) && *x3bound > *x5bound + 1) { (*x3bound)--; (*y5bound)++; } /* move the other end in, increase x5bound, decrease y3bound note 2: for efficiency,x5bound can stop incrementation when it passes x3bound */ while (((getbase(*x5bound, xpiece) != complement(getbase(*y3bound, ypiece))) | (getbase(*x5bound + 1, xpiece) != complement(getbase(*y3bound - 1, ypiece)))) && *x5bound < *x3bound) { (*x5bound)++; (*y3bound)--; } } /* moveendsin */ /* Local variables for freeenergy: */ struct LOC_freeenergy { piece *xpiece, *ypiece; long x5, y3; /* 5 and 3 prime ends on x and y pieces */ base x5b, x3b; /* the bounds of energy calculation */ double energy; /* the current total energy */ long bulgebases; /* the number of bases involved in a bulge */ boolean bulging; /* flag for continuing a bulge after a single match */ } ; Local Void readofftable(LINK) struct LOC_freeenergy *LINK; { /* create a 4 by 4 table based on table 1 in Turner et al. we know at this point that both base pairs complement, so only one pair is used. */ double deltag; /* temporary curve */ switch (LINK->x5b) { case a: switch (LINK->x3b) { case a: deltag = -0.9; break; case c: deltag = -2.1; break; case g: deltag = -1.7; break; case t: deltag = -0.9; break; } break; case c: switch (LINK->x3b) { case a: deltag = -1.8; break; case c: deltag = -2.9; break; case g: deltag = -2.0; break; case t: deltag = -1.7; break; } break; case g: switch (LINK->x3b) { case a: deltag = -2.3; break; case c: deltag = -3.4; break; case g: deltag = -2.9; break; case t: deltag = -2.1; break; } break; case t: switch (LINK->x3b) { case a: deltag = -1.1; break; case c: deltag = -2.3; break; case g: deltag = -1.8; break; case t: deltag = -0.9; break; } break; } LINK->energy += deltag; } Local Void gupair(LINK) struct LOC_freeenergy *LINK; { /* in the Freier et al paper, one pairing has exceptional energy: gu/ug */ LINK->energy -= 0.4; } Local Void continuebulge(LINK) struct LOC_freeenergy *LINK; { /* the interior loop bulge continues */ LINK->bulgebases += 2; } Local Void beginbulge(LINK) struct LOC_freeenergy *LINK; { /* begin a bulge, of the interior loop kind */ if (!LINK->bulging) { LINK->bulgebases = 2; LINK->bulging = true; } else { /* only a single base matches, so it is to be ignored */ continuebulge(LINK); } } Local Void endbulge(LINK) struct LOC_freeenergy *LINK; { /* close the bulge by recording its energy */ /* perhaps it is only a single match */ if (getbase(LINK->x5 - 1, LINK->xpiece) != complement(getbase(LINK->y3 + 1, LINK->ypiece))) { /* it is really the end of a bulge */ continuebulge(LINK); /* the single match is to be ignored */ return; } if (2 <= LINK->bulgebases && LINK->bulgebases <= 6) LINK->energy += 2.0; else if (7 <= LINK->bulgebases && LINK->bulgebases <= 20) LINK->energy += 3.0; else { LINK->energy += 1.0 + 2.0 * log((double)LINK->bulgebases) / log(10.0); /* bulgebases > 20 */ } LINK->bulging = false; } Static double freeenergy(x5start, x3start, xpiece_, y5start, y3start, ypiece_) long x5start, x3start; piece *xpiece_; long y5start, y3start; piece *ypiece_; { /* evaluate the free energy of the pairing (x3start, y5start) to (x5start, y3start). interior loops are evaluated only for the case where both sides are equal in length. the units of the energy returned are kcalorie. based on Turner et al and tinoco et al nature new biology vol 246 pp 40-41, 1973. internal coordinates (1 to n) are used. gu pairs are eliminated from the ends by calling moveendsin. */ struct LOC_freeenergy V; long x3, y5; base y5b, y3b; /* the bases corresponding to the above */ long x5bound, x3bound, y5bound, y3bound; V.xpiece = xpiece_; V.ypiece = ypiece_; /* test consistency of input */ if (x5start - x3start != y5start - y3start) { printf(" function freeenergy:\n"); printf(" the supplied ends are not consistent with a helix.\n"); halt(); } x5bound = x5start; x3bound = x3start; y5bound = y5start; y3bound = y3start; moveendsin(&x5bound, &x3bound, V.xpiece, &y5bound, &y3bound, V.ypiece); /* set external energy to zero */ V.energy = 0.0; /* no bulges yet */ V.bulging = false; if (x5bound >= x3bound) return V.energy; /* within this if, the range is bounded by base pairs */ x3 = x3bound; y5 = y5bound; V.x5 = x3 - 1; V.y3 = y5 + 1; /* these points (x,y/5,3) now define a dinucleotide pair between x and y. now we must scan the the dinucleotide across to the other bound. */ do { /* convert positons to bases */ V.x5b = getbase(V.x5, V.xpiece); V.x3b = getbase(x3, V.xpiece); y5b = getbase(y5, V.ypiece); y3b = getbase(V.y3, V.ypiece); if ((V.x3b == complement(y5b)) & (V.x5b == complement(y3b))) /* everybody pairs */ readofftable(&V); else if (y5b == g && y3b == t && V.x3b == t && V.x5b == g) /* a special case */ gupair(&V); else if ((V.x5b != complement(y3b)) & (V.x3b != complement(y5b))) /* nobody pairs */ continuebulge(&V); else if (V.x3b == complement(y5b)) /* the first two pair */ beginbulge(&V); else if (V.x5b == complement(y3b)) /* the last two pair */ endbulge(&V); else halt(); x3--; y5++; V.x5--; V.y3++; /*if debugging then writeln(output,energy:4:1); */ } while (V.x5 >= x5bound); return V.energy; } /* freeenergy */ /* end module freeenergy version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module unlimitln */ Static Void unlimitln(afile) _TEXT *afile; { /* this procedure removes a stupid system dependent limit on the number of lines that one can write to a file. you may remove it from the code if your system does not want or need this. suggested method: place comments around the contents of the procedure. */ /* linelimit(afile, maxint); (@ set 'infinite' lines allowed for afile */ } /* end module unlimitln version = 'delmod 6.51 85 apr 17 tds/gds' */ Static Void initialize() { /* initialize the variables of helix */ printf(" helix %4.2f\n", version); brinit(&xbook); brinit(&ybook); unlimitln(&hlist); if (*hlist.name != '\0') { if (hlist.f != NULL) hlist.f = freopen(hlist.name, "w", hlist.f); else hlist.f = fopen(hlist.name, "w"); } else { if (hlist.f != NULL) rewind(hlist.f); else hlist.f = tmpfile(); } if (hlist.f == NULL) _EscIO2(FileNotFound, hlist.name); SETUPBUF(hlist.f, Char); xseq = (piece *)Malloc(sizeof(piece)); yseq = (piece *)Malloc(sizeof(piece)); /* read parameters */ if (*helixp.name != '\0') { if (helixp.f != NULL) helixp.f = freopen(helixp.name, "r", helixp.f); else helixp.f = fopen(helixp.name, "r"); } else rewind(helixp.f); if (helixp.f == NULL) _EscIO2(FileNotFound, helixp.name); RESETBUF(helixp.f, Char); if (BUFEOF(helixp.f)) { /* set defaults */ minlenmaxene = defminlenmaxene; guallowed = defguallowed; givefreeenergy = defgivefreeenergy; } else { fscanf(helixp.f, "%lg%*[^\n]", &minlenmaxene); getc(helixp.f); if (BUFEOF(helixp.f)) { guallowed = defguallowed; givefreeenergy = defgivefreeenergy; } else { if (P_peek(helixp.f) == 'g') guallowed = true; else guallowed = false; fscanf(helixp.f, "%*[^\n]"); getc(helixp.f); if (BUFEOF(helixp.f)) givefreeenergy = defgivefreeenergy; else if (P_peek(helixp.f) == 'e') givefreeenergy = true; else givefreeenergy = false; } } if (minlenmaxene < absminlength && minlenmaxene > 0) { minlenmaxene = absminlength; /* force a reasonable limit */ printf(" warning: the minimum length helix printed is %ld\n", (long)minlenmaxene); } /* put header in hlist */ fprintf(hlist.f, " helix %4.2f\n", version); fprintf(hlist.f, " x: "); copyaline(&xbook, &hlist); fprintf(hlist.f, " y: "); copyaline(&ybook, &hlist); if (minlenmaxene > 0) fprintf(hlist.f, " %ld base pairs is the minimum length helix recorded\n", (long)minlenmaxene); else fprintf(hlist.f, " %4.2f kcal is the maximum energy of a helix recorded\n", minlenmaxene); putc(' ', hlist.f); if (!guallowed) fprintf(hlist.f, "no "); fprintf(hlist.f, "gu pairs allowed\n"); putc(' ', hlist.f); if (!givefreeenergy) fprintf(hlist.f, "no "); fprintf(hlist.f, "energies calculated\n\n"); } /* initialize */ /* note: procedure gethelix in program matrix depends on the forms that nextsequences and hrecord produce. */ Static Void nextsequences(x, y, xnumber, ynumber) piece *x, *y; long xnumber, ynumber; { /* write the new sequence names to hlist */ fprintf(hlist.f, " x piece: %ld %.*s y piece: %ld %.*s\n", xnumber, namelength, x->key.hea.keynam.letters, ynumber, namelength, y->key.hea.keynam.letters); } Static Void hrecord(x5, y3, xsequence, ysequence, length) long x5, y3; piece *xsequence, *ysequence; long length; { /* record the helix whose 5" end on the x sequence is x5 and whose 3" end on the y sequence is y3 that extends some length. note that it is easier to find the helix on a listing if only the 5" ends are listed on sequences. (it is painful to find if one has to remember whether it is 5" or 3" depending on whether it is x or y.) */ long x3, y5; /* the other end of the helix */ double deltag; /* the energy of the helix */ /*if debugging then writeln(hlist,'hrecord: ',length:1);*/ y5 = y3 - length + 1; if (givefreeenergy) { x3 = x5 + length - 1; deltag = freeenergy(x5, x3, xsequence, y5, y3, ysequence); } else deltag = 0.0; if ((minlenmaxene <= 0.0 || length < minlenmaxene) && (minlenmaxene > 0.0 || deltag > minlenmaxene)) return; fprintf(hlist.f, " x5%c: %6ld y5%c: %6ld length: %3ld", prime, inttopie(x5, xsequence), prime, inttopie(y5, ysequence), length); if (givefreeenergy) fprintf(hlist.f, " %7.2f kcal", deltag); putc('\n', hlist.f); } /* hrecord */ Static Void discani(xmin, ymin, x, y, xmax, ymax, x0, endofdiagonal, endofscan) long xmin, ymin, *x, *y, xmax, ymax, *x0; boolean *endofdiagonal, *endofscan; { /* minimum values */ /* current values */ /* maximum values */ /* the starting x for a diagonal */ /* this procedure initializes the variables for discans, which then steps (x,y) along a diagonal of a matrix. */ boolean xdone; /* true when x reaches the side */ boolean ydone; /* true when y reaches the top */ *x0 = xmin; *x = *x0; *y = ymin; xdone = (*x == xmin); ydone = (*y == ymax); *endofdiagonal = (xdone || ydone); *endofscan = (*x == xmax && ydone); } Static Void discans(xmin, ymin, x, y, xmax, ymax, x0, endofdiagonal, endofscan) long xmin, ymin, *x, *y, xmax, ymax, *x0; boolean *endofdiagonal, *endofscan; { /* minimum values */ /* current values */ /* maximum values */ /* the starting x for a diagonal */ /* this procedure makes it easy to scan a matrix on the diagonal. in the cartesian rectangle (xmin,ymin) to (xmax,ymax) diagonals of slope -1 are scanned, by decreasing x and increasing y. diagonals closest to the origin are scanned first. to use: call discani to initialize the variables. call discans to step (x,y) along the diagonal. flags: endofdiagonal is true when the top or side of the rectangle has just been reached. the next values of x and y will jump. endofscan indicates that the last position has been reached. */ boolean xdone; /* true when x reaches the side */ boolean ydone; /* true when y reaches the top */ if (*endofdiagonal) { if (*endofscan) /* protection */ halt(); (*x0)++; /* move to the next diagonal */ if (*x0 <= xmax) { *x = *x0; *y = ymin; } else { *x = xmax; *y = ymin + *x0 - xmax; } } else { (*x)--; (*y)++; } xdone = (*x == xmin); ydone = (*y == ymax); *endofdiagonal = (xdone || ydone); *endofscan = (*x == xmax && ydone); /* ;if xdone then writeln(output,'xdone'); if ydone then writeln(output,'ydone'); if endofdiagonal then writeln(output,'endofdiagonal'); if endofscan then writeln(output,'endofscan'); writeln(output,'x,y',x:1,y:1); */ } /* Local variables for scan: */ struct LOC_scan { piece *xseq, *yseq; boolean guallowed; long x, y; /* a matching of xseq to yseq */ long length; /* current length of a helix */ boolean eod; } ; Local Void terminate(x, y, LINK) long x, y; struct LOC_scan *LINK; { /* stop the helix extension, possibly recording its end at (x,y) */ /*if debugging then writeln(hlist,'terminate: length= ',length:1, */ /* 'absminlength = ',absminlength:1);*/ if (LINK->length >= absminlength) hrecord(x, y, LINK->xseq, LINK->yseq, LINK->length); LINK->length = 0; } /* terminate */ Local Void look(LINK) struct LOC_scan *LINK; { /* look at the pairing at (x,y) */ if (!basepair(LINK->xseq, LINK->yseq, LINK->x, LINK->y, LINK->guallowed)) { /* extend the helix */ terminate(LINK->x + 1, LINK->y - 1, LINK); return; } LINK->length++; if (LINK->eod) terminate(LINK->x, LINK->y, LINK); } Static Void scan(xseq_, yseq_, minlenmaxene, guallowed_) piece *xseq_, *yseq_; double minlenmaxene; boolean guallowed_; { /* scan two sequences xseq and yseq for complementarity. their lengths are xlength and ylength. the minimum length exact helix to record is minlenmaxene. gu base pairs are allowed if guallowed is true. the helix is recorded by a procedure external to this one: hrecord(x,y,xseq,yseq,length), where (x,y) is the 5" end of the helix on xseq and length is the number of bases matched. the sequences scanned are recorded by procedure newsequences, also, external. the last external procedure determines whether two bases can base pair. */ struct LOC_scan V; long xlength, ylength; /* lengths of the sequences */ long x0; /* used by discan */ boolean eos; /* used by discan */ V.xseq = xseq_; V.yseq = yseq_; V.guallowed = guallowed_; nextsequences(V.xseq, V.yseq, xnumber, ynumber); xlength = piecelength(V.xseq); ylength = piecelength(V.yseq); V.length = 0; discani(1L, 1L, &V.x, &V.y, xlength, ylength, &x0, &V.eod, &eos); look(&V); while (!eos) { discans(1L, 1L, &V.x, &V.y, xlength, ylength, &x0, &V.eod, &eos); look(&V); } } main(argc, argv) int argc; Char *argv[]; { PASCAL_MAIN(argc, argv); if (setjmp(_JL1)) goto _L1; helixp.f = NULL; strcpy(helixp.name, "helixp"); hlist.f = NULL; strcpy(hlist.name, "hlist"); ybook.f = NULL; strcpy(ybook.name, "ybook"); xbook.f = NULL; strcpy(xbook.name, "xbook"); initialize(); while (!BUFEOF(xbook.f)) { getpiece(&xbook, &xseq); xnumber = number; if (BUFEOF(xbook.f)) break; if (*ybook.name != '\0') { if (ybook.f != NULL) ybook.f = freopen(ybook.name, "r", ybook.f); else ybook.f = fopen(ybook.name, "r"); } else rewind(ybook.f); if (ybook.f == NULL) _EscIO2(FileNotFound, ybook.name); RESETBUF(ybook.f, Char); while (!BUFEOF(ybook.f)) { getpiece(&ybook, &yseq); ynumber = number; if (BUFEOF(ybook.f)) break; scan(xseq, yseq, minlenmaxene, guallowed); clearpiece(&yseq); } clearpiece(&xseq); } _L1: if (xbook.f != NULL) fclose(xbook.f); if (ybook.f != NULL) fclose(ybook.f); if (hlist.f != NULL) fclose(hlist.f); if (helixp.f != NULL) fclose(helixp.f); exit(EXIT_SUCCESS); } /* helix */ /* End. */