/* Output from p2c 1.21alpha-07.Dec.93, the Pascal-to-C translator */ /* From input file "comp.p" */ #include /* determine the oligonucleotide composition of a book. by Gary Stormo and Tom Schneider module libraries required: delman, delmods, auxmods */ /* end of program */ /* begin module version */ #define version 5.27 /* of comp.p 1999 Oct 13 1999 Oct 13: 5.27: added E. coli composition example. previous changes: 1994 Sep 5 origin before 1988 oct 10 */ /* end module version */ /* begin module describe.comp */ /* name comp: determine the composition of a book. synopsis comp(book: in, cmp: out, compp: in, output: out) files book: the sequences; cmp: the composition, determined for mononucleotides up to oligonucleotides of length "compmax", see file compp; compp: parameter file used to set the length of the oligonucleotides for which the composition is to be determined ("compmax"); that number must be the first thing in the file; if the file is empty compmax is set by default to the constant "defcompmax"; output: for messages to the user. description Comp counts the number of each oligonucleotide (from length 1 to compmax) in the book and prints that to file "cmp". The output is printed in order of increasing length of oligonucleotide (i.e., first the monos, then the dis, ...). If there are no occurences of an oligonucleotide, but its one-shorter parent did occur, it will be given a zero. None of its descendants will be printed in the composition file. examples As an example of the output format, the composition to depth 3 of E. coli (U00096, 16-OCT-1997) is: comp 5.27: composition of * 1999/05/04 14:41:13, 1999/05/04 14:38:08, dbbk 3.33 3 is the longest oligo counted * 0-long oligos (the total number of bases) 4639221 * 1-long oligos a 1142136 c 1179433 g 1176775 t 1140877 * 2-long oligos aa 337835 ac 256658 ag 237851 at 309792 ca 325118 cc 271649 cg 346636 ct 236029 ga 267234 gc 383865 gg 270083 gt 255593 ta 211948 tc 267261 tg 322205 tt 339463 * 3-long oligos aaa 108901 aac 82578 aag 63364 aat 82992 aca 58633 acc 74899 acg 73263 act 49863 aga 56618 agc 80848 agg 50611 agt 49774 ata 63692 atc 86476 atg 76229 att 83395 caa 76607 cac 66752 cag 104785 cat 76974 cca 86442 ccc 47764 ccg 87031 cct 50412 cga 70934 cgc 115673 cgg 86870 cgt 73159 cta 26762 ctc 42714 ctg 102900 ctt 63653 gaa 83490 gac 54737 gag 42460 gat 86547 gca 96010 gcc 92961 gcg 114609 gct 80285 gga 56199 ggc 92123 ggg 47470 ggt 74291 gta 52670 gtc 54225 gtg 66108 gtt 82590 taa 68837 tac 52591 tag 27241 tat 63279 tca 84033 tcc 56025 tcg 71733 tct 55469 tga 83483 tgc 95221 tgg 85132 tgt 58369 tta 68824 ttc 83846 ttg 76968 ttt 109825 see also compan.p, histan.p, markov.p authors Gary Stormo and Tom Schneider bugs none known technical note The algorithm is an interesting application of linked lists. The composition is stored as a tree, and a number of "spiders" climb the tree during its construction. */ /* end module describe.comp */ /* begin module comp.const */ #define defcompmax 2 /* default value for variable compmax */ /* end module comp.const */ /* begin module book.const */ /* constants needed for book manipulations */ #define dnamax 3000 /* length of dna arrays */ #define namelength 20 /* maximum key name length */ #define linelength 80 /* maximum line readable in book */ /* end module book.const version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.type */ /* types needed for book manipulations */ typedef long chset[5]; /* types defined in book definition */ typedef Char alpha[namelength]; /* this is not alfa */ /* name is a left justified string with blanks following the characters */ typedef struct name { alpha letters; /* zero means an unspecified structure */ char length; } name; typedef struct line { /* a line of characters */ Char letters[linelength]; char length; struct line *next; } line; typedef enum { plus, minus, dircomplement, dirhomologous } direction; typedef enum { linear, circular } configuration; typedef enum { on, off } state; typedef struct header { /* header of key */ name keynam; /* key name of structure */ line *fulnam; /* full name of structure */ /* note key */ line *note; } header; /* base types */ typedef enum { a, c, g, t } base; typedef short dnarange; /* p2c: comp.p, line 153: * Note: Field width for seq assumes enum base has 4 elements [105] */ typedef uchar seq[(dnamax + 3) / 4]; typedef struct dnastring { seq part; dnarange length; struct dnastring *next; } dnastring; typedef struct orgkey { /* organism key */ header hea; /* genetic map units */ line *mapunit; } orgkey; typedef struct chrkey { /* chromosome key */ header hea; double mapbeg; /* number of genetic map beginning */ /* number of genetic map ending */ double mapend; } chrkey; typedef struct piekey { /* piece key */ header hea; double mapbeg; /* genetic map beginning */ configuration coocon; /* configruation (circular/linear) */ direction coodir; /* direction (+/-) relative to genetic map */ long coobeg; /* beginning nucleotide */ long cooend; /* ending nucleotide */ configuration piecon; /* configruation (circular/linear) */ direction piedir; /* direction (+/-) relative to coordinates */ long piebeg; /* beginning nucleotide */ long pieend; /* ending nucleotide */ } piekey; typedef struct piece { piekey key; dnastring *dna; } piece; typedef struct reference { name pienam; /* name of piece referred to */ double mapbeg; /* genetic map beginning */ direction refdir; /* direction relative to coordinates */ long refbeg; /* beginning nucleotide */ long refend; /* ending nucleotide */ } reference; typedef struct genkey { /* gene key */ header hea; reference ref; } genkey; typedef struct trakey { /* transcript key */ header hea; reference ref; } trakey; typedef struct markey { /* marker key */ header hea; reference ref; state sta; line *phenotype; struct marker *next; } markey; typedef struct marker { markey key; dnastring *dna; } marker; /* end module book.type version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module comp.type */ /* the composition is stored in a tree of these nodes */ /* points to a node of the tree */ typedef struct compnode { /* a node of the composition tree */ long count; /* the number of oligos for this node */ struct compnode *son[4]; /* the pointers to 'descendants' of this node of the tree */ } compnode; /* spiders are used to make the composition tree */ /* points to a 'spider' */ typedef struct spider { /* a spider climbs the composition tree, its path determined by the sequences, and increments 'count' at all the nodes it passes, thereby determining the composition */ long depth; /* the level of the node now at */ compnode *place; /* a pointer to the current node */ struct spider *next; /* the next spider in the collection */ } spider; /* the total number of composition entries at a given level is stored in the linked list of type comptotal */ typedef struct comptotal { long count; /* the number at a given level */ struct comptotal *next; /* pointer to the next level totals */ } comptotal; /* 'path' is used in printing the tree */ /* pointer into a path on the tree */ typedef struct path { /* the path of bases to get to a particular node */ base bas; struct path *next; } path; /* end module comp.type version = 'auxmod 1.37 85 apr 4 gds/tds'; */ Static _TEXT book, cmp, compp; Static long compmax; /* the longest oligonucleotide for which the composition is determined */ Static piece *pie; /* the piece currently being used */ Static compnode *root; /* the 'root' of the composition tree */ Static spider *firstspider; /* the first in the string of spiders */ /* begin module book.var */ /* ************************************************************************ */ /* global variables needed for book manipulations */ /* free storage: */ Static line *freeline; /* unused lines */ Static dnastring *freedna; /* unused dnas */ Static boolean readnumber; /* whether to read a number from the notes, or to read in the notes */ Static long number; /* the number of the item just read */ Static boolean numbered; /* true when the item just read is numbered */ Static boolean skipunnum; Static jmp_buf _JL1; /* a control variable to allow skipping of un-numbered items in the book */ /* ************************************************************************ */ /* end module book.var version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module halt */ Static Void halt() { /* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. */ printf(" program halt.\n"); longjmp(_JL1, 1); } /* end module halt version = 'auxmod 1.37 85 apr 4 gds/tds'; */ /* begin module copyaline */ Static Void copyaline(fin, fout) _TEXT *fin, *fout; { /* copy a line from file fin to file fout */ while (!P_eoln(fin->f)) { putc(P_peek(fin->f), fout->f); getc(fin->f); } fscanf(fin->f, "%*[^\n]"); getc(fin->f); putc('\n', fout->f); } /* copyaline */ /* end module copyaline version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module package.getpiece */ /* ************************************************************************ */ /* begin module package.brpiece */ /* ************************************************************************ */ /* begin module book.basis */ /* procedures needed for book manipulations */ /* get procedures should be used for all linked lists of records */ Static Void getline(l) line **l; { /* obtain a line from the free line list or by making a new one */ if (freeline != NULL) { *l = freeline; freeline = freeline->next; } else *l = (line *)Malloc(sizeof(line)); (*l)->length = 0; (*l)->next = NULL; } Static Void getdna(l) dnastring **l; { if (freedna != NULL) { *l = freedna; freedna = freedna->next; } else *l = (dnastring *)Malloc(sizeof(dnastring)); (*l)->length = 0; (*l)->next = NULL; } /* clear procedures should be called each time the records are no longer needed failure to do this may result in a stack overflow. */ Static Void clearline(l) line **l; { /* return a line to the free line list */ line *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = freeline; freeline = lptr; } Static Void cleardna(l) dnastring **l; { dnastring *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = freedna; freedna = lptr; } Static Void clearheader(h) header *h; { /* clear the header h (remove lines to free storage) */ clearline(&h->fulnam); while (h->note != NULL) clearline(&h->note); } Static Void clearpiece(p) piece **p; { /* clear the dna of the piece */ while ((*p)->dna != NULL) cleardna(&(*p)->dna); clearheader(&(*p)->key.hea); } Static base chartobase(ch) Char ch; { /* convert a character into a base */ base Result; switch (ch) { case 'a': Result = a; break; case 'c': Result = c; break; case 'g': Result = g; break; case 't': Result = t; break; } return Result; } Static Char basetochar(ba) base ba; { /* convert a base into a character */ Char Result; switch (ba) { case a: Result = 'a'; break; case c: Result = 'c'; break; case g: Result = 'g'; break; case t: Result = 't'; break; } return Result; } Static base complement(ba) base ba; { /* take the complement of ba */ base Result; switch (ba) { case a: Result = t; break; case c: Result = g; break; case g: Result = c; break; case t: Result = a; break; } return Result; } Static long pietoint(p, pie) long p; piece *pie; { /* p is a coordinate on the piece. we want to transform p into a number from 1 to n: an internal coordinate system for easy manipulation of piece coordinates */ long i; /* an intermediate value */ piekey *WITH; WITH = &pie->key; switch (WITH->piedir) { case plus: if (p >= WITH->piebeg) i = p - WITH->piebeg + 1; else i = p - WITH->coobeg + WITH->cooend - WITH->piebeg + 2; break; case minus: if (p <= WITH->piebeg) i = WITH->piebeg - p + 1; else i = WITH->cooend - p + WITH->piebeg - WITH->coobeg + 2; break; } return i; } Static long inttopie(i, pie) long i; piece *pie; { /* i is in the range 1 to some maximum. it is an internal coordinate system for the program. we want to do a coordinate transformation to obtain a value in the range of the piece called pie: i=1 corresponds to piebeg and i=its maximum corresponds to pieend */ long p; /* an intermediate value */ piekey *WITH; WITH = &pie->key; switch (WITH->piedir) { case plus: p = WITH->piebeg + i - 1; if (p > WITH->cooend) { if (WITH->coocon == circular) p += WITH->coobeg - WITH->cooend - 1; } break; case minus: p = WITH->piebeg - i + 1; if (p < WITH->coobeg) { if (WITH->coocon == circular) p += WITH->cooend - WITH->coobeg + 1; } break; } return p; } Static long piecelength(pie) piece *pie; { /* return the length of the dna in pie */ return (pietoint(pie->key.pieend, pie)); } /* end module book.basis version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.getto */ Static Char getto(thefile, ch) _TEXT *thefile; long *ch; { /* search the file for a character in the first line which is a member of the set ch. */ Char achar = ' '; while ((!P_inset(achar, ch)) & (!BUFEOF(thefile->f))) { fscanf(thefile->f, "%c%*[^\n]", &achar); getc(thefile->f); if (achar == '\n') achar = ' '; } if (P_inset(achar, ch)) return achar; else return ' '; } /* end module book.getto version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.skipstar */ Static Void skipstar(thefile) _TEXT *thefile; { /* skip start of line (or star = '*'). */ if (P_peek(thefile->f) != '*') { printf(" procedure skipstar: bad book\n"); printf(" \"*\" expected as first character on the line, but \"%c\" was found\n", P_peek(thefile->f)); halt(); } getc(thefile->f); /* skip the star */ if (P_peek(thefile->f) != ' ') { /* skip the blank */ printf(" procedure skipstar: bad book\n"); printf(" \"* \" expected on a line but \"*%c\" was found\n", P_peek(thefile->f)); halt(); } getc(thefile->f); } /* skipstar */ /* end module book.skipstar version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brreanum */ Static Void brreanum(thefile, reanum) _TEXT *thefile; double *reanum; { /* read a real number from the file */ skipstar(thefile); fscanf(thefile->f, "%lg%*[^\n]", reanum); getc(thefile->f); } /* end module book.brreanum version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brnumber */ Static Void brnumber(thefile, num) _TEXT *thefile; long *num; { /* read a number from the file */ skipstar(thefile); fscanf(thefile->f, "%ld%*[^\n]", num); getc(thefile->f); } /* end module book.brnumber version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brname */ Static Void brname(thefile, nam) _TEXT *thefile; name *nam; { /* read a name from the file */ long i; /* an index to the name */ Char c_; /* a character read */ skipstar(thefile); nam->length = 0; do { nam->length++; c_ = getc(thefile->f); if (c_ == '\n') c_ = ' '; nam->letters[nam->length - 1] = c_; } while (!(P_eoln(thefile->f) || nam->length >= namelength || nam->letters[nam->length - 1] == ' ')); if (nam->letters[nam->length - 1] == ' ') nam->length--; if (nam->length < namelength) { for (i = nam->length; i < namelength; i++) nam->letters[i] = ' '; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* brname */ /* end module book.brname version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brline */ Static Void brline(thefile, l) _TEXT *thefile; line **l; { /* read a line from the file */ long i = 0; long j; Char acharacter; long FORLIM; skipstar(thefile); while (!P_eoln(thefile->f)) { i++; acharacter = getc(thefile->f); if (acharacter == '\n') acharacter = ' '; (*l)->letters[i-1] = acharacter; } if (i < (*l)->length) { FORLIM = (*l)->length; for (j = i; j < FORLIM; j++) (*l)->letters[j] = ' '; } (*l)->length = i; (*l)->next = NULL; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* end module book.brline version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brdirect */ Static Void brdirect(thefile, direct) _TEXT *thefile; direction *direct; { /* read a direction */ Char ch; skipstar(thefile); fscanf(thefile->f, "%c%*[^\n]", &ch); getc(thefile->f); if (ch == '\n') ch = ' '; if (ch == '+') *direct = plus; else *direct = minus; } /* end module book.brdirect version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brconfig */ Static Void brconfig(thefile, config) _TEXT *thefile; configuration *config; { /* read a configuration */ Char ch; skipstar(thefile); fscanf(thefile->f, "%c%*[^\n]", &ch); getc(thefile->f); if (ch == '\n') ch = ' '; if (ch == 'l') *config = linear; else *config = circular; } /* end module book.brconfig version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brnotenumber */ Static Void brnotenumber(thefile, note) _TEXT *thefile; line **note; { /* book note reading to obtain the number of the object. the procedure returns the value of the number as a global. (this is not such a good practice, but we are stuck with it for now.) */ *note = NULL; numbered = false; number = 0; /* force number to zero if there is no number at all */ /* the next character is n or * depending on whether there are notes */ if (P_peek(thefile->f) != 'n') return; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); if (P_peek(thefile->f) == 'n') { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); return; } skipstar(thefile); if (!P_eoln(thefile->f)) { if (P_peek(thefile->f) == '#') { numbered = true; getc(thefile->f); /* move past the number symbol */ fscanf(thefile->f, "%ld", &number); } } do { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } while (P_peek(thefile->f) != 'n'); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* brnotenumber */ /* end module book.brnotenumber version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brnote */ Static Void brnote(thefile, note) _TEXT *thefile; line **note; { /* read note key */ line *newnote; /* the new note */ line *previousnote; /* the last line of the notes */ *note = NULL; if (P_peek(thefile->f) != 'n') /* enter note */ return; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); if (P_peek(thefile->f) == 'n') { /* abort null note (n/n) */ fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); return; } getline(note); newnote = *note; while (P_peek(thefile->f) != 'n') { /* wait until end of note */ brline(thefile, &newnote); previousnote = newnote; /* get next note */ getline(&newnote->next); newnote = newnote->next; } /* last note was not used, so: */ clearline(&newnote); previousnote->next = NULL; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* brnote */ /* end module book.brnote version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brheader */ Static Void brheader(thefile, hea) _TEXT *thefile; header *hea; { /* read the header of a key. */ /* read key name */ brname(thefile, &hea->keynam); /* read full name */ getline(&hea->fulnam); brline(thefile, &hea->fulnam); /* read note key */ if (readnumber) brnotenumber(thefile, &hea->note); else brnote(thefile, &hea->note); } /* end module book.brheader version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brpiekey */ Static Void brpiekey(thefile, pie) _TEXT *thefile; piekey *pie; { /* read piece key */ brheader(thefile, &pie->hea); brreanum(thefile, &pie->mapbeg); brconfig(thefile, &pie->coocon); brdirect(thefile, &pie->coodir); brnumber(thefile, &pie->coobeg); brnumber(thefile, &pie->cooend); brconfig(thefile, &pie->piecon); brdirect(thefile, &pie->piedir); brnumber(thefile, &pie->piebeg); brnumber(thefile, &pie->pieend); } /* end module book.brpiekey version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brdna */ Static Void brdna(thefile, dna) _TEXT *thefile; dnastring **dna; { /* read in dna from thefile */ /* note: if the dna were circularized, by linking the last dnastring to the first, then the cleardna routine could not clear properly, and would loop forever... there is no reason to do that, since a simple mod function will allow one to access the circle. */ Char ch; dnastring *workdna; long SET[5]; long TEMP; getdna(dna); workdna = *dna; ch = getto(thefile, P_addset(P_expset(SET, 0L), 'd')); ch = getc(thefile->f); /* skipstar */ if (ch == '\n') ch = ' '; while (ch == '*') { ch = getc(thefile->f); /* skip blank */ if (ch == '\n') ch = ' '; do { ch = getc(thefile->f); if (ch == '\n') ch = ' '; if (ch == 't' || ch == 'g' || ch == 'c' || ch == 'a') { if (workdna->length == dnamax) { getdna(&workdna->next); workdna = workdna->next; } workdna->length++; TEMP = workdna->length - 1; P_clrbits_B(workdna->part, TEMP, 1, 3); P_putbits_UB(workdna->part, TEMP, (int)chartobase(ch), 1, 3); } } while (!P_eoln(thefile->f)); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* go to next line */ ch = getc(thefile->f); /* ch is either '*' or 'd' */ if (ch == '\n') ch = ' '; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* end module book.brdna version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brpiece */ Static Void brpiece(thefile, pie) _TEXT *thefile; piece **pie; { /* read in a piece */ brpiekey(thefile, &(*pie)->key); if (numbered || !skipunnum) brdna(thefile, &(*pie)->dna); } /* end module book.brpiece version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.brinit */ Static Void brinit(book) _TEXT *book; { /* check that the book is ok to read, and set up the global variables for br routines */ /* halt if the book is bad (first word is 'halt') or the first character is not * */ if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); if (!BUFEOF(book->f)) { /* check for the date line */ if (P_peek(book->f) != '*') { if (P_peek(book->f) != 'h') printf(" this is not the first line of a book:\n"); else printf(" bad book:\n"); putchar(' '); while (!(P_eoln(book->f) | BUFEOF(book->f))) { putchar(P_peek(book->f)); getc(book->f); } putchar('\n'); halt(); } } else { printf(" book is empty\n"); halt(); } /* initialize free storage */ freeline = NULL; freedna = NULL; readnumber = true; /* usually we read in numbers for items */ number = 0; /* arbitrary value */ numbered = false; /* the piece has no number (none yet read in) */ skipunnum = false; } /* brinit */ /* end module book.brinit version = 'delmod 6.51 85 apr 17 tds/gds' */ /* ************************************************************************ */ /* end module package.brpiece version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.getpiece */ Static Void getpiece(thefile, pie) _TEXT *thefile; piece **pie; { /* move to and read in the next piece in the book */ Char ch; long SET[5]; ch = getto(thefile, P_addset(P_expset(SET, 0L), 'p')); /* get to the next p(iece) in the book */ if (ch != ' ') { brpiece(thefile, pie); ch = getto(thefile, P_addset(P_expset(SET, 0L), 'p')); /* read past closing p */ } } /* end module book.getpiece version = 'delmod 6.51 85 apr 17 tds/gds' */ /* ************************************************************************ */ /* end module package.getpiece version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module book.stepbase */ Static base stepbase(startdna, dna, d) dnastring *startdna, **dna; dnarange *d; { /* advance d by one base in dna and then return the base at the new d. (this means that one should initialize d to zero) if we go past the last base, we restart at startdna. note: d is not the number of the base... it is used as a record for stepbase. do not mess with it, and do not use it to find out what base you are on. use a separate counter. */ long TEMP; if (*d != dnamax && *d != (*dna)->length) { (*d)++; TEMP = *d - 1; return ((base)P_getbits_UB((*dna)->part, TEMP, 1, 3)); } *d = 1; *dna = (*dna)->next; if (*dna == NULL) *dna = startdna; TEMP = *d - 1; return ((base)P_getbits_UB((*dna)->part, TEMP, 1, 3)); } /* end module book.stepbase version = 'delmod 6.51 85 apr 17 tds/gds' */ /***********************************************************************/ Static Void makespiders() { /* make as many spiders as there are levels to the composition tree, which is 'compmax', and leave firstspider pointing to the beginning of the string */ long i; /* an index of the 'for' loop */ spider *aspider; long FORLIM; firstspider = (spider *)Malloc(sizeof(spider)); firstspider->depth = 0; firstspider->place = root; firstspider->next = NULL; FORLIM = compmax; for (i = 1; i < FORLIM; i++) { aspider = (spider *)Malloc(sizeof(spider)); aspider->next = firstspider; firstspider = aspider; } } Static Void initialize() { base ba; printf("comp %4.2f\n", version); brinit(&book); if (*cmp.name != '\0') { if (cmp.f != NULL) cmp.f = freopen(cmp.name, "w", cmp.f); else cmp.f = fopen(cmp.name, "w"); } else { if (cmp.f != NULL) rewind(cmp.f); else cmp.f = tmpfile(); } if (cmp.f == NULL) _EscIO2(FileNotFound, cmp.name); SETUPBUF(cmp.f, Char); fprintf(cmp.f, " comp %4.2f: composition of \n", version); putc(' ', cmp.f); copyaline(&book, &cmp); /* get the parameter 'compmax' from compp or set to default */ if (*compp.name != '\0') { if (compp.f != NULL) compp.f = freopen(compp.name, "r", compp.f); else compp.f = fopen(compp.name, "r"); } else rewind(compp.f); if (compp.f == NULL) _EscIO2(FileNotFound, compp.name); RESETBUF(compp.f, Char); if (!BUFEOF(compp.f)) { fscanf(compp.f, "%ld%*[^\n]", &compmax); getc(compp.f); } else compmax = defcompmax; if (compmax <= 0) { printf(" composition maximum must be positive\n"); halt(); } fprintf(cmp.f, " %ld is the longest oligo counted\n\n", compmax); /* build the root of the tree */ root = (compnode *)Malloc(sizeof(compnode)); root->count = 0; for (ba = a; (long)ba <= (long)t; ba = (base)((long)ba + 1)) root->son[(long)ba] = NULL; makespiders(); pie = (piece *)Malloc(sizeof(piece)); } Static Void increment(s, ba) spider **s; base *ba; { /* the 'ba'-son of the node pointed to by the spider s has its count incremented, or is created if it does not exist */ base bai; spider *WITH; compnode *WITH1; WITH = *s; if (WITH->depth == 0) WITH->place->count++; WITH->depth++; if (WITH->place->son[(long)(*ba)] == NULL) { WITH->place->son[(long)(*ba)] = (compnode *)Malloc(sizeof(compnode)); WITH->place = WITH->place->son[(long)(*ba)]; WITH1 = WITH->place; WITH1->count = 1; for (bai = a; (long)bai <= (long)t; bai = (base)((long)bai + 1)) WITH1->son[(long)bai] = NULL; } else { WITH->place = WITH->place->son[(long)(*ba)]; WITH->place->count++; } if (WITH->depth == compmax) { WITH->depth = 0; WITH->place = root; } } Static Void maketree(pie, root, firstspider) piece *pie; compnode *root; spider *firstspider; { /* increment the counts of the nodes of the tree (starting at the root) according to the sequence in piece pie, using the spiders */ spider *active; long i, j; /* indices for loops */ long length; /* length of the sequence */ dnarange pos = 0; /* position of previous base in the sequence */ dnastring *thedna; /* the internal current dna string */ base ba; long FORLIM, FORLIM1; thedna = pie->dna; active = firstspider; FORLIM = compmax; /* point all spiders to root */ for (i = 1; i <= FORLIM; i++) { active->depth = 0; active->place = root; active = active->next; } length = piecelength(pie); if (length >= compmax) { /* this is the usual case */ FORLIM = compmax; /* get started */ for (i = 1; i <= FORLIM; i++) { active = firstspider; ba = stepbase(pie->dna, &thedna, &pos); for (j = 1; j <= i; j++) { increment(&active, &ba); active = active->next; } } /* keep rolling through the sequence */ for (i = compmax + 1; i <= length; i++) { active = firstspider; ba = stepbase(pie->dna, &thedna, &pos); FORLIM1 = compmax; for (j = 1; j <= FORLIM1; j++) { increment(&active, &ba); active = active->next; } } if (pie->key.piecon != circular) return; FORLIM = length + compmax; /* handle the case of a circular sequence */ for (i = length + 1; i < FORLIM; i++) { active = firstspider; ba = stepbase(pie->dna, &thedna, &pos); FORLIM1 = compmax; for (j = 1; j <= FORLIM1; j++) { if (active->depth >= i - length) increment(&active, &ba); active = active->next; } } return; } for (i = 1; i <= length; i++) { active = firstspider; ba = stepbase(pie->dna, &thedna, &pos); for (j = 1; j <= i; j++) { increment(&active, &ba); active = active->next; } } if (pie->key.piecon != circular) return; FORLIM = compmax; for (i = length + 1; i <= FORLIM; i++) { active = firstspider; ba = stepbase(pie->dna, &thedna, &pos); FORLIM1 = compmax; for (j = 1; j <= FORLIM1; j++) { if (active->depth >= i - length) increment(&active, &ba); active = active->next; /* this is the unusual case of compmax > piecelength */ } } } /* Local variables for writetree: */ struct LOC_writetree { _TEXT *thefile; path *start; /* of the paths through the tree */ long printdepth; /* the depth of the tree currently being printed */ } ; /* Local variables for advancetonode: */ struct LOC_advancetonode { struct LOC_writetree *LINK; long depth; } ; Local Void writecomp(point, count, LINK) path *point; long count; struct LOC_advancetonode *LINK; { /* write the path to the node, recorded in the string beginning with start */ long i, FORLIM; fprintf(LINK->LINK->thefile->f, "%6c", ' '); /* write the oligo for this node */ fputc(basetochar(point->bas), LINK->LINK->thefile->f); FORLIM = LINK->depth; for (i = 2; i <= FORLIM; i++) { point = point->next; fputc(basetochar(point->bas), LINK->LINK->thefile->f); } fprintf(LINK->LINK->thefile->f, " %*ld", (int)(10 - LINK->depth), count); if (point->bas == t) putc('\n', LINK->LINK->thefile->f); } Local Void advancetonode(depth_, thenode, point, LINK) long depth_; compnode *thenode; path *point; struct LOC_writetree *LINK; { /* this procedure recursively calls itself until it gets to the depth that is currently being printed, and writes the count at that node to the composition file. if a nil branch is found at the printing level a zero is the output. */ struct LOC_advancetonode V; base ba; V.LINK = LINK; V.depth = depth_; if (V.depth == LINK->printdepth) { if (thenode != NULL) writecomp(LINK->start, thenode->count, &V); else writecomp(LINK->start, 0L, &V); return; } if (thenode == NULL) return; for (ba = a; (long)ba <= (long)t; ba = (base)((long)ba + 1)) { point->bas = ba; switch (ba) { case a: advancetonode(V.depth + 1, thenode->son[(long)a], point->next, LINK); break; case c: advancetonode(V.depth + 1, thenode->son[(long)c], point->next, LINK); break; case g: advancetonode(V.depth + 1, thenode->son[(long)g], point->next, LINK); break; case t: advancetonode(V.depth + 1, thenode->son[(long)t], point->next, LINK); break; } } } /* begin module comp.writecomp */ Static Void writetree(thefile_, root) _TEXT *thefile_; compnode *root; { /* start at the root of the tree and write out the paths to all the nodes and all the node counts. write a zero to the empty nodes that occur before the depth of compmax */ struct LOC_writetree V; path *point; /* some point on the tree path */ long i, FORLIM; /* make the string of path pointers */ V.thefile = thefile_; V.start = (path *)Malloc(sizeof(path)); V.start->next = NULL; FORLIM = compmax; for (i = 1; i < FORLIM; i++) { point = (path *)Malloc(sizeof(path)); point->next = V.start; V.start = point; } /* write the total number of bases */ fprintf(V.thefile->f, " *\n"); fprintf(V.thefile->f, " 0-long oligos (the total number of bases)\n"); fprintf(V.thefile->f, " %16ld\n", root->count); FORLIM = compmax; /* print out each node of the tree, breadth first */ for (V.printdepth = 1; V.printdepth <= FORLIM; V.printdepth++) { fprintf(V.thefile->f, " *\n"); fprintf(V.thefile->f, " %ld-long oligos\n", V.printdepth); advancetonode(0L, root, V.start, &V); } } /* end module comp.writecomp */ main(argc, argv) int argc; Char *argv[]; { PASCAL_MAIN(argc, argv); if (setjmp(_JL1)) goto _L1; compp.f = NULL; strcpy(compp.name, "compp"); cmp.f = NULL; strcpy(cmp.name, "cmp"); book.f = NULL; strcpy(book.name, "book"); initialize(); while (!BUFEOF(book.f)) { getpiece(&book, &pie); if (BUFEOF(book.f)) break; maketree(pie, root, firstspider); clearpiece(&pie); } writetree(&cmp, root); _L1: if (book.f != NULL) fclose(book.f); if (cmp.f != NULL) fclose(cmp.f); if (compp.f != NULL) fclose(compp.f); exit(EXIT_SUCCESS); } /* End. */