/* Output from p2c 1.21alpha-07.Dec.93, the Pascal-to-C translator */ /* From input file "coda.p" */ #include /* coda: composition file to data for genhis by thomas dana schneider, copyright 1986 module libraries required: delman, delmods, auxmods */ /* end of program */ /* begin module version */ #define version 2.05 /* of coda, 1994 Sep 5 origin 1985 apr 20 */ /* end module version */ /* begin module describe.coda */ /* name coda: composition file to data for genhis synopsis coda(cmp: in, data: out, codap: in, output: out) files cmp: a composition, the output of program comp data: identification lines are followed by the number of occurences of each oligo and the sequence of the oligo, one pair per line. the form of the file can be changed using the parameters in codap. codap: parameter file. four parameters, one per line. 1. composition depth to be used in the data file (integer) 2. the least frequent oligo to record in data (integer). 3. the most frequent oligo to record in data (integer). 4. if the first character is 'b', the number of each oligo is given before the oligo, 'a' means after. 'n' means do not give the number. 's' means the data file will be used as input to the search program. no numbers are given and commands to search are made which will result in a list of the locations of the selected oligos. if parameters 2 to 4 are missing they default to 0 100000 b. output: messages to the user description coda converts a composition file from the comp program into a list of oligos. unlike the original composition file, this list may contain all oligos of the length desired (to save space, comp removes an n-long oligo when the two n-1 long oligos inside it do not exist). however, coda can be told to only include frequent or infrequent oligos using the parameter file. two ways to use the data are: 1. use the data file as input to genhis to determine the distribution of the composition. 2. use the 's' feature to generate instructions for the search program. search converts the list of oligos to locations in a sequence. unshi then is used to remove the extra blanks and genhis then gives a map of the locations of rare or common oligos. example file: datat7 see also comp.p, genhis.p, search.p, unshi.p author thomas dana schneider bugs none known */ /* end module describe.coda */ /* begin module coda.type */ typedef enum { a, c, g, t } base; /* the components of each oligo */ typedef struct parameters { /* variables to control the program */ long depth; /* the depth of the composition requested to be put in the data file */ long lower; /* lowest frequency oligo to include in data */ long upper; /* highest frequency oligo to include in data */ Char mode; /* a - numbers after the oligo b - numbers before the oligo n - no numbers s - data file is input file of search */ } parameters; /* end module coda.type */ /* begin module comp.type */ /* the composition is stored in a tree of these nodes */ /* points to a node of the tree */ typedef struct compnode { /* a node of the composition tree */ long count; /* the number of oligos for this node */ struct compnode *son[4]; /* the pointers to 'descendants' of this node of the tree */ } compnode; /* spiders are used to make the composition tree */ /* points to a 'spider' */ typedef struct spider { /* a spider climbs the composition tree, its path determined by the sequences, and increments 'count' at all the nodes it passes, thereby determining the composition */ long depth; /* the level of the node now at */ compnode *place; /* a pointer to the current node */ struct spider *next; /* the next spider in the collection */ } spider; /* the total number of composition entries at a given level is stored in the linked list of type comptotal */ typedef struct comptotal { long count; /* the number at a given level */ struct comptotal *next; /* pointer to the next level totals */ } comptotal; /* 'path' is used in printing the tree */ /* pointer into a path on the tree */ typedef struct path { /* the path of bases to get to a particular node */ base bas; struct path *next; } path; /* end module comp.type version = 'auxmod 1.37 85 apr 4 gds/tds'; */ /* begin module var */ Static _TEXT cmp, data, codap; Static jmp_buf _JL1; /* end module var */ /* begin module package.primitive */ /* ************************************************************************ */ /* begin module halt */ Static Void halt() { /* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. */ printf(" program halt.\n"); longjmp(_JL1, 1); } /* end module halt version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module unlimitln */ Static Void unlimitln(afile) _TEXT *afile; { /* this procedure removes a stupid system dependent limit on the number of lines that one can write to a file. you may remove it from the code if your system does not want or need this. suggested method: place comments around the contents of the procedure. */ /* linelimit(afile, maxint); (@ set 'infinite' lines allowed for afile */ } /* end module unlimitln version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module copyaline */ Static Void copyaline(fin, fout) _TEXT *fin, *fout; { /* copy a line from file fin to file fout */ while (!P_eoln(fin->f)) { putc(P_peek(fin->f), fout->f); getc(fin->f); } fscanf(fin->f, "%*[^\n]"); getc(fin->f); putc('\n', fout->f); } /* copyaline */ /* end module copyaline version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module copylines */ Static long copylines(fin, fout, n) _TEXT *fin, *fout; long n; { /* copy n lines of file fin to file fout. the actual number of lines copied is returned. */ long index = 0; /* the current line number */ while (!BUFEOF(fin->f) && index < n) { copyaline(fin, fout); index++; } return index; } /* copylines */ /* end module copylines version = 'delmod 6.51 85 apr 17 tds/gds' */ /* ************************************************************************ */ /* end module package.primitive version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module comp.readcomp */ /* begin module skipoligo */ Static Void skipoligo(thefile) _TEXT *thefile; { /* this procedure is used to skip over an oligonucleotide string. it reads until it comes to a base, and then continues to read until it comes to a blank. no checking is done for non-base characters in between. */ Char ch; do { ch = getc(thefile->f); if (ch == '\n') ch = ' '; } while (ch != 't' && ch != 'g' && ch != 'c' && ch != 'a'); do { ch = getc(thefile->f); if (ch == '\n') ch = ' '; } while (ch != ' '); } /* for efficient reading of the information into the tree, each node that has a successor is stored in a queue as well as put into the tree. the variables of the type list are the queue elements. */ typedef struct list { compnode *item; /* points to the composition tree node */ struct list *next; /* points to the next list item in the queue */ } list; /* Local variables for readcomp: */ struct LOC_readcomp { list *freeitem; /* the list of unused list pointers */ } ; /* getitem and clearitem provide efficient use of linked list storage by keeping a list of unused pointers that can be allocated instead of always creating 'new' ones */ Local Void getitem(l, LINK) list **l; struct LOC_readcomp *LINK; { /* obtain a listitem from the free list or by making a new one */ if (LINK->freeitem != NULL) { *l = LINK->freeitem; LINK->freeitem = LINK->freeitem->next; } else *l = (list *)Malloc(sizeof(list)); (*l)->next = NULL; } Local Void clearitem(l, LINK) list **l; struct LOC_readcomp *LINK; { /* return a listitem to the free list */ list *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = LINK->freeitem; LINK->freeitem = lptr; } /* end module skipoligo version = 'auxmod 1.37 85 apr 4 gds/tds'; */ Static Void readcomp(comp, compmax, readmax, root, monocomptotal) _TEXT *comp; long *compmax, readmax; compnode **root; comptotal **monocomptotal; { /* this procedure requires modules: comp.type, skipoligo, halt; this procedure reads from file 'comp' a composition and puts it into the tree pointed to by the 'root' pointer. 'compmax' is the depth of the composition tree which is stored. it is the minimum of 'readmax', the requested depth, and 'detcomp', the depth for which the input file composition was determined. 'monocomptotal' points to the beginning (for the monos) of a linked list which gives the totals for each level of the composition. */ struct LOC_readcomp V; list *listitem; /* an item in the queue */ list *first; /* the first item in the queue */ list *last; /* the last item in the queue */ comptotal *comptot; /* the comp total for a given level */ comptotal *newcomptot; /* for adding to the string of comptot"s */ long detcomp; /* the level to which the input composition was determined */ long level; /* of the composition being read, i.e., monos, dis, ... */ long number; /* read from the 'comp' file */ Char ch; /* for reading from 'comp' */ base ba; /* an index */ if (*comp->name != '\0') { if (comp->f != NULL) comp->f = freopen(comp->name, "r", comp->f); else comp->f = fopen(comp->name, "r"); } else rewind(comp->f); if (comp->f == NULL) _EscIO2(FileNotFound, comp->name); RESETBUF(comp->f, Char); if (BUFEOF(comp->f)) { printf(" error: no composition file provided\n"); halt(); } fscanf(comp->f, "%*[^\n]"); getc(comp->f); /* skip the program identification */ fscanf(comp->f, "%*[^\n]"); getc(comp->f); /* skip the book identification */ fscanf(comp->f, "%ld%*[^\n]", &detcomp); getc(comp->f); /* obtain the determined composition */ fscanf(comp->f, "%*[^\n]"); getc(comp->f); /* skip the blank line */ /* determine the level of composition to be stored */ if (readmax < 1) { printf("\n warning: 0 or negative oligo length requested\n"); printf(" composition used is depth %ld\n\n", detcomp); *compmax = detcomp; } else if (readmax > detcomp) { printf("\n warning: requested composition oligo length (%ld)\n", readmax); printf(" is larger than the determined composition oligo length (%ld).\n", detcomp); printf(" composition used is to depth %ld\n\n", detcomp); *compmax = detcomp; } else *compmax = readmax; *root = (compnode *)Malloc(sizeof(compnode)); *monocomptotal = (comptotal *)Malloc(sizeof(comptotal)); comptot = (comptotal *)Malloc(sizeof(comptotal)); first = (list *)Malloc(sizeof(list)); Malloc(sizeof(list)); /* p2c: coda.p: Note: Eliminated unused assignment statement [338] */ first->item = *root; first->next = NULL; last = first; V.freeitem = (list *)Malloc(sizeof(list)); V.freeitem->next = NULL; /* read in the total number of bases from the composition */ fscanf(comp->f, "%*[^\n]"); getc(comp->f); /* skip the * */ fscanf(comp->f, "%*[^\n]"); getc(comp->f); /* skip the information line */ fscanf(comp->f, "%ld%*[^\n]", &number); getc(comp->f); (*root)->count = number; do { ch = getc(comp->f); /* skip the space */ if (ch == '\n') ch = ' '; ch = getc(comp->f); /* the ch determines what to do next */ if (ch == '\n') ch = ' '; if (ch == '*') { /* determine the level about to be read */ fscanf(comp->f, "%*[^\n]"); getc(comp->f); fscanf(comp->f, "%ld%*[^\n]", &level); getc(comp->f); if (level == 1) *monocomptotal = comptot; else { newcomptot = (comptotal *)Malloc(sizeof(comptotal)); comptot->next = newcomptot; comptot = newcomptot; } comptot->count = 0; comptot->next = NULL; } else { for (ba = a; (long)ba <= (long)t; ba = (base)((long)ba + 1)) { skipoligo(comp); fscanf(comp->f, "%ld", &number); if (number != 0) { first->item->son[(long)ba] = (compnode *)Malloc(sizeof(compnode)); first->item->son[(long)ba]->count = number; comptot->count += number; getitem(&listitem, &V); last->next = listitem; last = listitem; last->next = NULL; last->item = first->item->son[(long)ba]; } else first->item->son[(long)ba] = NULL; } clearitem(&first, &V); fscanf(comp->f, "%*[^\n]"); getc(comp->f); } } while (level != *compmax); /* read the composition values */ /* for the last level of compositions to be read we don"t need to store the nodes in the queue because their successors will not be read in. */ do { for (ba = a; (long)ba <= (long)t; ba = (base)((long)ba + 1)) { skipoligo(comp); fscanf(comp->f, "%ld", &number); if (number != 0) { first->item->son[(long)ba] = (compnode *)Malloc(sizeof(compnode)); first->item->son[(long)ba]->count = number; comptot->count += number; } else first->item->son[(long)ba] = NULL; } clearitem(&first, &V); fscanf(comp->f, "%*[^\n]"); getc(comp->f); } while (first != NULL); } /* readcomp */ Static long getcount(root, start) compnode *root; path *start; { /* this function follows the tree from the node 'root' (which may be the root of a subtree) along the path initiated with 'start', and returns the count of the resulting node. if the path through the tree ever hits a null node in the process the count is returned to be zero. */ compnode *place = root; /* a place in the composition tree */ path *point = start; /* a point in the path through the tree */ while (place != NULL && point != NULL) { place = place->son[(long)point->bas]; point = point->next; } if (place == NULL) return 0; else return (place->count); } /* end module comp.readcomp version = 'auxmod 1.37 85 apr 4 gds/tds'; */ /* begin module coda.init */ Static Void init(cmp, data, codap) _TEXT *cmp, *data, *codap; { /* set up the files */ printf(" coda %4.2f\n", version); if (*cmp->name != '\0') { if (cmp->f != NULL) cmp->f = freopen(cmp->name, "r", cmp->f); else cmp->f = fopen(cmp->name, "r"); } else rewind(cmp->f); if (cmp->f == NULL) _EscIO2(FileNotFound, cmp->name); RESETBUF(cmp->f, Char); if (*data->name != '\0') { if (data->f != NULL) data->f = freopen(data->name, "w", data->f); else data->f = fopen(data->name, "w"); } else { if (data->f != NULL) rewind(data->f); else data->f = tmpfile(); } if (data->f == NULL) _EscIO2(FileNotFound, data->name); SETUPBUF(data->f, Char); if (*codap->name != '\0') { if (codap->f != NULL) codap->f = freopen(codap->name, "r", codap->f); else codap->f = fopen(codap->name, "r"); } else rewind(codap->f); if (codap->f == NULL) _EscIO2(FileNotFound, codap->name); RESETBUF(codap->f, Char); unlimitln(data); } /* init */ /* Local variables for copystart: */ struct LOC_copystart { _TEXT *cmp, *data; } ; Local Void aline(LINK) struct LOC_copystart *LINK; { /* copy a line */ if (copylines(LINK->cmp, LINK->data, 1L) != 1) { printf(" composition file too short\n"); halt(); } } /* end module coda.init */ /* begin module coda.copystart */ Static Void copystart(cmp_, data_) _TEXT *cmp_, *data_; { /* pick up the first lines of the cmp file to the data file */ struct LOC_copystart V; V.cmp = cmp_; V.data = data_; fprintf(V.data->f, "* coda %4.2f from: \n", version); putc('*', V.data->f); aline(&V); /* composition id line */ if (!P_eoln(V.cmp->f)) /* skip the space */ getc(V.cmp->f); aline(&V); /* book id line */ } /* end module coda.copystart */ /* begin module coda.getparam */ Static Void getparam(codap, param) _TEXT *codap; parameters *param; { /* get the depth of the composition from the codap file */ if (BUFEOF(codap->f)) { printf(" codap is empty\n"); halt(); } fscanf(codap->f, "%ld%*[^\n]", ¶m->depth); getc(codap->f); if (param->depth < 0) { printf(" composition depth cannot be negative"); halt(); } if (BUFEOF(codap->f)) { /* set up default values */ param->lower = 0; param->upper = 100000L; param->mode = 'b'; return; } fscanf(codap->f, "%ld%*[^\n]", ¶m->lower); getc(codap->f); if (param->lower < 0) { printf(" lower limit of number of oligos is 0.\n"); halt(); } if (BUFEOF(codap->f)) { printf(" missing upper parameter\n"); halt(); } fscanf(codap->f, "%ld%*[^\n]", ¶m->upper); getc(codap->f); if (param->upper < param->lower) { printf(" upper boundary cannot be below lower.\n"); halt(); } if (BUFEOF(codap->f)) { printf(" missing mode parameter\n"); halt(); } fscanf(codap->f, "%c%*[^\n]", ¶m->mode); getc(codap->f); if (param->mode == '\n') param->mode = ' '; if (param->mode != 'a' && param->mode != 'b' && param->mode != 'n' && param->mode != 's') { printf(" mode must be one of abns.\n"); halt(); /* pick up the rest of the parameters */ } } #define spacer 10 /* space before each datum */ /* Local variables for puttodata: */ struct LOC_puttodata { _TEXT *data; compnode *root; parameters param; long count; /* the composition count of an oligo */ long oligosput; /* number of oligos put to data file */ path *proot; /* the root of the composition path */ path *pwrite; /* index for writing the path */ long totalcount; /* total of the counts of oligos put */ } ; Local Void down(p, LINK) path *p; struct LOC_puttodata *LINK; { /* work our way down through the oligo and at the bottom print the count and the oligo out. p is the current pointer to the depth */ base b; /* index to the current depth */ long dwrite; /* an index for writing the oligo */ long FORLIM1; for (b = a; (long)b <= (long)t; b = (base)((long)b + 1)) { p->bas = b; if (p->next != NULL) down(p->next, LINK); else { LINK->count = getcount(LINK->root, LINK->proot); if (LINK->param.lower <= LINK->count) { if (LINK->count <= LINK->param.upper) { /* dump the stuff out to the data file */ if (LINK->param.mode == 'b') fprintf(LINK->data->f, "%*ld", spacer, LINK->count); putc(' ', LINK->data->f); LINK->pwrite = LINK->proot; FORLIM1 = LINK->param.depth; for (dwrite = 1; dwrite <= FORLIM1; dwrite++) { switch (LINK->pwrite->bas) { case a: putc('a', LINK->data->f); break; case c: putc('c', LINK->data->f); break; case g: putc('g', LINK->data->f); break; case t: putc('t', LINK->data->f); break; } LINK->pwrite = LINK->pwrite->next; } if (LINK->param.mode == 'a') fprintf(LINK->data->f, "%*ld", spacer, LINK->count); putc('\n', LINK->data->f); LINK->oligosput++; LINK->totalcount += LINK->count; } } } } } /* down */ /* end module coda.getparam */ /* begin module coda.puttodata */ Static Void puttodata(data_, root_, param_) _TEXT *data_; compnode *root_; parameters param_; { /* put the composition information in the tree root to the data file. use only the level specified by depth, and include all zero values. the output must be in a form acceptable to the genhis program */ struct LOC_puttodata V; long d; /* an index to the depth */ path *p; /* index to the composition path */ long FORLIM; V.data = data_; V.root = root_; V.param = param_; fprintf(V.data->f, "* %ld long oligos with occurences from %ld to %ld\n", V.param.depth, V.param.lower, V.param.upper); if (V.param.mode == 's') { fprintf(V.data->f, "* coda search mode\n"); fprintf(V.data->f, "view nothing view position\n"); } /* construct the path for reading at the depth */ V.proot = (path *)Malloc(sizeof(path)); /* this is the zero depth of the path */ p = V.proot; FORLIM = V.param.depth; for (d = 1; d < FORLIM; d++) { p->next = (path *)Malloc(sizeof(path)); p = p->next; } p->next = NULL; /* start up the counting */ V.oligosput = 0; V.totalcount = 0; if (V.param.depth > 0) /* begin at the start of the path */ down(V.proot, &V); else fprintf(V.data->f, "%*ld\n", spacer, getcount(V.root, NULL)); if (V.param.mode == 's') fprintf(V.data->f, "quit\n"); printf(" %ld oligos put to data file\n", V.oligosput); printf(" representing %ld positions in the original sequence(s)\n", V.totalcount); } #undef spacer /* end module coda.puttodata */ /* begin module coda.themain */ Static Void themain(cmp, data, codap) _TEXT *cmp, *data, *codap; { /* the main procedure of the program */ long compmax; /* deepest composition in cmp */ comptotal *monoct; /* keeps readcomp happy here */ parameters param; /* for controlling the program */ compnode *root; /* the composition tree */ init(cmp, data, codap); copystart(cmp, data); getparam(codap, ¶m); readcomp(cmp, &compmax, param.depth, &root, &monoct); if (compmax < param.depth) { printf(" requested depth does not exist.\n"); halt(); } puttodata(data, root, param); } /* themain */ /* end module coda.themain */ main(argc, argv) int argc; Char *argv[]; { PASCAL_MAIN(argc, argv); if (setjmp(_JL1)) goto _L1; codap.f = NULL; strcpy(codap.name, "codap"); data.f = NULL; strcpy(data.name, "data"); cmp.f = NULL; strcpy(cmp.name, "cmp"); themain(&cmp, &data, &codap); _L1: if (cmp.f != NULL) fclose(cmp.f); if (data.f != NULL) fclose(data.f); if (codap.f != NULL) fclose(codap.f); exit(EXIT_SUCCESS); } /* coda */ /* End. */