/* Output from p2c 1.21alpha-07.Dec.93, the Pascal-to-C translator */ /* From input file "compan.p" */ #include /* compan: chi-squared analysis of a composition. by Gary Stormo module libraries needed: delman, delmods, auxmods */ /* end of program */ /* begin module version */ #define version 3.25 /* of compan.p 2001 Mar 26 2001 Mar 26, 3.25: spelling correction previous change 1994 Sep 5 origin before 1985 apr 18 */ /* end module version */ /* begin module describe.compan */ /* name compan: composition analysis. synopsis compan(cmp: in, anal: out, companp: in, output: out) files cmp: the input composition, which is the output of program comp; anal: the output analysis of this program; companp: for parameters; should contain a single integer which specifies the maximum level for which the composition is analyzed. the maximum allowed level is 4, or the maximum level for which the composition was determined. output: for user messages; description calculates chi squared from a composition using: 1) assumption of equal frequencies to calculate mono, di, tri and tet expecteds; 2) mono frequencies to calculate di, tri and tet expecteds; 3) di frequencies to calculate tri and tet expecteds; 4) tri frequencies to calculate tet expecteds; the partial chi squared values are given for each oligo. the 'information' content of the composition is also calculated, using the standard information theory definition: information = -sum(frequency * log(frequency)), where the sum is over each oligonucleotide of a given length and the log is taken to the base 2. this gives the information in bits. see also comp.p author gary stormo bugs the program cannot do calculations for compositions larger than 4 */ /* end module describe.compan */ #define defcompmax 4 /* the limit, and default value, for analysis */ typedef enum { a, c, g, t } base; /* begin module comp.type */ /* the composition is stored in a tree of these nodes */ /* points to a node of the tree */ typedef struct compnode { /* a node of the composition tree */ long count; /* the number of oligos for this node */ struct compnode *son[4]; /* the pointers to 'descendants' of this node of the tree */ } compnode; /* spiders are used to make the composition tree */ /* points to a 'spider' */ typedef struct spider { /* a spider climbs the composition tree, its path determined by the sequences, and increments 'count' at all the nodes it passes, thereby determining the composition */ long depth; /* the level of the node now at */ compnode *place; /* a pointer to the current node */ struct spider *next; /* the next spider in the collection */ } spider; /* the total number of composition entries at a given level is stored in the linked list of type comptotal */ typedef struct comptotal { long count; /* the number at a given level */ struct comptotal *next; /* pointer to the next level totals */ } comptotal; /* 'path' is used in printing the tree */ /* pointer into a path on the tree */ typedef struct path { /* the path of bases to get to a particular node */ base bas; struct path *next; } path; /* end module comp.type version = 'auxmod 1.37 85 apr 4 gds/tds'; */ Static _TEXT cmp, anal, companp; Static compnode *root; /* of the composition tree */ Static comptotal *monocomptotal; /* beginning of comp totals list */ Static long compmax; /* maximum length oligo for which comp was determined */ Static long readmax; /* maximum amount of the comp used */ Static jmp_buf _JL1; /* begin module package.primitive */ /* ************************************************************************ */ /* begin module halt */ Static Void halt() { /* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. */ printf(" program halt.\n"); longjmp(_JL1, 1); } /* end module halt version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module unlimitln */ Static Void unlimitln(afile) _TEXT *afile; { /* this procedure removes a stupid system dependent limit on the number of lines that one can write to a file. you may remove it from the code if your system does not want or need this. suggested method: place comments around the contents of the procedure. */ /* linelimit(afile, maxint); (@ set 'infinite' lines allowed for afile */ } /* end module unlimitln version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module copyaline */ Static Void copyaline(fin, fout) _TEXT *fin, *fout; { /* copy a line from file fin to file fout */ while (!P_eoln(fin->f)) { putc(P_peek(fin->f), fout->f); getc(fin->f); } fscanf(fin->f, "%*[^\n]"); getc(fin->f); putc('\n', fout->f); } /* copyaline */ /* end module copyaline version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module copylines */ Static long copylines(fin, fout, n) _TEXT *fin, *fout; long n; { /* copy n lines of file fin to file fout. the actual number of lines copied is returned. */ long index = 0; /* the current line number */ while (!BUFEOF(fin->f) && index < n) { copyaline(fin, fout); index++; } return index; } /* copylines */ /* end module copylines version = 'delmod 6.51 85 apr 17 tds/gds' */ /* ************************************************************************ */ /* end module package.primitive version = 'delmod 6.51 85 apr 17 tds/gds' */ /* begin module basetochar */ Static Char basetochar(ba) base ba; { /* convert a base into a character */ Char Result; switch (ba) { case a: Result = 'a'; break; case c: Result = 'c'; break; case g: Result = 'g'; break; case t: Result = 't'; break; } return Result; } /* end module basetochar version = 'auxmod 1.37 85 apr 4 gds/tds'; */ /* begin module comp.readcomp */ /* begin module skipoligo */ Static Void skipoligo(thefile) _TEXT *thefile; { /* this procedure is used to skip over an oligonucleotide string. it reads until it comes to a base, and then continues to read until it comes to a blank. no checking is done for non-base characters in between. */ Char ch; do { ch = getc(thefile->f); if (ch == '\n') ch = ' '; } while (ch != 't' && ch != 'g' && ch != 'c' && ch != 'a'); do { ch = getc(thefile->f); if (ch == '\n') ch = ' '; } while (ch != ' '); } /* for efficient reading of the information into the tree, each node that has a successor is stored in a queue as well as put into the tree. the variables of the type list are the queue elements. */ typedef struct list { compnode *item; /* points to the composition tree node */ struct list *next; /* points to the next list item in the queue */ } list; /* Local variables for readcomp: */ struct LOC_readcomp { list *freeitem; /* the list of unused list pointers */ } ; /* getitem and clearitem provide efficient use of linked list storage by keeping a list of unused pointers that can be allocated instead of always creating 'new' ones */ Local Void getitem(l, LINK) list **l; struct LOC_readcomp *LINK; { /* obtain a listitem from the free list or by making a new one */ if (LINK->freeitem != NULL) { *l = LINK->freeitem; LINK->freeitem = LINK->freeitem->next; } else *l = (list *)Malloc(sizeof(list)); (*l)->next = NULL; } Local Void clearitem(l, LINK) list **l; struct LOC_readcomp *LINK; { /* return a listitem to the free list */ list *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = LINK->freeitem; LINK->freeitem = lptr; } /* end module skipoligo version = 'auxmod 1.37 85 apr 4 gds/tds'; */ Static Void readcomp(comp, compmax, readmax, root, monocomptotal) _TEXT *comp; long *compmax, readmax; compnode **root; comptotal **monocomptotal; { /* this procedure requires modules: comp.type, skipoligo, halt; this procedure reads from file 'comp' a composition and puts it into the tree pointed to by the 'root' pointer. 'compmax' is the depth of the composition tree which is stored. it is the minimum of 'readmax', the requested depth, and 'detcomp', the depth for which the input file composition was determined. 'monocomptotal' points to the beginning (for the monos) of a linked list which gives the totals for each level of the composition. */ struct LOC_readcomp V; list *listitem; /* an item in the queue */ list *first; /* the first item in the queue */ list *last; /* the last item in the queue */ comptotal *comptot; /* the comp total for a given level */ comptotal *newcomptot; /* for adding to the string of comptot"s */ long detcomp; /* the level to which the input composition was determined */ long level; /* of the composition being read, i.e., monos, dis, ... */ long number; /* read from the 'comp' file */ Char ch; /* for reading from 'comp' */ base ba; /* an index */ if (*comp->name != '\0') { if (comp->f != NULL) comp->f = freopen(comp->name, "r", comp->f); else comp->f = fopen(comp->name, "r"); } else rewind(comp->f); if (comp->f == NULL) _EscIO2(FileNotFound, comp->name); RESETBUF(comp->f, Char); if (BUFEOF(comp->f)) { printf(" error: no composition file provided\n"); halt(); } fscanf(comp->f, "%*[^\n]"); getc(comp->f); /* skip the program identification */ fscanf(comp->f, "%*[^\n]"); getc(comp->f); /* skip the book identification */ fscanf(comp->f, "%ld%*[^\n]", &detcomp); getc(comp->f); /* obtain the determined composition */ fscanf(comp->f, "%*[^\n]"); getc(comp->f); /* skip the blank line */ /* determine the level of composition to be stored */ if (readmax < 1) { printf("\n warning: 0 or negative oligo length requested\n"); printf(" composition used is depth %ld\n\n", detcomp); *compmax = detcomp; } else if (readmax > detcomp) { printf("\n warning: requested composition oligo length (%ld)\n", readmax); printf(" is larger than the determined composition oligo length (%ld).\n", detcomp); printf(" composition used is to depth %ld\n\n", detcomp); *compmax = detcomp; } else *compmax = readmax; *root = (compnode *)Malloc(sizeof(compnode)); *monocomptotal = (comptotal *)Malloc(sizeof(comptotal)); comptot = (comptotal *)Malloc(sizeof(comptotal)); first = (list *)Malloc(sizeof(list)); Malloc(sizeof(list)); /* p2c: compan.p: Note: Eliminated unused assignment statement [338] */ first->item = *root; first->next = NULL; last = first; V.freeitem = (list *)Malloc(sizeof(list)); V.freeitem->next = NULL; /* read in the total number of bases from the composition */ fscanf(comp->f, "%*[^\n]"); getc(comp->f); /* skip the * */ fscanf(comp->f, "%*[^\n]"); getc(comp->f); /* skip the information line */ fscanf(comp->f, "%ld%*[^\n]", &number); getc(comp->f); (*root)->count = number; do { ch = getc(comp->f); /* skip the space */ if (ch == '\n') ch = ' '; ch = getc(comp->f); /* the ch determines what to do next */ if (ch == '\n') ch = ' '; if (ch == '*') { /* determine the level about to be read */ fscanf(comp->f, "%*[^\n]"); getc(comp->f); fscanf(comp->f, "%ld%*[^\n]", &level); getc(comp->f); if (level == 1) *monocomptotal = comptot; else { newcomptot = (comptotal *)Malloc(sizeof(comptotal)); comptot->next = newcomptot; comptot = newcomptot; } comptot->count = 0; comptot->next = NULL; } else { for (ba = a; (long)ba <= (long)t; ba = (base)((long)ba + 1)) { skipoligo(comp); fscanf(comp->f, "%ld", &number); if (number != 0) { first->item->son[(long)ba] = (compnode *)Malloc(sizeof(compnode)); first->item->son[(long)ba]->count = number; comptot->count += number; getitem(&listitem, &V); last->next = listitem; last = listitem; last->next = NULL; last->item = first->item->son[(long)ba]; } else first->item->son[(long)ba] = NULL; } clearitem(&first, &V); fscanf(comp->f, "%*[^\n]"); getc(comp->f); } } while (level != *compmax); /* read the composition values */ /* for the last level of compositions to be read we do not need to store the nodes in the queue because their successors will not be read in. */ do { for (ba = a; (long)ba <= (long)t; ba = (base)((long)ba + 1)) { skipoligo(comp); fscanf(comp->f, "%ld", &number); if (number != 0) { first->item->son[(long)ba] = (compnode *)Malloc(sizeof(compnode)); first->item->son[(long)ba]->count = number; comptot->count += number; } else first->item->son[(long)ba] = NULL; } clearitem(&first, &V); fscanf(comp->f, "%*[^\n]"); getc(comp->f); } while (first != NULL); } /* readcomp */ Static long getcount(root, start) compnode *root; path *start; { /* this function follows the tree from the node 'root' (which may be the root of a subtree) along the path initiated with 'start', and returns the count of the resulting node. if the path through the tree ever hits a null node in the process the count is returned to be zero. */ compnode *place = root; /* a place in the composition tree */ path *point = start; /* a point in the path through the tree */ while (place != NULL && point != NULL) { place = place->son[(long)point->bas]; point = point->next; } if (place == NULL) return 0; else return (place->count); } /* end module comp.readcomp version = 'auxmod 1.37 85 apr 4 gds/tds'; */ Static Void header() { /* write the header to the anal file */ if (*cmp.name != '\0') { if (cmp.f != NULL) cmp.f = freopen(cmp.name, "r", cmp.f); else cmp.f = fopen(cmp.name, "r"); } else rewind(cmp.f); if (cmp.f == NULL) _EscIO2(FileNotFound, cmp.name); RESETBUF(cmp.f, Char); fprintf(anal.f, " compan version %4.2f; chi-squared analysis of composition\n", version); if (copylines(&cmp, &anal, 2L) != 2) { printf(" composition file is too short\n"); halt(); } fprintf(anal.f, "\n %ld-long oligos are the longest analyzed\n", compmax); fprintf(anal.f, " (+) means observed > expected;"); fprintf(anal.f, " (-) means observed < expected\n\n"); } Static Void chisq(observed, expected, chi, sign) double observed, expected, *chi; Char *sign; { /* given observed and expected values, calculate chisquared and return as variable chi; set sign to '+' if observed is >= expected, else '-' */ double diff; /* difference between observed and expected */ diff = observed - expected; if (diff >= 0) *sign = '+'; else *sign = '-'; if (expected > 0) *chi = diff * diff / expected; else *chi = 0.0; } Static Void calc0() { /* calculate expecteds assuming equal frequencies */ base b1, b2, b3, b4; long comptot; /* total number of oligos of a certain length */ long observed; /* number of oligos of a certain composition */ double expected; /* number of oligos of a certain composition */ double chi; /* the partial chisquared for each entry */ double totchi = 0.0; /* the total chisquared for each section */ double frequency; /* of each oligo */ double inf = 0.0; /* the 'information' at each level of composition, as defined by information theory: inf = -sum(frequency(log(frequency))) */ path *start; /* of the path through the tree */ path *point; /* on the path through the tree */ Char sign; /* '+' or '-', as observed is more or less than expected */ fprintf(anal.f, " **** chi squareds assuming equal frequencies ****\n\n"); /* chi squareds for monos */ comptot = monocomptotal->count; expected = comptot / 4.0; fprintf(anal.f, " partials\n"); start = (path *)Malloc(sizeof(path)); start->next = NULL; for (b1 = a; (long)b1 <= (long)t; b1 = (base)((long)b1 + 1)) { start->bas = b1; observed = getcount(root, start); chisq((double)observed, expected, &chi, &sign); fprintf(anal.f, "%5c%c%8.2f (%c);", ' ', basetochar(b1), chi, sign); totchi += chi; if (observed > 0) { frequency = (double)observed / comptot; inf -= frequency * log(frequency) / log(2.0); } } fprintf(anal.f, "\n * total chisquared = %8.2f\n", totchi); fprintf(anal.f, " * information = %5.3f bits per base\n\n", inf); /* chi squareds for dis */ if (compmax >= 2) { totchi = 0.0; inf = 0.0; comptot = monocomptotal->next->count; expected = comptot / 16.0; fprintf(anal.f, " partials\n"); point = (path *)Malloc(sizeof(path)); point->next = NULL; start->next = point; for (b1 = a; (long)b1 <= (long)t; b1 = (base)((long)b1 + 1)) { start->bas = b1; for (b2 = a; (long)b2 <= (long)t; b2 = (base)((long)b2 + 1)) { point->bas = b2; observed = getcount(root, start); chisq((double)observed, expected, &chi, &sign); fprintf(anal.f, "%4c%c%c%8.2f (%c);", ' ', basetochar(b1), basetochar(b2), chi, sign); totchi += chi; if (observed > 0) { frequency = (double)observed / comptot; inf -= frequency * log(frequency) / log(2.0); } } putc('\n', anal.f); } fprintf(anal.f, " * total chisquared = %8.2f\n", totchi); fprintf(anal.f, " * information = %5.3f bits per base\n\n", inf / 2); } /* chi squareds for tris */ if (compmax >= 3) { totchi = 0.0; inf = 0.0; comptot = monocomptotal->next->next->count; expected = comptot / 64.0; fprintf(anal.f, " partials\n"); point = (path *)Malloc(sizeof(path)); point->next = NULL; start->next->next = point; for (b1 = a; (long)b1 <= (long)t; b1 = (base)((long)b1 + 1)) { start->bas = b1; for (b2 = a; (long)b2 <= (long)t; b2 = (base)((long)b2 + 1)) { start->next->bas = b2; for (b3 = a; (long)b3 <= (long)t; b3 = (base)((long)b3 + 1)) { point->bas = b3; observed = getcount(root, start); chisq((double)observed, expected, &chi, &sign); fprintf(anal.f, "%3c%c%c%c%8.2f (%c);", ' ', basetochar(b1), basetochar(b2), basetochar(b3), chi, sign); totchi += chi; if (observed > 0) { frequency = (double)observed / comptot; inf -= frequency * log(frequency) / log(2.0); } } putc('\n', anal.f); } putc('\n', anal.f); } fprintf(anal.f, " * total chisquared = %8.2f\n", totchi); fprintf(anal.f, " * information = %5.3f bits per base\n\n", inf / 3); } /* chi squareds for tets */ if (compmax < 4) return; totchi = 0.0; inf = 0.0; comptot = monocomptotal->next->next->next->count; expected = comptot / 256.0; fprintf(anal.f, " partials\n"); point = (path *)Malloc(sizeof(path)); point->next = NULL; start->next->next->next = point; for (b1 = a; (long)b1 <= (long)t; b1 = (base)((long)b1 + 1)) { start->bas = b1; for (b2 = a; (long)b2 <= (long)t; b2 = (base)((long)b2 + 1)) { start->next->bas = b2; for (b3 = a; (long)b3 <= (long)t; b3 = (base)((long)b3 + 1)) { start->next->next->bas = b3; for (b4 = a; (long)b4 <= (long)t; b4 = (base)((long)b4 + 1)) { point->bas = b4; observed = getcount(root, start); chisq((double)observed, expected, &chi, &sign); fprintf(anal.f, "%2c%c%c%c%c%8.2f (%c);", ' ', basetochar(b1), basetochar(b2), basetochar(b3), basetochar(b4), chi, sign); totchi += chi; if (observed > 0) { frequency = (double)observed / comptot; inf -= frequency * log(frequency) / log(2.0); } } putc('\n', anal.f); } putc('\n', anal.f); } putc('\n', anal.f); } fprintf(anal.f, " * total chisquared = %8.2f\n", totchi); fprintf(anal.f, " * information = %5.3f bits per base\n\n", inf / 4); } Static Void calc1() { /* calculate chi squareds using mono frequencies */ base b1, b2, b3, b4; double observed; /* number of oligos of a certain composition */ double expected; /* number of oligos of a certain composition */ /* using monos, the expected is the frequency of each mono in the oligo times the total number of oligos of a particular length */ double ratio; /* for calculating expecteds */ double n1, n2, n3, n4; /* for storing the results of 'getcount' calls */ double chi; /* partial chisquareds */ double totchi = 0.0; /* total chisquareds */ Char sign; /* '+' or '-', as observed is more or less than expected */ path *point1; /* a point on a path through the composition tree */ path *point2; /* ' */ path *point3; /* ' */ path *point4; /* ' */ fprintf(anal.f, "\f"); fprintf(anal.f, " **** chi squareds using mono frequencies ****\n\n"); /* chi squareds for dis */ ratio = log((double)monocomptotal->next->count) - 2 * log((double)monocomptotal->count); fprintf(anal.f, " partials\n"); point1 = (path *)Malloc(sizeof(path)); point2 = (path *)Malloc(sizeof(path)); point2->next = NULL; for (b1 = a; (long)b1 <= (long)t; b1 = (base)((long)b1 + 1)) { point1->bas = b1; point1->next = NULL; n1 = getcount(root, point1); for (b2 = a; (long)b2 <= (long)t; b2 = (base)((long)b2 + 1)) { point2->bas = b2; n2 = getcount(root, point2); if (n1 > 0 && n2 > 0) expected = exp(log(n1) + log(n2) + ratio); else expected = 0.0; point1->next = point2; observed = getcount(root, point1); chisq(observed, expected, &chi, &sign); fprintf(anal.f, "%4c%c%c%8.2f (%c);", ' ', basetochar(b1), basetochar(b2), chi, sign); totchi += chi; } putc('\n', anal.f); } fprintf(anal.f, " * total chisquared = %8.2f\n\n", totchi); /* chi squareds for tris */ if (compmax >= 3) { totchi = 0.0; ratio = log((double)monocomptotal->next->next->count) - 3 * log((double)monocomptotal->count); fprintf(anal.f, " partials\n"); point3 = (path *)Malloc(sizeof(path)); point3->next = NULL; for (b1 = a; (long)b1 <= (long)t; b1 = (base)((long)b1 + 1)) { point1->bas = b1; point1->next = NULL; n1 = getcount(root, point1); for (b2 = a; (long)b2 <= (long)t; b2 = (base)((long)b2 + 1)) { point2->bas = b2; point2->next = NULL; n2 = getcount(root, point2); point1->next = point2; for (b3 = a; (long)b3 <= (long)t; b3 = (base)((long)b3 + 1)) { point3->bas = b3; n3 = getcount(root, point3); if (n1 > 0 && n2 > 0 && n3 > 0) expected = exp(log(n1) + log(n2) + log(n3) + ratio); else expected = 0.0; point2->next = point3; observed = getcount(root, point1); chisq(observed, expected, &chi, &sign); fprintf(anal.f, "%3c%c%c%c%8.2f (%c);", ' ', basetochar(b1), basetochar(b2), basetochar(b3), chi, sign); totchi += chi; } putc('\n', anal.f); } putc('\n', anal.f); } fprintf(anal.f, " * total chisquared = %8.2f\n\n", totchi); } /* chi squareds for tets */ if (compmax < 4) return; totchi = 0.0; ratio = log((double)monocomptotal->next->next->next->count) - 4 * log((double)monocomptotal->count); fprintf(anal.f, " partials\n"); point4 = (path *)Malloc(sizeof(path)); point4->next = NULL; for (b1 = a; (long)b1 <= (long)t; b1 = (base)((long)b1 + 1)) { point1->bas = b1; point1->next = NULL; n1 = getcount(root, point1); for (b2 = a; (long)b2 <= (long)t; b2 = (base)((long)b2 + 1)) { point2->bas = b2; point2->next = NULL; n2 = getcount(root, point2); point1->next = point2; for (b3 = a; (long)b3 <= (long)t; b3 = (base)((long)b3 + 1)) { point3->bas = b3; point3->next = NULL; n3 = getcount(root, point3); point2->next = point3; for (b4 = a; (long)b4 <= (long)t; b4 = (base)((long)b4 + 1)) { point4->bas = b4; n4 = getcount(root, point4); if (n1 > 0 && n2 > 0 && n3 > 0 && n4 > 0) expected = exp(log(n1) + log(n2) + log(n3) + log(n4) + ratio); else expected = 0.0; point3->next = point4; observed = getcount(root, point1); chisq(observed, expected, &chi, &sign); fprintf(anal.f, "%3c%c%c%c%c%8.2f (%c);", ' ', basetochar(b1), basetochar(b2), basetochar(b3), basetochar(b4), chi, sign); totchi += chi; } putc('\n', anal.f); } putc('\n', anal.f); } putc('\n', anal.f); } fprintf(anal.f, " * total chisquared = %8.2f\n\n", totchi); } Static Void calc2() { /* calculate chi squareds using mono and di frequencies */ base b1, b2, b3, b4; double observed; /* number of a particular oligo */ double expected; /* number of a particular oligo, see sections for details */ double ratio; /* for calculating expecteds */ double ratio2; /* for calculating expecteds */ double ratio3; /* for calculating expecteds */ double n1, n2, n3; /* for storing values from 'getcount' calls */ double chi; /* partial chisquareds */ double totchi = 0.0; /* total chisquareds */ Char sign; /* '+' or '-', as observed is more or less than expected */ path *point1; /* on the path through the tree */ path *point2; /* ' */ path *point3; /* ' */ path *point4; /* ' */ fprintf(anal.f, "\f"); fprintf(anal.f, " **** chi squareds using di frequencies ****\n\n"); /* chi squareds for tris */ /* calculating expected tris from dis uses this formula: expected[b1,b2,b3] = #[b1,b2]/2totals * #[b2,b3]/2totals * 1totals/#[b2] * 3totals. */ /* ratio = ln( 1totals * 3totals / (2totals)**2) <= 0 */ ratio = log((double)monocomptotal->count) + log((double)monocomptotal->next->next->count) - 2 * log((double)monocomptotal->next->count); fprintf(anal.f, " partials\n"); point1 = (path *)Malloc(sizeof(path)); point2 = (path *)Malloc(sizeof(path)); point3 = (path *)Malloc(sizeof(path)); point3->next = NULL; for (b1 = a; (long)b1 <= (long)t; b1 = (base)((long)b1 + 1)) { point1->bas = b1; for (b2 = a; (long)b2 <= (long)t; b2 = (base)((long)b2 + 1)) { point2->bas = b2; point1->next = point2; point2->next = NULL; n1 = getcount(root, point1); n2 = getcount(root, point2); if (n1 > 0) { ratio2 = ratio + log(n1) - log(n2); /* ratio2 <= 0 unless n1 = 0 */ } else ratio2 = 1.0; for (b3 = a; (long)b3 <= (long)t; b3 = (base)((long)b3 + 1)) { point3->bas = b3; point2->next = point3; n2 = getcount(root, point2); if (n2 > 0 && ratio2 < 1) expected = exp(log(n2) + ratio2); else expected = 0.0; observed = getcount(root, point1); chisq(observed, expected, &chi, &sign); fprintf(anal.f, "%3c%c%c%c%8.2f (%c);", ' ', basetochar(b1), basetochar(b2), basetochar(b3), chi, sign); totchi += chi; } putc('\n', anal.f); } putc('\n', anal.f); } fprintf(anal.f, " * total chisquared = %8.2f\n\n", totchi); /* chi squareds for tets */ /* expected tets using dis is calculated by this formula: expected[b1,b2,b3,b4] = #[b1,b2]/2totals * #[b2,b3]/2totals * #[b3,b4]/2totals * 1totals/#[b2] * 1totals/#[b3] * 4totals */ if (compmax < 4) return; totchi = 0.0; /* ratio = ln( 1totals * 1totals * 4totals / (2totals)**3) <= 0 */ ratio = 2 * log((double)monocomptotal->count) + log((double)monocomptotal->next->next->next->count) - 3 * log((double)monocomptotal->next->count); fprintf(anal.f, " partials\n"); point4 = (path *)Malloc(sizeof(path)); point4->next = NULL; for (b1 = a; (long)b1 <= (long)t; b1 = (base)((long)b1 + 1)) { point1->bas = b1; for (b2 = a; (long)b2 <= (long)t; b2 = (base)((long)b2 + 1)) { point2->bas = b2; point1->next = point2; point2->next = NULL; n1 = getcount(root, point1); n2 = getcount(root, point2); if (n1 > 0) { ratio2 = ratio + log(n1) - log(n2); /* ratio2 <= 0 unless n1 = 0 */ } else ratio2 = 1.0; for (b3 = a; (long)b3 <= (long)t; b3 = (base)((long)b3 + 1)) { point3->bas = b3; point2->next = point3; point3->next = NULL; n2 = getcount(root, point2); n3 = getcount(root, point3); if (n2 > 0 && ratio2 < 1) { ratio3 = ratio2 + log(n2) - log(n3); /* ratio3 <= 0 unles n2 = 0 or n1 = 0 */ } else ratio3 = 1.0; for (b4 = a; (long)b4 <= (long)t; b4 = (base)((long)b4 + 1)) { point4->bas = b4; point3->next = point4; n3 = getcount(root, point3); if (n3 > 0 && ratio3 < 1) expected = exp(ratio3 + log(n3)); else expected = 0.0; observed = getcount(root, point1); chisq(observed, expected, &chi, &sign); fprintf(anal.f, "%3c%c%c%c%c%8.2f (%c);", ' ', basetochar(b1), basetochar(b2), basetochar(b3), basetochar(b4), chi, sign); totchi += chi; } putc('\n', anal.f); } putc('\n', anal.f); } putc('\n', anal.f); } fprintf(anal.f, " * total chisquared = %8.2f\n", totchi); } Static Void calc3() { /* calculate chi squareds using tri frequencies */ base b1, b2, b3, b4; double observed; /* number of a particular oligo */ double expected; /* number of a particular oligo */ /* expected tets are calculated from tris by this formula: expected[b1,b2,b3,b4] = #[b1,b2,b3]/3totals * #[b2,b3,b4]/3totals * 2totals/#[b2,b3] * 4totals */ double ratio; /* = 2totals * 4totals / (3totals)**2 */ double ratio2; /* = ratio * #[b1,b2,b3] / #[b2,b3] */ double n1, n2; /* for storing values from 'getcount' calls */ double chi; /* partial chisquareds */ double totchi = 0.0; /* total chisquareds */ Char sign; /* '+' or '-', as observed is more or less than expected */ path *point1; /* on the path throught the tree */ path *point2; /* ' */ path *point3; /* ' */ path *point4; /* ' */ fprintf(anal.f, "\f"); fprintf(anal.f, " **** chi squareds using tri frequenies ****\n\n"); /* chi squareds for tets */ ratio = log((double)monocomptotal->next->count) + log((double)monocomptotal->next->next->next->count) - 2 * log((double)monocomptotal->next->next->count); fprintf(anal.f, " partials\n"); point1 = (path *)Malloc(sizeof(path)); point2 = (path *)Malloc(sizeof(path)); point1->next = point2; point3 = (path *)Malloc(sizeof(path)); point2->next = point3; point4 = (path *)Malloc(sizeof(path)); point4->next = NULL; for (b1 = a; (long)b1 <= (long)t; b1 = (base)((long)b1 + 1)) { point1->bas = b1; for (b2 = a; (long)b2 <= (long)t; b2 = (base)((long)b2 + 1)) { point2->bas = b2; for (b3 = a; (long)b3 <= (long)t; b3 = (base)((long)b3 + 1)) { point3->bas = b3; point3->next = NULL; n1 = getcount(root, point1); n2 = getcount(root, point2); if (n1 > 0) { ratio2 = ratio + log(n1) - log(n2); /* ratio2 <= 0 unless n1 = 0 */ } else ratio2 = 1.0; for (b4 = a; (long)b4 <= (long)t; b4 = (base)((long)b4 + 1)) { point4->bas = b4; point3->next = point4; n2 = getcount(root, point2); if (n2 > 0 && ratio2 < 1) expected = exp(ratio2 + log(n2)); else expected = 0.0; observed = getcount(root, point1); chisq(observed, expected, &chi, &sign); fprintf(anal.f, "%3c%c%c%c%c%8.2f (%c);", ' ', basetochar(b1), basetochar(b2), basetochar(b3), basetochar(b4), chi, sign); totchi += chi; } putc('\n', anal.f); } putc('\n', anal.f); } putc('\n', anal.f); } fprintf(anal.f, " * total chisquared = %8.2f\n", totchi); } main(argc, argv) int argc; Char *argv[]; { /**** compan ****/ PASCAL_MAIN(argc, argv); if (setjmp(_JL1)) goto _L1; companp.f = NULL; strcpy(companp.name, "companp"); anal.f = NULL; strcpy(anal.name, "anal"); cmp.f = NULL; strcpy(cmp.name, "cmp"); printf("compan %4.2f\n", version); if (*anal.name != '\0') anal.f = fopen(anal.name, "w"); else anal.f = tmpfile(); if (anal.f == NULL) _EscIO2(FileNotFound, anal.name); SETUPBUF(anal.f, Char); if (*cmp.name != '\0') { if (cmp.f != NULL) cmp.f = freopen(cmp.name, "r", cmp.f); else cmp.f = fopen(cmp.name, "r"); } else rewind(cmp.f); if (cmp.f == NULL) _EscIO2(FileNotFound, cmp.name); RESETBUF(cmp.f, Char); if (BUFEOF(cmp.f)) { printf(" no composition file provided\n"); halt(); } if (*companp.name != '\0') { if (companp.f != NULL) companp.f = freopen(companp.name, "r", companp.f); else companp.f = fopen(companp.name, "r"); } else rewind(companp.f); if (companp.f == NULL) _EscIO2(FileNotFound, companp.name); RESETBUF(companp.f, Char); if (BUFEOF(companp.f)) readmax = defcompmax; else { fscanf(companp.f, "%ld%*[^\n]", &readmax); getc(companp.f); } if (readmax > defcompmax) readmax = defcompmax; readcomp(&cmp, &compmax, readmax, &root, &monocomptotal); header(); calc0(); if (compmax >= 2) calc1(); if (compmax >= 3) calc2(); if (compmax >= 4) calc3(); _L1: if (cmp.f != NULL) fclose(cmp.f); if (anal.f != NULL) fclose(anal.f); if (companp.f != NULL) fclose(companp.f); exit(EXIT_SUCCESS); } /* End. */