/* Output from p2c 1.21alpha-07.Dec.93, the Pascal-to-C translator */ /* From input file "cluster.p" */ #include /* cluster: filter indana subindexes to find duplicate entries and then print the duplicate pair coordinates, lengths, and sequences into the output files Mike Stephens, 1992 */ /* end of program */ /* begin module version */ #define version 5.06 /* of cluster.p 1992 September 18 origin 1989 September 1 */ /* end module version */ /* begin module describe.cluster */ /* name cluster: cluster indana subindexes into groups of duplicate entries synopsis cluster(clusterp: in, subind: in, inst: in, book: in, pairs: out, clumps: out, output: out) files clusterp: The cluster parameter file that consists of the following: FIRST LINE 'y' turns the flag on, 'n' turns it off (debugging) allows one to look at raw data in the bags. The debugging flag controls the printing of the raw data above the regular output of the cluster program, which is created solely by procedure showRAWbag. This can then be compared with the data in the chart for correctness. Raw data consists of the series of coordinate pairs in the bag and the sides they are matched on. printed above the standard output structure. example: - ( 630, 69) R L ( 649, 88) - {20} {20} ************************************* | 630 663 HUMUK | ---------- | 34 HUMUPA | ---------- | 69 102 ************************************* It is important to note that the raw data will only appear in the pairs output file, and will not be written in clumps at all. This means that parameter 3, writepairs, must also be turned on for this flag to be effective. SECOND LINE 'y' turns the flag on, 'n' turns it off (showfragments) allows one to see pairs that are fragmented. The showfragments toggle controls printing the outputs of pairs with "imperfect" matches. That is, in some cases a repeating sequence will match in several frames, causing repeated sequence matching and producing a large list of coordinate pairs. This list can be shown if the parameter is turned on, but the statement "WARNING: sequence pairs are overmatched" will appear if it is turned off. The actual sequences will be shown in either case, so the comparison can always be done by hand by the user. The output is excessively long, but the sequences will be shown, so the comparison can be done by the user. example: 1 acggatcgtgtgtgtgtgtgtgtgtacgatcggatcgat 2 acggatcgtgtgtgtgtgtgtgtgtacgatcggatcgat These sequences will have matches between all of the 'gt' base pairs, resulting in an overwhelming number of matches. The maximum number of possible matches is found by taking the length of the sequences and dividing it by the value in the overmatched parameter (FIFTH LINE) times the number of instructions that match between any two pieces in the dbinst. This results in a maximum number of matches between any two pieces. Any pieces above this limit will can have their output completely shown or can generate a warning message (see showfragments, SECOND LINE). In addition to preventing the example case, showfragments will also prevent the display of any other case that may cause an excessive number of matches. THIRD LINE 'y' turns the flag on, 'n' turns it off (writepairs) controls the printing of the pairs output file. If writepairs is on, the original clustering pairlist will be printed into the output file pairs. If it is off, this file will not be printed. This parameter must be turned on to effectively use the debugging parameter (see FIRST LINE). FOURTH LINE 'y' turns the flag on, 'n' turns it off (writeclumps) controls printing of the clumps output file. If writeclumps is on, the original clustering pairlist will be sent through the clumping procedures. The output file clumps will contain the sequences involved in the matches on the pair in addition to the clumped version of the pairlist. The clumping process takes an excessive amount of time for very large files, since the program must traverse the entire pairlist to find all related pairs, then put the pairs on to the clumplist, then go through the book and find sequences to match every instruction in every pair of every clump. Although it is much easier to determine which pieces are true repeats through use of the clumps file, it is certainly possible to do so by simply using the pairs output file. FIFTH LINE any integer (matchparameter) is the number of matches to be allowed between two instructions. This can be determined by dividing the sequence length from the book by the minimum window size from the subindex, or a maximum number of matches between instructions can be set. An integer less than or equal to 0 will calculate maximum matches using the above method. Any number greater than 0 will be used as the new maximum matches. example: if the instructions call for the sequences piece1: get from 100 -50 to 100 +50; piece2: get from 200 -50 to 200 +50; The sequence length is 101. If the windowsize read from the subindex = 15, then 6 possible matches can occur between these two instructions (101 div 15 = 6). The TOTAL number of matches between two pieces is found by multiplying matchparameter by the number of instructions in a given pair. If a piece has more matches than this, it is considered to be overmatched, the bag will not be printed, and the statment 'WARNING: sequence pairs have too many matches.' will appear. Overmatched pairs can be printed using the showmatches parameter (see SECOND LINE). subind: a subindex from the indana program matching the inst and the book inst: a set of delila instructions that correspond to the book book: a delila book that contains the sequences being clumped pairs: the output list of paired sequences clumps: the output list of clumped sequences output: When errors occur, the program halts and produces an error message description Duplicate entries in the subind subindex are clustered into a unified list of pairs and copied to output files as sequence numbers, lengths, and sequence base pairs. Pairs are determined by the indana program, which delegates sequence similarities with an '*'. Cluster takes the subindex and shows the coordinate range and length of the similarity by pairs. The pairs file is a list of relationships between two sequences, the clumps file takes this list of pairs and groups related ones together. The seqalign modules of the program then access the book and get the corresponding sequences to print out with the instruction number and piece name. documentation none see also index.p, indana.p author R. Michael Stephens bugs None currently known. technical notes The read for the indana window size is based on the '[' character before the number in the subind heading. Any changes to indana that alter this format must be reflected in the getwindowsize procedure. */ /* end module describe.cluster */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module cluster.book.const */ /* constants needed for book manipulations */ #define dnamax 101 /* length of dna arrays of the piece*/ #define namelength 20 /* maximum key name length */ #define linelength 80 /* maximum line readable in book */ /* end module cluster.book.const version = 'delmod 6.58 90 Jan 2 tds/gds' */ /* begin module book.type */ /* types needed for book manipulations */ typedef long chset[5]; /* types defined in book definition */ typedef Char alpha[namelength]; /* this is not alfa */ /* name is a left justified string with blanks following the characters */ typedef struct name { alpha letters; /* zero means an unspecified structure */ char length; } name; typedef struct line { /* a line of characters */ Char letters[linelength]; char length; struct line *next; } line; typedef enum { plus, minus, dircomplement, dirhomologous } direction; typedef enum { linear, circular } configuration; typedef enum { on, off } state; typedef struct header { /* header of key */ name keynam; /* key name of structure */ line *fulnam; /* full name of structure */ /* note key */ line *note; } header; /* base types */ typedef enum { a, c, g, t } base; typedef char dnarange; /* p2c: cluster.p, line 215: * Note: Field width for seq assumes enum base has 4 elements [105] */ typedef uchar seq[(dnamax + 3) / 4]; typedef struct dnastring { seq part; dnarange length; struct dnastring *next; } dnastring; typedef struct orgkey { /* organism key */ header hea; /* genetic map units */ line *mapunit; } orgkey; typedef struct chrkey { /* chromosome key */ header hea; double mapbeg; /* number of genetic map beginning */ /* number of genetic map ending */ double mapend; } chrkey; typedef struct piekey { /* piece key */ header hea; double mapbeg; /* genetic map beginning */ configuration coocon; /* configruation (circular/linear) */ direction coodir; /* direction (+/-) relative to genetic map */ long coobeg; /* beginning nucleotide */ long cooend; /* ending nucleotide */ configuration piecon; /* configruation (circular/linear) */ direction piedir; /* direction (+/-) relative to coordinates */ long piebeg; /* beginning nucleotide */ long pieend; /* ending nucleotide */ } piekey; typedef struct piece { piekey key; dnastring *dna; } piece; typedef struct reference { name pienam; /* name of piece referred to */ double mapbeg; /* genetic map beginning */ direction refdir; /* direction relative to coordinates */ long refbeg; /* beginning nucleotide */ long refend; /* ending nucleotide */ } reference; typedef struct genkey { /* gene key */ header hea; reference ref; } genkey; typedef struct trakey { /* transcript key */ header hea; reference ref; } trakey; typedef struct markey { /* marker key */ header hea; reference ref; state sta; line *phenotype; struct marker *next; } markey; typedef struct marker { markey key; dnastring *dna; } marker; /* end module book.type version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module cluster.type */ /* a bag is that part of a pair that contains the sequence coordinates */ /* a pointer to a bag */ typedef struct bag { long leftcoord; /* coordinate of the first number in numberpair */ long rightcoord; /* coordinate of the second number in numberpair */ boolean leftmatch; /* is there a matching coordinate on the left? */ boolean rightmatch; /* is there a matching coordinate on the right? */ struct bag *nextbag; /* points to the next bag in the linked list */ } bag; /* an entry is a working copy of a pair used to find duplicate lists*/ /* a pointer to an entry */ typedef struct entries { long piece_; /* the piece number of the entry */ long inst; /* the sequence number in the entry */ long coordinate; /* the coordinate number of an entry */ struct entries *nextentry; /* pointer to the next entry on the list */ } entries; /* the numberptr is a multiple-use list of single integers */ /* a pointer to a numberlist */ typedef struct numberlist { long number; /* the number in the record on the list */ struct numberlist *nextlist; /* a pointer to the next element on the list */ } numberlist; /* pairs are complete doubles of data sets for duplicate sequences */ /* a pointer to a pair */ typedef struct numberpair { long leftpiece; /* the first piece number in the pair */ long rightpiece; /* the second piece number in the pair */ numberlist *leftseq; /* the pair instruction list of leftpiece */ numberlist *rightseq; /* the pair instruction list of rightpiece */ bag *seqbag; /* points to the bag for that pair */ struct numberpair *nextpair; /* points to the next numberpair */ } numberpair; /* ranges show the relationships between intruction and piece numbers */ /* a pointer to a range */ typedef struct rangerecord { long instnumber; /* the highest instruction number in the range */ long singlescount; /* the number of singels preceding the range */ struct rangerecord *nextrange; /* the pointer to the next range */ } rangerecord; /* the clump is the final stage of clustering, consisting of a data */ /* cube with a list of clumps on the y-axis, the pairs within the clump*/ /* on the x-axis, and the bags of the pairs in the clump on the z-axis */ /* a pointer to a clump */ typedef struct clump { numberlist *piecelist; /* the piece numbers in the clump */ numberpair *pairlist; /* pointer to the related pairlist on the clump */ struct clump *nextclump; /* pointer to the next clump in the list */ } clump; /* end module cluster.type */ /* begin module book.var */ /* ************************************************************************ */ /* global variables needed for book manipulations */ /* free storage: */ Static line *freeline; /* unused lines */ Static dnastring *freedna; /* unused dnas */ Static boolean readnumber; /* whether to read a number from the notes, or to read in the notes */ Static long number; /* the number of the item just read */ Static boolean numbered; /* true when the item just read is numbered */ Static boolean skipunnum; /* a control variable to allow skipping of un-numbered items in the book */ /* ************************************************************************ */ /* end module book.var version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module cluster.var */ Static _TEXT book; /* the book to be aligned */ Static _TEXT clumps; /* the clumped list output file */ Static _TEXT clusterp; /* the parameter file */ Static boolean debugging; /* debugging parameter control variable */ Static _TEXT pairs; /* the paired list output file */ Static _TEXT subind; /* the indana subindex used as input */ Static bag *freebag; /* root for the list of unused bag pointers */ Static clump *freeclump; /* root for the list of unused clump pointers */ Static entries *freeentry; /* root for the list of unused entry pointers */ Static numberpair *freepair; /* root for the list of unused pair pointers */ Static numberlist *freesequence; /* root for the list of unused sequence pointers */ Static _TEXT inst; /* delila instructions to align procedures */ Static long matchparameter; /* maximum number of matches allowed per inst */ Static boolean showfragments; /* showfragments parameter control variable */ Static boolean writepairs; /* writepairs parameter control variable */ Static boolean writeclumps; /* wrteinsts parameter control variable */ Static jmp_buf _JL1; /* end module cluster.var */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module halt */ Static Void halt() { /* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. */ printf(" program halt.\n"); longjmp(_JL1, 1); } /* end module halt version = 'delmod 6.65 94 sep 5 tds/gds' */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module copyaline */ Static Void copyaline(fin, fout) _TEXT *fin, *fout; { /* copy a line from file fin to file fout */ while (!P_eoln(fin->f)) { putc(P_peek(fin->f), fout->f); getc(fin->f); } fscanf(fin->f, "%*[^\n]"); getc(fin->f); putc('\n', fout->f); } /* copyaline */ /* end module copyaline version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module copylines */ Static long copylines(fin, fout, n) _TEXT *fin, *fout; long n; { /* copy n lines of file fin to file fout. the actual number of lines copied is returned. */ long index = 0; /* the current line number */ while (!BUFEOF(fin->f) && index < n) { copyaline(fin, fout); index++; } return index; } /* copylines */ /* end module copylines version = 'delmod 6.65 94 sep 5 tds/gds' */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module skipblanks */ Static Void skipblanks(thefile) _TEXT *thefile; { /* skip over blanks until a non-blank, or end of line, is found */ while ((P_peek(thefile->f) == ' ') & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipnonblanks(thefile) _TEXT *thefile; { /* skip over nonblanks until a blank, or end of line, is found */ while ((P_peek(thefile->f) != ' ') & (!P_eoln(thefile->f))) getc(thefile->f); } /* end module skipblanks */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module package.align */ /* ************************************************************************ */ /* begin module package.getpiece */ /* ************************************************************************ */ /* begin module package.brpiece */ /* ************************************************************************ */ /* begin module book.basis */ /* procedures needed for book manipulations */ /* get procedures should be used for all linked lists of records */ Static Void getline(l) line **l; { /* obtain a line from the free line list or by making a new one */ if (freeline != NULL) { *l = freeline; freeline = freeline->next; } else *l = (line *)Malloc(sizeof(line)); (*l)->length = 0; (*l)->next = NULL; } Static Void getdna(l) dnastring **l; { if (freedna != NULL) { *l = freedna; freedna = freedna->next; } else *l = (dnastring *)Malloc(sizeof(dnastring)); (*l)->length = 0; (*l)->next = NULL; } /* clear procedures should be called each time the records are no longer needed failure to do this may result in a stack overflow. */ Static Void clearline(l) line **l; { /* return a line to the free line list */ line *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = freeline; freeline = lptr; } Static Void cleardna(l) dnastring **l; { dnastring *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = freedna; freedna = lptr; } Static Void clearheader(h) header *h; { /* clear the header h (remove lines to free storage) */ clearline(&h->fulnam); while (h->note != NULL) clearline(&h->note); } Static Void clearpiece(p) piece **p; { /* clear the dna of the piece */ while ((*p)->dna != NULL) cleardna(&(*p)->dna); clearheader(&(*p)->key.hea); } Static base chartobase(ch) Char ch; { /* convert a character into a base */ base Result; switch (ch) { case 'a': Result = a; break; case 'c': Result = c; break; case 'g': Result = g; break; case 't': Result = t; break; } return Result; } Static Char basetochar(ba) base ba; { /* convert a base into a character */ Char Result; switch (ba) { case a: Result = 'a'; break; case c: Result = 'c'; break; case g: Result = 'g'; break; case t: Result = 't'; break; } return Result; } Static base complement(ba) base ba; { /* take the complement of ba */ base Result; switch (ba) { case a: Result = t; break; case c: Result = g; break; case g: Result = c; break; case t: Result = a; break; } return Result; } Static long pietoint(p, pie) long p; piece *pie; { /* p is a coordinate on the piece. we want to transform p into a number from 1 to n: an internal coordinate system for easy manipulation of piece coordinates */ long i; /* an intermediate value */ piekey *WITH; WITH = &pie->key; switch (WITH->piedir) { case plus: if (p >= WITH->piebeg) i = p - WITH->piebeg + 1; else i = p - WITH->coobeg + WITH->cooend - WITH->piebeg + 2; break; case minus: if (p <= WITH->piebeg) i = WITH->piebeg - p + 1; else i = WITH->cooend - p + WITH->piebeg - WITH->coobeg + 2; break; } return i; } Static long inttopie(i, pie) long i; piece *pie; { /* i is in the range 1 to some maximum. it is an internal coordinate system for the program. we want to do a coordinate transformation to obtain a value in the range of the piece called pie: i=1 corresponds to piebeg and i=its maximum corresponds to pieend */ long p; /* an intermediate value */ piekey *WITH; WITH = &pie->key; switch (WITH->piedir) { case plus: p = WITH->piebeg + i - 1; if (p > WITH->cooend) { if (WITH->coocon == circular) p += WITH->coobeg - WITH->cooend - 1; } break; case minus: p = WITH->piebeg - i + 1; if (p < WITH->coobeg) { if (WITH->coocon == circular) p += WITH->cooend - WITH->coobeg + 1; } break; } return p; } Static long piecelength(pie) piece *pie; { /* return the length of the dna in pie */ return (pietoint(pie->key.pieend, pie)); } /* end module book.basis version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.getto */ Static Char getto(thefile, ch) _TEXT *thefile; long *ch; { /* search the file for a character in the first line which is a member of the set ch. */ Char achar = ' '; while ((!P_inset(achar, ch)) & (!BUFEOF(thefile->f))) { fscanf(thefile->f, "%c%*[^\n]", &achar); getc(thefile->f); if (achar == '\n') achar = ' '; } if (P_inset(achar, ch)) return achar; else return ' '; } /* end module book.getto version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.skipstar */ Static Void skipstar(thefile) _TEXT *thefile; { /* skip start of line (or star = '*'). */ if (P_peek(thefile->f) != '*') { printf(" procedure skipstar: bad book\n"); printf(" \"*\" expected as first character on the line, but \"%c\" was found\n", P_peek(thefile->f)); halt(); } getc(thefile->f); /* skip the star */ if (P_peek(thefile->f) != ' ') { /* skip the blank */ printf(" procedure skipstar: bad book\n"); printf(" \"* \" expected on a line but \"*%c\" was found\n", P_peek(thefile->f)); halt(); } getc(thefile->f); } /* skipstar */ /* end module book.skipstar version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.brreanum */ Static Void brreanum(thefile, reanum) _TEXT *thefile; double *reanum; { /* read a real number from the file */ skipstar(thefile); fscanf(thefile->f, "%lg%*[^\n]", reanum); getc(thefile->f); } /* end module book.brreanum version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.brnumber */ Static Void brnumber(thefile, num) _TEXT *thefile; long *num; { /* read a number from the file */ skipstar(thefile); fscanf(thefile->f, "%ld%*[^\n]", num); getc(thefile->f); } /* end module book.brnumber version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.brname */ Static Void brname(thefile, nam) _TEXT *thefile; name *nam; { /* read a name from the file */ long i; /* an index to the name */ Char c_; /* a character read */ skipstar(thefile); nam->length = 0; do { nam->length++; c_ = getc(thefile->f); if (c_ == '\n') c_ = ' '; nam->letters[nam->length - 1] = c_; } while (!(P_eoln(thefile->f) || nam->length >= namelength || nam->letters[nam->length - 1] == ' ')); if (nam->letters[nam->length - 1] == ' ') nam->length--; if (nam->length < namelength) { for (i = nam->length; i < namelength; i++) nam->letters[i] = ' '; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* brname */ /* end module book.brname version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.brline */ Static Void brline(thefile, l) _TEXT *thefile; line **l; { /* read a line from the file */ long i = 0; long j; Char acharacter; long FORLIM; skipstar(thefile); while (!P_eoln(thefile->f)) { i++; acharacter = getc(thefile->f); if (acharacter == '\n') acharacter = ' '; (*l)->letters[i-1] = acharacter; } if (i < (*l)->length) { FORLIM = (*l)->length; for (j = i; j < FORLIM; j++) (*l)->letters[j] = ' '; } (*l)->length = i; (*l)->next = NULL; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* end module book.brline version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.brdirect */ Static Void brdirect(thefile, direct) _TEXT *thefile; direction *direct; { /* read a direction */ Char ch; skipstar(thefile); fscanf(thefile->f, "%c%*[^\n]", &ch); getc(thefile->f); if (ch == '\n') ch = ' '; if (ch == '+') *direct = plus; else *direct = minus; } /* end module book.brdirect version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.brconfig */ Static Void brconfig(thefile, config) _TEXT *thefile; configuration *config; { /* read a configuration */ Char ch; skipstar(thefile); fscanf(thefile->f, "%c%*[^\n]", &ch); getc(thefile->f); if (ch == '\n') ch = ' '; if (ch == 'l') *config = linear; else *config = circular; } /* end module book.brconfig version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.brnotenumber */ Static Void brnotenumber(thefile, note) _TEXT *thefile; line **note; { /* book note reading to obtain the number of the object. the procedure returns the value of the number as a global. (this is not such a good practice, but we are stuck with it for now.) */ *note = NULL; numbered = false; number = 0; /* force number to zero if there is no number at all */ /* the next character is n or * depending on whether there are notes */ if (P_peek(thefile->f) != 'n') return; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); if (P_peek(thefile->f) == 'n') { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); return; } skipstar(thefile); if (!P_eoln(thefile->f)) { if (P_peek(thefile->f) == '#') { numbered = true; getc(thefile->f); /* move past the number symbol */ fscanf(thefile->f, "%ld", &number); } } do { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } while (P_peek(thefile->f) != 'n'); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* brnotenumber */ /* end module book.brnotenumber version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.brnote */ Static Void brnote(thefile, note) _TEXT *thefile; line **note; { /* read note key */ line *newnote; /* the new note */ line *previousnote; /* the last line of the notes */ *note = NULL; if (P_peek(thefile->f) != 'n') /* enter note */ return; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); if (P_peek(thefile->f) == 'n') { /* abort null note (n/n) */ fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); return; } getline(note); newnote = *note; while (P_peek(thefile->f) != 'n') { /* wait until end of note */ brline(thefile, &newnote); previousnote = newnote; /* get next note */ getline(&newnote->next); newnote = newnote->next; } /* last note was not used, so: */ clearline(&newnote); previousnote->next = NULL; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* brnote */ /* end module book.brnote version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.brheader */ Static Void brheader(thefile, hea) _TEXT *thefile; header *hea; { /* read the header of a key. */ /* read key name */ brname(thefile, &hea->keynam); /* read full name */ getline(&hea->fulnam); brline(thefile, &hea->fulnam); /* read note key */ if (readnumber) brnotenumber(thefile, &hea->note); else brnote(thefile, &hea->note); } /* end module book.brheader version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.copyheader */ Static Void copyheader(fromhea, tohea) header fromhea, *tohea; { /* copy the header fromhea into tohea. Note that the linked objects are NOT copied, but merely pointed to. */ memcpy(tohea->keynam.letters, fromhea.keynam.letters, sizeof(alpha)); tohea->keynam.length = fromhea.keynam.length; tohea->note = fromhea.note; tohea->fulnam = fromhea.fulnam; } /* end module book.copyheader version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.brpiekey */ Static Void brpiekey(thefile, pie) _TEXT *thefile; piekey *pie; { /* read piece key */ brheader(thefile, &pie->hea); brreanum(thefile, &pie->mapbeg); brconfig(thefile, &pie->coocon); brdirect(thefile, &pie->coodir); brnumber(thefile, &pie->coobeg); brnumber(thefile, &pie->cooend); brconfig(thefile, &pie->piecon); brdirect(thefile, &pie->piedir); brnumber(thefile, &pie->piebeg); brnumber(thefile, &pie->pieend); } /* end module book.brpiekey version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.brdna */ Static Void brdna(thefile, dna) _TEXT *thefile; dnastring **dna; { /* read in dna from thefile */ /* note: if the dna were circularized, by linking the last dnastring to the first, then the cleardna routine could not clear properly, and would loop forever... there is no reason to do that, since a simple mod function will allow one to access the circle. */ Char ch; dnastring *workdna; long SET[5]; long TEMP; getdna(dna); workdna = *dna; ch = getto(thefile, P_addset(P_expset(SET, 0L), 'd')); ch = getc(thefile->f); /* skipstar */ if (ch == '\n') ch = ' '; while (ch == '*') { ch = getc(thefile->f); /* skip blank */ if (ch == '\n') ch = ' '; do { ch = getc(thefile->f); if (ch == '\n') ch = ' '; if (ch == 't' || ch == 'g' || ch == 'c' || ch == 'a') { if (workdna->length == dnamax) { getdna(&workdna->next); workdna = workdna->next; } workdna->length++; TEMP = workdna->length - 1; P_clrbits_B(workdna->part, TEMP, 1, 3); P_putbits_UB(workdna->part, TEMP, (int)chartobase(ch), 1, 3); } } while (!P_eoln(thefile->f)); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* go to next line */ ch = getc(thefile->f); /* ch is either '*' or 'd' */ if (ch == '\n') ch = ' '; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); } /* end module book.brdna version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.brpiece */ Static Void brpiece(thefile, pie) _TEXT *thefile; piece **pie; { /* read in a piece */ brpiekey(thefile, &(*pie)->key); if (numbered || !skipunnum) brdna(thefile, &(*pie)->dna); } /* end module book.brpiece version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.brinit */ Static Void brinit(book) _TEXT *book; { /* check that the book is ok to read, and set up the global variables for br routines */ /* halt if the book is bad (first word is 'halt') or the first character is not * */ if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); if (!BUFEOF(book->f)) { /* check for the date line */ if (P_peek(book->f) != '*') { if (P_peek(book->f) != 'h') printf(" this is not the first line of a book:\n"); else printf(" bad book:\n"); putchar(' '); while (!(P_eoln(book->f) | BUFEOF(book->f))) { putchar(P_peek(book->f)); getc(book->f); } putchar('\n'); halt(); } } else { printf(" book is empty\n"); halt(); } /* initialize free storage */ freeline = NULL; freedna = NULL; readnumber = true; /* usually we read in numbers for items */ number = 0; /* arbitrary value */ numbered = false; /* the piece has no number (none yet read in) */ skipunnum = false; } /* brinit */ /* end module book.brinit version = 'delmod 6.65 94 sep 5 tds/gds' */ /* ************************************************************************ */ /* end module package.brpiece version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.getpiece */ Static Void getpiece(thefile, pie) _TEXT *thefile; piece **pie; { /* move to and read in the next piece in the book */ Char ch; long SET[5]; ch = getto(thefile, P_addset(P_expset(SET, 0L), 'p')); /* get to the next p(iece) in the book */ if (ch != ' ') { brpiece(thefile, pie); ch = getto(thefile, P_addset(P_expset(SET, 0L), 'p')); /* read past closing p */ } } /* end module book.getpiece version = 'delmod 6.65 94 sep 5 tds/gds' */ /* ************************************************************************ */ /* end module package.getpiece version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module findblank */ Static Void findblank(afile) _TEXT *afile; { /* read a file to find the next blank character */ Char ch; do { ch = getc(afile->f); if (ch == '\n') ch = ' '; } while (ch != ' '); } /* end module findblank version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module findnonblank */ Static Void findnonblank(afile, ch) _TEXT *afile; Char *ch; { /* find the next non blank character in a file, return it in ch. */ *ch = ' '; while (!BUFEOF(afile->f) && *ch == ' ') { *ch = getc(afile->f); if (*ch == '\n') *ch = ' '; if (P_eoln(afile->f)) { fscanf(afile->f, "%*[^\n]"); getc(afile->f); } } } #define maximumrange 500 /* end module findnonblank version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module align.align */ Static Void align(inst, book, pie, length, alignedbase) _TEXT *inst, *book; piece **pie; long *length, *alignedbase; { /* documentation on align is in module info.align and delman.use.aligned.books */ /* if the alignment point is more than this distance from the piece ends, the program halts in an attempt to catch the alignment bug... 1991 Jan 11 It appears that the rewrite of the code has removed the bug, but the check will be kept. */ Char ch; /* a character in inst */ boolean comment; /* true means we are inside a comment */ boolean done = false; /* done finding an aligning get */ long thebase; /* the base read in */ name *WITH; if (BUFEOF(book->f)) /* if there is still more to the book ... */ return; getpiece(book, pie); /* read in the piece */ if (BUFEOF(book->f)) /* if we found a piece ... */ return; *length = pietoint((*pie)->key.pieend, *pie); /* calculate piece length */ /* now find inst the next occurance of 'get' */ while (!done) { if (BUFEOF(inst->f)) { /* no instructions? */ *alignedbase = 1; /* simply align by the first base */ done = true; break; } if (P_peek(inst->f) == '(') { /* skip comment */ getc(inst->f); if (P_peek(inst->f) == '*') comment = true; /*if comment then write(output,'COMMENT: (');*/ while (comment) { if (BUFEOF(inst->f)) { printf(" in procedure align:\n"); printf(" an instruction comment does not end!\n"); halt(); } /*write(output,inst^);*/ getc(inst->f); if (P_peek(inst->f) == '*') { getc(inst->f); /*if inst^ = ')' then writeln(output,'*)');*/ if (P_peek(inst->f) == ')') comment = false; } } } if (P_peek(inst->f) == 'g') { getc(inst->f); if (P_peek(inst->f) == 'e') { getc(inst->f); if (P_peek(inst->f) == 't') { getc(inst->f); if (P_peek(inst->f) == ' ') { findnonblank(inst, &ch); /* get to "from" */ findblank(inst); /* get past "from" */ fscanf(inst->f, "%ld", &thebase); /* read in the alignedbase */ /*writeln(output,'thebase=',thebase:1);*/ *alignedbase = pietoint(thebase, *pie); done = true; } } } } getc(inst->f); /* move along now */ } if (*alignedbase > -maximumrange && *alignedbase <= *length + maximumrange) return; printf(" in procedure align:\n"); printf(" read in base was %ld\n", thebase); printf(" in internal coordinates: %ld\n", *alignedbase); printf(" maximum range was %ld\n", (long)maximumrange); printf(" piece length was %ld\n", *length); WITH = &(*pie)->key.hea.keynam; /* p2c: cluster.p, line 1072: Note: * Format for packed-array-of-char will work only if width < length [321] */ printf(" piece name: %.*s\n", WITH->length, WITH->letters); printf(" piece number: %ld\n", number); printf(" aligned base is too far away... see the code\n"); halt(); } #undef maximumrange #define maximumrange 500 /* end module align.align version = 'delmod 6.65 94 sep 5 tds/gds' */ /* ************************************************************************ */ /* end module package.align version = 'delmod 6.65 94 sep 5 tds/gds' */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module align.maxminalignment */ Static Void maxminalignment(inst, book, fromparam, toparam) _TEXT *inst, *book; long *fromparam, *toparam; { /* prescan the book to find the range over which the pieces of the book are spread, relative to the aligned base. the procedure uses the same variables that align does (so it can call align itself), and it returns the range in fromparam and toparam. */ /* the maximum size aligned piece; this will presumably catch the alignment bug */ long distance; /* a distance to the aligned base */ piece *pie; long length, alignedbase; pie = (piece *)Malloc(sizeof(piece)); /* set an initial range for the two bounds */ *fromparam = LONG_MAX; *toparam = -LONG_MAX; if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); if (*inst->name != '\0') { if (inst->f != NULL) inst->f = freopen(inst->name, "r", inst->f); else inst->f = fopen(inst->name, "r"); } else rewind(inst->f); if (inst->f == NULL) _EscIO2(FileNotFound, inst->name); RESETBUF(inst->f, Char); while (!BUFEOF(book->f)) { align(inst, book, &pie, &length, &alignedbase); if (BUFEOF(book->f)) break; distance = 1 - alignedbase; if (*fromparam > distance) *fromparam = distance; distance = length - alignedbase; if (*toparam < distance) *toparam = distance; clearpiece(&pie); } if (*toparam - *fromparam > maximumrange) { printf(" in procedure maxminalignment:\n"); printf(" fromparameter = %ld\n", *fromparam); printf(" toparameter = %ld\n", *toparam); printf(" this exceeds the maximum range allowed (%ld)\n", (long)maximumrange); printf(" see notes in the procedure. \n"); /* notes: if you desired this range, increase 'maximumrange'. otherwise, this may indicate a bug - either: 1) locate the bug (and tell tom schneider, please...) 2) reduce the size of the fragments, from one or the other end until the bombing is stopped. */ halt(); } /* make the book readable again */ if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); if (*inst->name != '\0') { if (inst->f != NULL) inst->f = freopen(inst->name, "r", inst->f); else inst->f = fopen(inst->name, "r"); } else rewind(inst->f); if (inst->f == NULL) _EscIO2(FileNotFound, inst->name); RESETBUF(inst->f, Char); Free(pie); } #undef maximumrange /* end module align.maxminalignment version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module align.withinalignment */ Static boolean withinalignment(alignedposition, alignedbase, length) long alignedposition, alignedbase, length; { /* this function tells one if an aligned position, relative to an aligned base in a piece of some length is within the piece. */ long p; /* the position on the piece */ p = alignedposition + alignedbase; return (p > 0 && p <= length); } /* end module align.withinalignment version = 'delmod 6.65 94 sep 5 tds/gds' */ /* begin module book.getbase */ Static base getbase(position, pie) long position; piece *pie; { /* get a base from the nth position (internal coordinates) of the piece. no protection is made against positions outside the piece */ dnastring *workdna; long p = dnamax; /* the last base of the dna part */ workdna = pie->dna; while (position > p) { p += dnamax; workdna = workdna->next; } return ((base)P_getbits_UB(workdna->part, position - p + dnamax - 1, 1, 3)); } /* Local variables for numberdigit: */ struct LOC_numberdigit { long number, place; /* the exponent of logplace */ long absolute; /* the absolute value of number */ Char acharacter; /* the character to be returned */ } ; /* ******************************************************************** */ /* NESTED PROCEDURES */ Local Void digit(LINK) struct LOC_numberdigit *LINK; { /* extract a digit at the place position */ long tenplace; /* ten times place */ long z; /* an intermediate value */ long d; /* the digit extracted */ tenplace = LINK->place * 10; z = LINK->absolute - LINK->absolute / tenplace * tenplace; if (LINK->place == 1) d = z; else d = z / LINK->place; switch (d) { case 0: LINK->acharacter = '0'; break; case 1: LINK->acharacter = '1'; break; case 2: LINK->acharacter = '2'; break; case 3: LINK->acharacter = '3'; break; case 4: LINK->acharacter = '4'; break; case 5: LINK->acharacter = '5'; break; case 6: LINK->acharacter = '6'; break; case 7: LINK->acharacter = '7'; break; case 8: LINK->acharacter = '8'; break; case 9: LINK->acharacter = '9'; break; } } /* digit */ Local Void sign(LINK) struct LOC_numberdigit *LINK; { /* put a negative sign out or a positive sign */ if (LINK->number < 0) LINK->acharacter = '-'; else LINK->acharacter = '+'; } /* sign */ /* end module book.getbase version = 'delmod 6.65 94 sep 5 tds/gds' */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module package.numbar */ /* ************************************************************************ */ /* begin module numberdigit */ Static Char numberdigit(number_, logplace) long number_, logplace; { /* return the digit at the place value ('logplace') position of number. example: numberdigit(13625, 3) = 3 numberdigit(13625, 4) = 1 */ struct LOC_numberdigit V; long count; /* used to make place */ /* END NESTED PROCEDURES */ /* ******************************************************************** */ V.number = number_; V.place = 1; for (count = 1; count <= logplace; count++) V.place *= 10; if (V.number == 0) { if (V.place == 1) V.acharacter = '0'; else V.acharacter = ' '; return V.acharacter; } V.absolute = labs(V.number); if (V.absolute < V.place / 10) { V.acharacter = ' '; return V.acharacter; } if (V.absolute >= V.place) digit(&V); else sign(&V); return V.acharacter; } /* numberdigit */ #define ln10 2.30259 /* natural log of 10 - for conversion to log base 10 */ #define epsilon 0.00001 /* a small number to correct log base 10 errors */ /* end module numberdigit version = 'prgmod 3.96 85 mar 18 tds'; */ /* begin module numbersize */ Static long numbersize(n) long n; { /* calculate amount of space to be reserved for the integer n */ if (n == 0) return 1; else { return ((long)(log((double)labs(n)) / ln10 + epsilon) + 2); /* the 2 is for the sign and last digit */ /* the epsilon assures that we do not lose a place due to roundoff. eg, sometimes log base 10 of 10 would be 0.9999 instead of 1, and we would not do it right... note: this will fail for very large numbers on the order of 1/epsilon. */ } } /* numbersize */ #undef ln10 #undef epsilon /* end module numbersize version = 'prgmod 3.96 85 mar 18 tds'; */ /* begin module numberbar */ Static Void numberbar(afile, spaces, firstnumber, lastnumber, linesused) _TEXT *afile; long spaces, firstnumber, lastnumber, *linesused; { /* write a bar of numbers to a file, with several spaces before. the number of lines used is returned */ long logplace; /* the log of the digit being looked at */ long spacecount; /* count of spaces */ long number; /* the current number being written */ if (labs(firstnumber) > labs(lastnumber)) *linesused = numbersize(firstnumber); else *linesused = numbersize(lastnumber); for (logplace = *linesused - 1; logplace >= 0; logplace--) { for (spacecount = 1; spacecount <= spaces; spacecount++) putc(' ', afile->f); for (number = firstnumber; number <= lastnumber; number++) fputc(numberdigit(number, logplace), afile->f); putc('\n', afile->f); } } /* end module numberbar version = 'prgmod 3.96 85 mar 18 tds'; */ /* ************************************************************************ */ /* end module package.numbar version = 1.11; */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module writepiename */ Static Void writepiename(thefile, pie, namespace) _TEXT *thefile; piece *pie; long namespace; { /* write the piece name to the file, left justified in a field of namespace */ long index; /* index to the field */ for (index = 1; index <= namespace; index++) { if (index <= pie->key.hea.keynam.length) putc(pie->key.hea.keynam.letters[index-1], thefile->f); else putc(' ', thefile->f); } } #define namespace 10 /* the width of the field to write the name in */ #define width 10 /* the width of the field to write coordinates in */ /* end module writepiename */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module seqalign.writeseqline */ Static Void writeseqline(fout, apiece, length, alignedbase, fromparam, toparam, number) _TEXT *fout; piece *apiece; long length, alignedbase, fromparam, toparam, number; { /* write the sequence number, locus name, and sequence to the fout file */ long index; /* a loop control variable */ /* write the number of the sequence */ fprintf(fout->f, "%5ld ", number); /* write the locus name of the sequence */ writepiename(fout, apiece, (long)namespace); putc(' ', fout->f); /* write the starting coordinate of the sequence */ fprintf(fout->f, " %*ld ", width, inttopie(1L, apiece)); /* write the sequence */ for (index = fromparam; index <= toparam; index++) { if (withinalignment(index, alignedbase, length)) fputc(basetochar(getbase(index + alignedbase, apiece)), fout->f); else putc(' ', fout->f); } /* write the ending coordinate of the sequence */ fprintf(fout->f, " %ld\n", inttopie(toparam - fromparam + 1, apiece)); /* add the carriage return at the end of the line */ } #undef namespace #undef width /* end module seqalign.writeseqline */ /* begin module seqalign.dosequence */ Static Void dosequence(apiece, book, inst, piecenumber, alignedbase, length) piece **apiece; _TEXT *book, *inst; long piecenumber, *alignedbase, *length; { /* find the piece corresponding to piecenumber and return it in apiece. */ boolean foundpiece = false; /* did we locate the number we were supposed to? */ long linenumber; /* number of the line down the page we are on */ /* reset the files to make sure that they match */ if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); if (*inst->name != '\0') { if (inst->f != NULL) inst->f = freopen(inst->name, "r", inst->f); else inst->f = fopen(inst->name, "r"); } else rewind(inst->f); if (inst->f == NULL) _EscIO2(FileNotFound, inst->name); RESETBUF(inst->f, Char); /* search the book for the sequence piecenumber, and write it out */ while (!foundpiece) { clearpiece(apiece); /* clear the piece for reuse */ align(inst, book, apiece, length, alignedbase); if (BUFEOF(book->f)) continue; if (numbered) { if (number != piecenumber) linenumber++; else foundpiece = true; continue; } if (false) break; if (piecenumber > number || piecenumber < 1) { fprintf(clumps.f, "The requested sequence is not in book range.\n"); foundpiece = true; } } } /* end module seqalign.dosequence */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module cluster.readparameters */ Static Void readparameters(clusterp) _TEXT *clusterp; { /* read the parameters from clusterp */ Char letter1; /* a character on the line we are on */ if (*clusterp->name != '\0') { if (clusterp->f != NULL) clusterp->f = freopen(clusterp->name, "r", clusterp->f); else clusterp->f = fopen(clusterp->name, "r"); } else rewind(clusterp->f); if (clusterp->f == NULL) _EscIO2(FileNotFound, clusterp->name); RESETBUF(clusterp->f, Char); if (!BUFEOF(clusterp->f)) { fscanf(clusterp->f, "%c%*[^\n]", &letter1); getc(clusterp->f); if (letter1 == '\n') letter1 = ' '; if (letter1 == 'y') debugging = true; else debugging = false; fscanf(clusterp->f, "%c%*[^\n]", &letter1); getc(clusterp->f); if (letter1 == '\n') { /* read the showfragments parameter (second line) */ letter1 = ' '; } if (letter1 == 'y') showfragments = true; else showfragments = false; fscanf(clusterp->f, "%c%*[^\n]", &letter1); getc(clusterp->f); if (letter1 == '\n') { /* read the writepairs parameter (third line) */ letter1 = ' '; } if (letter1 == 'y') writepairs = true; else writepairs = false; fscanf(clusterp->f, "%c%*[^\n]", &letter1); getc(clusterp->f); if (letter1 == '\n') { /* read the writeclumps parameter (fourth line) */ letter1 = ' '; } if (letter1 == 'y') writeclumps = true; else writeclumps = false; /* read the matchparameter (fifth line) */ fscanf(clusterp->f, "%ld%*[^\n]", &matchparameter); getc(clusterp->f); return; } /* read the debugging parameter (first line) */ printf("The parameter file clusterp is empty.\n"); printf("Program halt.\n"); halt(); } /* Local variables for writeparameters: */ struct LOC_writeparameters { _TEXT *fout; } ; Local Void onoff(toggle, LINK) boolean toggle; struct LOC_writeparameters *LINK; { /* add the on or off to the parameter line */ if (toggle) fprintf(LINK->fout->f, "on\n"); else fprintf(LINK->fout->f, "off\n"); } /* onoff */ /* end module cluster.readparameters */ /* begin module cluster.writeparameters */ Static Void writeparameters(fout_) _TEXT *fout_; { /* write the parameter conditions to the fout file */ struct LOC_writeparameters V; /* write the condition of the debugging parameter */ V.fout = fout_; fprintf(V.fout->f, "* debugging "); onoff(debugging, &V); /* write the condition of the showfragments parameter */ fprintf(V.fout->f, "* showfragments "); onoff(showfragments, &V); /* write the condition of the writepairs parameter */ fprintf(V.fout->f, "* writepairs "); onoff(writepairs, &V); /* write the condition of the writeclumps parameter */ fprintf(V.fout->f, "* writeclumps "); onoff(writeclumps, &V); /* write the number of the matchparameter */ if (matchparameter < 0) matchparameter = 0; fprintf(V.fout->f, "* matchparameter is set at %ld\n\n", matchparameter); /* add some spaces to set them apart from the cluster output */ } /* writeparameters */ /* end module cluster.writeparameters */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module cluster.getwindowsize */ Static Void getwindowsize(fin, windowsize) _TEXT *fin; long *windowsize; { /* read the windowsize from the indana subindex for later use */ Char acharacter; /* a character in the fin file */ /* move to the windowsize number */ do { acharacter = getc(fin->f); if (acharacter == '\n') acharacter = ' '; } while (acharacter != '['); /* read it */ fscanf(fin->f, "%ld", windowsize); } /* end module cluster.getwindowsize */ /* begin module cluster.filehead */ Static Void filehead(fin, fout) _TEXT *fin, *fout; { /* place the heading lines on the fout file */ long copynumber = 12; /* the number of lines being copied from the text */ if (*fout->name != '\0') { if (fout->f != NULL) fout->f = freopen(fout->name, "w", fout->f); else fout->f = fopen(fout->name, "w"); } else { if (fout->f != NULL) rewind(fout->f); else fout->f = tmpfile(); } if (fout->f == NULL) _EscIO2(FileNotFound, fout->name); SETUPBUF(fout->f, Char); /* write the name and version of the program */ fprintf(fout->f, "cluster %4.2f\n", version); /* copy the identification lines to the fout */ if (*fin->name != '\0') { if (fin->f != NULL) fin->f = freopen(fin->name, "r", fin->f); else fin->f = fopen(fin->name, "r"); } else rewind(fin->f); if (fin->f == NULL) _EscIO2(FileNotFound, fin->name); RESETBUF(fin->f, Char); if (copylines(fin, fout, copynumber) != copynumber) { fprintf(fout->f, "subind does not have enough header lines\n"); halt(); } /* skip the next line of fin to avoid the headings of the subindex data */ fscanf(fin->f, "%*[^\n]"); getc(fin->f); } /* end module cluster.filehead */ /* begin module cluster.readdata */ Static Void readdata(fin, seqnumber, coord, lastchar) _TEXT *fin; long *seqnumber, *coord; Char *lastchar; { /* reads the necessary data elements from a line in the subind file */ /*writeln(output,'in readdata');*/ /*copyaline(fin,output);*/ /* skip data to the sequence number */ skipblanks(fin); /*zzz*/ /*writeln(output,'about to read this character: ',fin^);*/ skipnonblanks(fin); skipblanks(fin); /* read the number */ /*writeln(output,'about to read this character: ',fin^);*/ fscanf(fin->f, "%ld", seqnumber); fscanf(fin->f, "%ld", coord); /* move to and get the last character of the line */ while (!P_eoln(fin->f)) { *lastchar = getc(fin->f); if (*lastchar == '\n') *lastchar = ' '; } /* put in a carriage return to move to the next line */ fscanf(fin->f, "%*[^\n]"); getc(fin->f); } /* end module cluster.readdata */ /* ********************************************************************* */ /* ********************************************************************* */ /* begin module cluster.clearclump */ Static Void clearclump(a_) clump **a_; { /* initialize the variables in a clump record */ clump *WITH; WITH = *a_; WITH->piecelist = NULL; WITH->pairlist = NULL; WITH->nextclump = NULL; } /* end module cluster.clearclump */ /* begin module cluster.clearpair */ Static Void clearpair(a_) numberpair **a_; { /* initialize the variables in a pair record */ numberpair *WITH; WITH = *a_; WITH->leftpiece = 0; WITH->rightpiece = 0; WITH->leftseq = NULL; WITH->rightseq = NULL; WITH->seqbag = NULL; WITH->nextpair = NULL; } /* end module cluster.clearpair */ /* begin module cluster.clearbag */ Static Void clearbag(a_) bag **a_; { /* initialize the variables in a bag record */ bag *WITH; WITH = *a_; WITH->leftcoord = 0; WITH->rightcoord = 0; WITH->leftmatch = false; WITH->rightmatch = false; WITH->nextbag = NULL; } /* end module cluster.clearbag */ /* begin module cluster.clearentry */ Static Void clearentry(a_) entries **a_; { /* initialize the variables in an entry record */ entries *WITH; WITH = *a_; WITH->piece_ = 0; number = 0; WITH->coordinate = 0; WITH->nextentry = NULL; } /* end module cluster.clearentry */ /* begin module cluster.clearrange */ Static Void clearrange(a_) rangerecord **a_; { /* initialize the variables of a range record */ rangerecord *WITH; WITH = *a_; number = 0; WITH->singlescount = 0; WITH->nextrange = NULL; } /* end module cluster.clearrange */ /* begin module cluster.clearnumberlist */ Static Void clearnumberlist(a_) numberlist **a_; { /* initialize the variables in a sequence record */ numberlist *WITH; WITH = *a_; WITH->number = 0; WITH->nextlist = NULL; } /* end module cluster.clearnumberlist */ /* begin module cluster.getclump */ Static Void getclump(a_) clump **a_; { /* gets a clump off the list of free clump records */ if (freeclump == NULL) *a_ = (clump *)Malloc(sizeof(clump)); else { *a_ = freeclump; freeclump = freeclump->nextclump; } clearclump(a_); } /* end module cluster.getclump */ /* begin module cluster.getpair */ Static Void getpair(a_) numberpair **a_; { /* gets a pair off the list of free pair records */ if (freepair == NULL) *a_ = (numberpair *)Malloc(sizeof(numberpair)); else { *a_ = freepair; freepair = freepair->nextpair; } clearpair(a_); } /* end module cluster.getpair */ /* begin module cluster.getbag */ Static Void getbag(a_) bag **a_; { /* gets a bag off the list of free bag records */ if (freebag == NULL) *a_ = (bag *)Malloc(sizeof(bag)); else { *a_ = freebag; freebag = freebag->nextbag; } clearbag(a_); } /* end module cluster.getbag */ /* begin module cluster.getentry */ Static Void getentry(a_) entries **a_; { /* gets an entry off the list of free entry records */ if (freeentry == NULL) *a_ = (entries *)Malloc(sizeof(entries)); else { *a_ = freeentry; freeentry = freeentry->nextentry; } clearentry(a_); } /* end module cluster.getentry */ /* begin module cluster.getnumberlist */ Static Void getnumberlist(a_) numberlist **a_; { /* gets a sequence off the list of free sequence records */ if (freesequence == NULL) *a_ = (numberlist *)Malloc(sizeof(numberlist)); else { *a_ = freesequence; freesequence = freesequence->nextlist; } clearnumberlist(a_); } /* end module cluster.getnumberlist */ /* begin module putclump */ Static Void putclump(a_) clump *a_; { /* place a clump on the list of free pair records */ clearclump(&a_); a_->nextclump = freeclump; freeclump = a_; } /* end module putclump */ /* begin module putpair */ Static Void putpair(a_) numberpair *a_; { /* place a pair on the list of free pair records */ clearpair(&a_); a_->nextpair = freepair; freepair = a_; } /* end module putpair */ /* begin module putbag */ Static Void putbag(a_) bag *a_; { /* place a bag on the list of free bag records */ clearbag(&a_); a_->nextbag = freebag; freebag = a_; } /* end module putbag */ /* begin module putentry */ Static Void putentry(a_) entries *a_; { /* places an entry on the list of free entry records */ clearentry(&a_); a_->nextentry = freeentry; freeentry = a_; } /* end module putentry */ /* begin module putnumberlist */ Static Void putnumberlist(a_) numberlist *a_; { /* places a sequence on the list of free sequence records */ clearnumberlist(&a_); a_->nextlist = freesequence; freesequence = a_; } /* end module putnumberlist */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module cluster.findranges */ Static long findranges(instnumber, list) long instnumber; rangerecord *list; { /* returns the piecenumber that corresponds to instnumber */ long Result; rangerecord *current = list; /* the current pointer position on the list */ boolean foundpiecenumber = false; /* are we in the right range? */ long instcounter = 0; /* a running count of instruction numbers */ long piececounter = 1; /* a running count of piece numbers */ do { if (instnumber <= current->instnumber) { foundpiecenumber = true; if (instnumber > instcounter + current->singlescount) Result = piececounter; else Result = piececounter - current->singlescount + instnumber - instcounter - 1; } else { instcounter = current->instnumber; current = current->nextrange; if (current != NULL) piececounter += current->singlescount + 1; else printf("(would have bombed!)\n"); } } while (!(foundpiecenumber || current == NULL)); return Result; } /* end module cluster.findranges */ /* begin module cluster.addranges */ Static Void addrange(number, piecenumber, numberlist_) long number, piecenumber; rangerecord **numberlist_; { /* add a piecenumber and the singlesnumber to the numberlist */ rangerecord *current; /* the pointer position before next */ rangerecord *last; /* the pointer position before current */ rangerecord *next; /* the working position on the numberlist */ long piececounter = 0; /* keeps track of the piecenumber */ /* if the list is nil make a new one */ if (*numberlist_ == NULL) { *numberlist_ = (rangerecord *)Malloc(sizeof(rangerecord)); clearrange(numberlist_); (*numberlist_)->instnumber = number; return; } /* initalize the counter and the pointers */ last = *numberlist_; current = *numberlist_; next = *numberlist_; /* get to the end of the list */ while (next != NULL) { last = current; current = next; piececounter += next->singlescount + 1; next = next->nextrange; } /* decide if we need a new range or not */ if (piecenumber == piececounter) { current->instnumber = number; return; } if (number - last->instnumber - current->singlescount == 2) { current->instnumber = number; current->singlescount++; return; } next = (rangerecord *)Malloc(sizeof(rangerecord)); clearrange(&next); next->instnumber = number; current->nextrange = next; } /* end module cluster.addranges */ /* begin module cluster.createrangelist */ Static Void createrangelist(book, numberlist_, apiece) _TEXT *book; rangerecord **numberlist_; piece *apiece; { /* generate the list of instruction-piece relationships */ name lastname; /* the name before the one we are on */ long piececounter = 0; /* a running counter of the piece we are on */ long piecenumber = 0; /* the number of the piece we are on */ /* initialize the variables */ if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); /* go through the entire book */ while (!BUFEOF(book->f)) { /* get piece number and name */ getpiece(book, &apiece); if (BUFEOF(book->f)) break; /* keep track of the piece number */ if (strncmp(apiece->key.hea.keynam.letters, lastname.letters, sizeof(alpha))) piecenumber++; /* insert the new stuff */ addrange(number, piecenumber, numberlist_); lastname = apiece->key.hea.keynam; clearpiece(&apiece); /* clear the piece for reuse */ } } /* end module cluster.createrangelist */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module cluster.overmatched */ Static boolean overmatched(currentpair, totallength, matchlength) numberpair *currentpair; long totallength, matchlength; { /* find the maximum # of matches and check to see if it has been exceeded */ long count = 0; /* the number of matches in the bag */ bag *currentbag; /* the pointer to the currentbag */ numberlist *index; /* an index pointer to traverse the instlists */ long insttotal = 0; /* number of instructions on the pair instlists */ long matchesperinst; /* maximum number of amtches per instruction */ long maxmatch; /* the maximum number of possible matches */ bag *WITH; /* initialixe some variables */ /* go through the instlists and count the number of instructions present */ index = currentpair->leftseq; while (index != NULL) { insttotal++; index = index->nextlist; } /* find the maximum number of matches */ if (matchparameter > 0) matchesperinst = matchparameter; else matchesperinst = totallength / matchlength; maxmatch = insttotal * matchesperinst; /* count the number of elements in the pair bag */ currentbag = currentpair->seqbag; while (currentbag != NULL) { WITH = currentbag; if (WITH->nextbag != NULL) { if (WITH->rightmatch && WITH->nextbag->leftmatch) count++; } if (!(WITH->leftmatch || WITH->rightmatch)) count++; currentbag = currentbag->nextbag; } return (count > maxmatch); } /* end module cluster.overmatched */ /* begin module cluster.emptybag */ Static Void emptybag(baglist) bag **baglist; { /* remove all coordinates from pair^.seqbag */ bag *trashbag; /* the pointer to the bag being thrown away */ while (*baglist != NULL) { trashbag = *baglist; *baglist = (*baglist)->nextbag; trashbag->nextbag = NULL; putbag(trashbag); } } /* end module cluster.emptybag */ /* ********************************************************************* */ /* ********************************************************************* */ /* begin module cluster.showRAWbag */ Static Void showRAWbag(fout, pairs) _TEXT *fout; numberpair *pairs; { /* show the linked bag bagview. This shows the data in with a set of coordinate pairs for the corresponding sequences. */ bag *bagview; /* pointer to one of the bag contents */ long leftprev = 0; /* storage variable of the previous left coordinate */ long rightprev = 0; /* storage variable of the previous right coordinate */ bag *WITH; /* initialize the variables */ bagview = pairs->seqbag; while (bagview != NULL) { WITH = bagview; /* write the coordinates and match patterns */ if (WITH->leftmatch) fprintf(fout->f, " L "); else fprintf(fout->f, " - "); fprintf(fout->f, " (%5ld,%5ld) ", WITH->leftcoord, WITH->rightcoord); if (WITH->rightmatch) fprintf(fout->f, " R "); else fprintf(fout->f, " - "); /* write the length of the duplicate sequences */ if (leftprev != 0) fprintf(fout->f, " {%ld} ", WITH->leftcoord - leftprev + 1); if (rightprev != 0) fprintf(fout->f, " {%ld} ", WITH->rightcoord - rightprev + 1); putc('\n', fout->f); /* advance the variables */ bagview = bagview->nextbag; leftprev = WITH->leftcoord; rightprev = WITH->rightcoord; } } /* end module cluster.showRAWbag */ /* begin module cluster.multi */ Static Void multi(f, c_, n) _TEXT *f; Char c_; long n; { /* write out n characters c to file f */ long i; /* index */ for (i = 1; i <= n; i++) putc(c_, f->f); } #define width 10 /* number of characters per integer */ /* Local variables for showbag: */ struct LOC_showbag { _TEXT *fout; numberpair *pairs; bag *abag; /* current content of the bag */ long half; /* half of the width constant */ } ; /* ********************************************************************* */ /* NESTED PROCEDURES */ Local Void bar(LINK) struct LOC_showbag *LINK; { /* write a bar to the fout of the width of the bag */ bag *WITH; LINK->abag = LINK->pairs->seqbag; while (LINK->abag != NULL) { /* one width per coordinate pair */ /* cover the case of a single bag element */ WITH = LINK->abag; if (!(WITH->leftmatch || WITH->rightmatch)) multi(LINK->fout, '*', (long)width); multi(LINK->fout, '*', (long)width); LINK->abag = LINK->abag->nextbag; } multi(LINK->fout, '*', LINK->half); /* a half-width offset for spacing */ putc('*', LINK->fout->f); /* one more for the | mark after the seq #'s */ } Local Void writemid(afile, number, LINK) _TEXT *afile; long number; struct LOC_showbag *LINK; { /* write the number number to file afile, so that the number is in the middle of width characters */ long half; /* the space before and after the number */ long numdigits; /* number of characters in the number */ if (number != 0) numdigits = (long)(log((double)labs(number)) / log(10.0)) + 1; /* log10(n) + 1 */ else numdigits = 1; /* case of n = 0 */ if (number < 0) /* add a - sign */ numdigits++; half = (long)((width - numdigits) / 2.0); /* find half the space */ multi(afile, ' ', half); /* SPACES BEFORE the number */ fprintf(afile->f, "%*ld", (int)numdigits, number); /* the NUMBER ITSELF */ multi(afile, ' ', half); /* SPACES AFTER the number */ /* trailing spaces */ multi(afile, ' ', width - half * 2 - numdigits); } /* end module cluster.multi */ /* begin module cluster.showbag */ Static Void showbag(fout_, windowsize, pairs_, apiece) _TEXT *fout_; long windowsize; numberpair *pairs_; piece *apiece; { /* show the linked bag bagview in pairs */ struct LOC_showbag V; long alignedbase; /* the aligned base of the piece */ long leftprev; /* left coordinate of previous bag */ long length; /* the length of the sequence of the piece */ long rightprev; /* right coordinate of previous bag */ long seqlength = 0; /* the real length of the sequence in the fout */ bag *WITH; /* end NESTED PROCEDURES */ /* ********************************************************************* */ V.fout = fout_; V.pairs = pairs_; /* p2c: cluster.p: Note: Eliminated unused assignment statement [338] */ /* p2c: cluster.p: Note: Eliminated unused assignment statement [338] */ V.half = width / 2; /* a half-width to use as an offset */ bar(&V); /* makes the bar at the top of every sequence pair */ multi(V.fout, '*', 11L); putc('\n', V.fout->f); /* do the top line of sequence coordinates */ multi(V.fout, ' ', (long)namelength); putc('|', V.fout->f); V.abag = V.pairs->seqbag; while (V.abag != NULL) { WITH = V.abag; if (WITH->rightmatch) writemid(V.fout, V.abag->leftcoord, &V); if (WITH->nextbag != NULL) { if (WITH->nextbag->leftmatch) { seqlength = windowsize + WITH->nextbag->leftcoord - WITH->leftcoord; writemid(V.fout, WITH->leftcoord + seqlength - 1, &V); } } /* check to see if it is an unpaired bag element */ /* (not leftmatch) AND (not rightmatch) */ if (!(WITH->leftmatch || WITH->rightmatch)) { writemid(V.fout, V.abag->leftcoord, &V); seqlength = windowsize; writemid(V.fout, WITH->leftcoord + seqlength - 1, &V); } V.abag = V.abag->nextbag; } putc('\n', V.fout->f); /* do the first sequence number and the cluster difference */ dosequence(&apiece, &book, &inst, V.pairs->leftseq->number, &alignedbase, &length); fprintf(V.fout->f, "%.*s", namelength, apiece->key.hea.keynam.letters); putc('|', V.fout->f); /* bar after sequence numbers */ multi(V.fout, ' ', V.half); V.abag = V.pairs->seqbag; while (V.abag != NULL) { WITH = V.abag; if (WITH->leftmatch) { seqlength = windowsize + WITH->leftcoord - leftprev; multi(V.fout, '-', (long)width); } else if (leftprev != 0) writemid(V.fout, WITH->leftcoord - leftprev - seqlength, &V); /* check to see if it is an unpaired bag element */ /* (not leftmatch) AND (not rightmatch) */ if (!(WITH->leftmatch || WITH->rightmatch)) { multi(V.fout, '-', (long)width); if (WITH->nextbag != NULL) writemid(V.fout, WITH->nextbag->leftcoord - WITH->leftcoord - windowsize, &V); } if (WITH->rightmatch) leftprev = WITH->leftcoord; V.abag = V.abag->nextbag; } putc('\n', V.fout->f); /* do the middle */ multi(V.fout, ' ', (long)namelength); putc('|', V.fout->f); /* bar after sequence blanks */ multi(V.fout, ' ', V.half); leftprev = 0; if (V.pairs->seqbag == NULL) fprintf(V.fout->f, " WARNING: sequence pairs have too many matches."); V.abag = V.pairs->seqbag; while (V.abag != NULL) { WITH = V.abag; if (WITH->leftmatch) { seqlength = windowsize + WITH->rightcoord - rightprev; writemid(V.fout, seqlength, &V); } else if (leftprev != 0) multi(V.fout, ' ', (long)width); /* check to see if it is a single bag element */ /* (not leftmatch) AND (not rightmatch) */ if (!(WITH->leftmatch || WITH->rightmatch)) { seqlength = windowsize; writemid(V.fout, seqlength, &V); } /* an error checker that shouldn't function unless there is a BIG bug */ if (WITH->leftcoord - leftprev != WITH->rightcoord - rightprev && WITH->leftmatch) { fprintf(V.fout->f, "(!"); fprintf(V.fout->f, " %*ld", width, WITH->leftcoord - leftprev + 1); fprintf(V.fout->f, " %*ld", width, WITH->rightcoord - rightprev + 1); fprintf(V.fout->f, "!)"); } leftprev = WITH->leftcoord; rightprev = WITH->rightcoord; V.abag = V.abag->nextbag; } putc('\n', V.fout->f); /* do the second sequence number and cluster difference */ dosequence(&apiece, &book, &inst, V.pairs->rightseq->number, &alignedbase, &length); fprintf(V.fout->f, "%.*s", namelength, apiece->key.hea.keynam.letters); putc('|', V.fout->f); /* bar after sequence numbers */ multi(V.fout, ' ', V.half); rightprev = 0; V.abag = V.pairs->seqbag; while (V.abag != NULL) { WITH = V.abag; if (WITH->leftmatch) { seqlength = windowsize + WITH->rightcoord - rightprev; multi(V.fout, '-', (long)width); } else if (rightprev != 0) writemid(V.fout, WITH->rightcoord - rightprev - seqlength, &V); /* check to see if it is an unpaired bag element */ /* (not leftmatch) AND (not rightmatch) */ if (!(WITH->leftmatch || WITH->rightmatch)) { multi(V.fout, '-', (long)width); if (WITH->nextbag != NULL) writemid(V.fout, WITH->nextbag->rightcoord - WITH->rightcoord - windowsize, &V); } if (WITH->rightmatch) rightprev = WITH->rightcoord; V.abag = V.abag->nextbag; } putc('\n', V.fout->f); /* do the bottom line of sequence coordinates */ multi(V.fout, ' ', (long)namelength); putc('|', V.fout->f); V.abag = V.pairs->seqbag; while (V.abag != NULL) { WITH = V.abag; if (WITH->rightmatch) writemid(V.fout, WITH->rightcoord, &V); if (WITH->nextbag != NULL) { if (WITH->nextbag->leftmatch) { seqlength = windowsize + WITH->nextbag->rightcoord - WITH->rightcoord; writemid(V.fout, WITH->rightcoord + seqlength - 1, &V); } } /* check to see if it is an unpaired bag element */ /* (not leftmatch) AND (not rightmatch) */ if (!(WITH->leftmatch || WITH->rightmatch)) { writemid(V.fout, WITH->rightcoord, &V); seqlength = windowsize; writemid(V.fout, WITH->rightcoord + seqlength - 1, &V); } V.abag = V.abag->nextbag; } putc('\n', V.fout->f); bar(&V); /* write the bar that signifies the end of the entry */ multi(V.fout, '*', 11L); fprintf(V.fout->f, "\n\n"); } #undef width /* end module cluster.showbag */ /* begin module cluster.showlist */ Static Void showlist(fout, windowsize, totallength, rootpair, apiece) _TEXT *fout; long windowsize, totallength; numberpair **rootpair; piece *apiece; { /* show the linked list pairs using the windowsize from getwindowsize */ numberpair *pair; /* the current pair being done in the list */ numberpair *WITH; pair = *rootpair; while (pair != NULL) { WITH = pair; if ((!showfragments) & overmatched(pair, totallength, windowsize)) emptybag(&pair->seqbag); if (debugging) showRAWbag(fout, pair); showbag(fout, windowsize, pair, apiece); pair = pair->nextpair; } } /* end module cluster.showlist */ /* begin module cluster.printsequences */ Static Void printsequences(apiece, book, inst, fout, alignedbase, length, fromparam, toparam, list1, list2) piece *apiece; _TEXT *book, *inst, *fout; long *alignedbase, *length, *fromparam, *toparam; numberlist *list1, *list2; { /* get sequences from the book that match the instnumbers in list1 and list2 */ numberlist *current1 = list1; /* the pointer to the instructions on list1 */ numberlist *current2 = list2; /* the pointer to the instructions on list2 */ /* write the sequences corresponding to the instruction number pairs */ while (current1 != NULL) { dosequence(&apiece, book, inst, current1->number, alignedbase, length); writeseqline(fout, apiece, *length, *alignedbase, *fromparam, *toparam, current1->number); dosequence(&apiece, book, inst, current2->number, alignedbase, length); writeseqline(fout, apiece, *length, *alignedbase, *fromparam, *toparam, current2->number); /* the alignedbase plus 28 spaces for the instnumber, piecename, starting coordinate, and the spaces between them */ multi(fout, ' ', *alignedbase + 28); fprintf(fout->f, "^\n\n"); current1 = current1->nextlist; current2 = current2->nextlist; } } /* end module cluster.printsequences */ /* begin module cluster.showclumpedlist */ Static Void showclumpedlist(fout, clumproot, windowsize, totallength, fromparam, toparam, apiece) _TEXT *fout; clump **clumproot; long windowsize, totallength, fromparam, toparam; piece *apiece; { /* show the clumped listing in clumproot to file fout using the windowsize from getwindowsize */ long alignedbase; /* the aligned base of the piece of dosequence */ clump *clump_; /* the current position in the clumplist */ numberlist *clumpseq; /* the sequences in a given clump */ numberpair *clumppair; /* the pairs in a given clump */ long length; /* the length of the sequences found by dosequence */ clump *WITH; clump_ = *clumproot; while (clump_ != NULL) { WITH = clump_; /* print the beginning dividing marks */ multi(fout, '#', 70L); putc('\n', fout->f); multi(fout, '#', 70L); fprintf(fout->f, "\nThe related pieces in this clump are: \n"); /* print the list of sequences contained in the clump */ clumpseq = WITH->piecelist; while (clumpseq != NULL) { fprintf(fout->f, "%ld", clumpseq->number); if (clumpseq->nextlist != NULL) putc(',', fout->f); clumpseq = clumpseq->nextlist; } fprintf(fout->f, "\nTheir pairings are as follows: \n\n"); /* print the pairs in the clump */ clumppair = clump_->pairlist; while (clumppair != NULL) { if ((!showfragments) & overmatched(clumppair, totallength, windowsize)) emptybag(&clumppair->seqbag); showbag(fout, windowsize, clumppair, apiece); /* write the sequences corresponding to the instruction number pairs */ printsequences(apiece, &book, &inst, fout, &alignedbase, &length, &fromparam, &toparam, clumppair->leftseq, clumppair->rightseq); clumppair = clumppair->nextpair; } /* print the ending dividing marks */ putc('\n', fout->f); multi(fout, '#', 70L); putc('\n', fout->f); multi(fout, '#', 70L); fprintf(fout->f, "\n\n\n"); /* go to the next clump */ clump_ = clump_->nextclump; } } /* end module cluster.showclumpedlist */ /* begin module cluster.fullinsert */ Static Void fullinsert(pair, clumproot) numberpair **pair; clump **clumproot; { /* make a new clump, stick it on the list, and place a pair's worth of data in it (two piece numbers on the piecelist and a pair on the pairlist) */ clump *newclump; /* a pointer to a clump */ numberlist *newpiece; /* a pointer to a sequence */ /* make a new clump */ getclump(&newclump); /* place the instnumbers on the piecelist */ getnumberlist(&newpiece); getnumberlist(&newpiece->nextlist); newpiece->number = (*pair)->leftpiece; newpiece->nextlist->number = (*pair)->rightpiece; newclump->piecelist = newpiece; /* place the pair on the clump */ newclump->pairlist = *pair; /* place the new clump on the clumplist */ newclump->nextclump = *clumproot; *clumproot = newclump; } /* end module cluster.fullinsert */ /* begin module cluster.insertclump */ Static Void insertclump(pair, currentclump, leftfound, rightfound) numberpair **pair; clump **currentclump; boolean leftfound, rightfound; { /* insert a sequence number and its pair on an already existing clump */ numberlist *newpiece; /* the new sequence being introduced */ /* if one of the sequences is not on the list, add it on */ if (leftfound == rightfound) return; /* add the piecenumber to the piece list */ getnumberlist(&newpiece); if (leftfound) newpiece->number = (*pair)->rightpiece; if (rightfound) newpiece->number = (*pair)->leftpiece; newpiece->nextlist = (*currentclump)->piecelist; (*currentclump)->piecelist = newpiece; /* insert the pair onto the clump pairlist */ (*pair)->nextpair = (*currentclump)->pairlist; (*currentclump)->pairlist = *pair; } /* end module cluster.insertclump */ /* begin module cluster.clumppairs */ Static Void clumppairs(pair, root) numberpair **pair; clump **root; { /* take a pair from the pairlist and put it into its spot on the clumplist */ boolean clumpdone; /* have we finished checking the clumps? */ clump *currentclump; /* a pointer to a clump */ numberlist *currentpiece; /* a pointer to a sequence */ boolean leftfound = false; /* does the pair leftsequence have a match? */ boolean rightfound = false; /* does the pair rightsequence have a match? */ boolean piecedone; /* have we finished checking the sequences? */ numberlist *WITH; currentclump = *root; clumpdone = (*root == NULL); while (!clumpdone) { leftfound = false; rightfound = false; piecedone = false; currentpiece = currentclump->piecelist; while (!piecedone) { WITH = currentpiece; if ((*pair)->leftpiece == WITH->number) leftfound = true; if ((*pair)->rightpiece == WITH->number) rightfound = true; piecedone = (WITH->nextlist == NULL || leftfound && rightfound); if (!piecedone) currentpiece = currentpiece->nextlist; } insertclump(pair, ¤tclump, leftfound, rightfound); currentclump = currentclump->nextclump; clumpdone = (currentclump == NULL || leftfound || rightfound); } if (currentclump == NULL && !(leftfound || rightfound)) fullinsert(pair, root); } /* end module cluster.clumppairs */ /* begin module cluster.removebag */ Static Void removebag(prior, defendent) bag **prior, **defendent; { /* Checks to see if the defendent bag is continuous with the surrounding sequence. If so, it will be removed. */ /* check for a match between the two bags */ if ((*defendent)->leftcoord == (*prior)->leftcoord + 1 && (*defendent)->rightcoord == (*prior)->rightcoord + 1) { (*prior)->rightmatch = true; (*defendent)->leftmatch = true; } /* if a bag is matched on both sides, pop it off the list */ if (!((*defendent)->leftmatch && (*defendent)->rightmatch)) return; (*prior)->nextbag = (*defendent)->nextbag; (*defendent)->nextbag = NULL; putbag(*defendent); } /* end module cluster.removebag */ /* begin module cluster.baginsert */ Static Void baginsert(root, coord1, coord2) bag **root; long *coord1, *coord2; { /* inserts a set of coordinates into the baglist of a sequence */ bag *currentbag; /* the coordinates of the current variable */ boolean done = false; /* are we done with insertion?? */ bag *lastlastbag; /* variable to keep the start of a possible crunch */ bag *lastbag; /* a pointer to the previous bag on the list */ bag *newbag; /* bag being inserted in the middle of the list */ /* put the coordinates in a new bag */ getbag(&newbag); newbag->leftcoord = *coord1; newbag->rightcoord = *coord2; /* go through the list to find the position of the new bag and insert it */ currentbag = *root; lastbag = *root; lastlastbag = *root; while (!done) { if (currentbag == NULL) { if (*root == NULL) { *root = newbag; lastbag = *root; } else lastbag->nextbag = newbag; continue; } /* p2c: cluster.p: Note: Eliminated unused assignment statement [338] */ if (currentbag->leftcoord < *coord1 && currentbag->rightcoord < *coord2) { lastlastbag = lastbag; lastbag = currentbag; currentbag = currentbag->nextbag; continue; } newbag->nextbag = currentbag; if (currentbag == *root) { *root = newbag; lastbag = *root; } else lastbag->nextbag = newbag; done = true; } /* check the newbag and adjacent bags to see if they can be crunched */ if (currentbag != NULL) removebag(&newbag, ¤tbag); if (newbag != *root) removebag(&lastbag, &newbag); if (lastbag != *root) removebag(&lastlastbag, &lastbag); } /* end module cluster.baginsert */ /* begin module cluster.insertinst */ Static Void insertinst(apair, firstinst, secondinst) numberpair **apair; long firstinst, secondinst; { /* insert two instruction numbers on the lists of apair */ boolean insert; /* do we insert an instruction pair yet or not */ boolean done = false; /* are we finished trying to place the insts yet? */ numberlist *lastleft; /* the previous spot on the left list */ numberlist *lastright; /* the previous spot on the right list */ numberlist *left; /* the current position on the left inst list */ numberlist *newlist1; /* a new numberlist to stick new values in */ numberlist *newlist2; /* a new numberlist to stick new values in */ numberlist *right; /* the current position on the right inst list */ /* initialize the search pointers */ left = (*apair)->leftseq; right = (*apair)->rightseq; lastleft = (*apair)->leftseq; lastright = (*apair)->rightseq; /* find the position of the pair of instruction numbers and insert them */ while (!done) { /* variable conditions for inserting a pair of instructions */ insert = (left == NULL); if (!insert) insert = (firstinst <= left->number); if (!insert) { /* advance the pointers */ lastleft = left; lastright = right; left = left->nextlist; right = right->nextlist; continue; } /* create the two new lists and put the numbers in them */ getnumberlist(&newlist1); newlist1->number = firstinst; getnumberlist(&newlist2); newlist2->number = secondinst; if (left == NULL) { /* if we are at the end of the list */ if ((*apair)->leftseq != NULL) { /* then stick the newlist on the end of the list */ lastleft->nextlist = newlist1; lastright->nextlist = newlist2; } else { /* else start the list if nothing is there yet */ (*apair)->leftseq = newlist1; (*apair)->rightseq = newlist2; } } else if (firstinst != left->number && secondinst != right->number) { /* if the new pair is unique */ if (left != lastleft) { /* stick a pair in the middle of the list */ newlist1->nextlist = lastleft->nextlist; lastleft->nextlist = newlist1; newlist2->nextlist = lastright->nextlist; lastright->nextlist = newlist2; } else { /* stick a pair on the top of the list */ newlist1->nextlist = (*apair)->leftseq; (*apair)->leftseq = newlist1; newlist2->nextlist = (*apair)->rightseq; (*apair)->rightseq = newlist2; } } else { /* throw out the unused numberlists */ putnumberlist(newlist1); putnumberlist(newlist2); } done = true; } } /* end module cluster.insertinst */ /* begin module cluster.pairinsert */ Static Void pairinsert(root, first, second) numberpair **root; entries **first, **second; { /* takes a pair of sequences from the subind file and inserts it on the list */ numberpair *current; /* record of a set of sequence data */ boolean foundone = false; /* found a duplicate pair */ numberpair *WITH; /* only place the pair on the list if it is two distinct pieces */ if ((*first)->piece_ == (*second)->piece_) return; /* find the position of the pair of sequences and insert it */ current = *root; while (!foundone) { if (current != NULL) { if (current->leftpiece == (*first)->piece_ && current->rightpiece == (*second)->piece_) foundone = true; else current = current->nextpair; continue; } /* stick the new pair on the end of the list */ getpair(¤t); if (*root != NULL) current->nextpair = *root; *root = current; foundone = true; } /* write sequence numbers into pairbag and insert all coordinates */ WITH = current; /* add the new piecenumbers */ WITH->leftpiece = (*first)->piece_; WITH->rightpiece = (*second)->piece_; /* stick the instructions on the instlists */ if (current->leftseq == NULL) insertinst(¤t, (*first)->inst, (*second)->inst); else if (writeclumps) insertinst(¤t, (*first)->inst, (*second)->inst); /* add the coordinates to the bag */ baginsert(&WITH->seqbag, &(*first)->coordinate, &(*second)->coordinate); } /* end module cluster.pairinsert */ /* begin module cluster.completelist */ Static Void completelist(piecenumber, pieceinst, instcoord, lastchar, entryroot, pairroot) long piecenumber, pieceinst, instcoord; Char lastchar; entries **entryroot; numberpair **pairroot; { /* checks entries to make sure all possible pairs are made */ entries *current; /* pointer to the current entry in the list */ entries *lastentry; /* pointer to the previous entry on the list */ entries *newentry; /* pointer to the values being inserted */ /* make a new entry and fill it with the new numbers */ getentry(&newentry); newentry->piece_ = piecenumber; newentry->inst = pieceinst; newentry->coordinate = instcoord; newentry->nextentry = NULL; /* empty the entrylist if we are no longer in a subindex match */ if (lastchar != '*') { while (*entryroot != NULL) { lastentry = *entryroot; *entryroot = (*entryroot)->nextentry; putentry(lastentry); } } /* make all possible entry pairs */ current = *entryroot; while (current != NULL) { lastentry = current; /* send the two entries through pairinsert in a least/greatest lineup */ if (lastentry->piece_ < newentry->piece_) pairinsert(pairroot, &lastentry, &newentry); else pairinsert(pairroot, &newentry, &lastentry); current = current->nextentry; } if (*entryroot != NULL) lastentry->nextentry = newentry; else *entryroot = newentry; } /* end module cluster.completelist */ /* begin module cluster.pairclumps */ Static Void pairclumps(pairroot, clumproot) numberpair **pairroot; clump **clumproot; { /* take individual pairs off of the pairlist and clump them */ numberpair *pair; /* an individual pair in the pairlist */ /* move down through the pairlist */ while (*pairroot != NULL) { /* pop the top pair off of the list and make sure it's isolated */ pair = *pairroot; *pairroot = pair->nextpair; pair->nextpair = NULL; /* send the pair to the clumping routine to stick it on the clumplist */ clumppairs(&pair, clumproot); } } /* end module cluster.pairclumps */ /* begin module cluster.themain */ Static Void themain(clusterp, subind, inst, book, pairs, clumps) _TEXT *clusterp, *subind, *inst, *book, *pairs, *clumps; { /* the main procedure of the program */ piece *apiece; /* a piece pointer used throughout the program */ clump *clumproot = NULL; /* linked list of clumps */ long coord = 0; /* coordinate of the duplicate in sequence 1 */ entries *entryroot = NULL; /* linked list of sequence entries */ long fromparam = 0; /* the farthest aligned position back from 0 */ long instnumber = 0; /* instruction number in the index */ Char lastchar = ' '; /* last character in a line of the subind file */ rangerecord *numberlist_ = NULL; /* the root of the piece range list */ numberpair *pairroot = NULL; /* linked list of sequence repeats */ long toparam = 0; /* the farthest aligned postion in front of 0 */ long totallength = 0; /* the total range of bases */ long windowsize = 0; /* the size of the window used by indana */ _TEXT TEMP; /* announce the program */ printf("cluster %4.2f\n", version); /* reset the input files and pointers for printing the sequences */ brinit(book); if (*inst->name != '\0') { if (inst->f != NULL) inst->f = freopen(inst->name, "r", inst->f); else inst->f = fopen(inst->name, "r"); } else rewind(inst->f); if (inst->f == NULL) _EscIO2(FileNotFound, inst->name); RESETBUF(inst->f, Char); apiece = (piece *)Malloc(sizeof(piece)); /* set the initial values for the sequence and pointer variables to zero */ freebag = NULL; freeclump = NULL; freeentry = NULL; freepair = NULL; freesequence = NULL; matchparameter = 0; /* read the windowsize from the first line of the subindex */ if (*subind->name != '\0') { if (subind->f != NULL) subind->f = freopen(subind->name, "r", subind->f); else subind->f = fopen(subind->name, "r"); } else rewind(subind->f); if (subind->f == NULL) _EscIO2(FileNotFound, subind->name); RESETBUF(subind->f, Char); getwindowsize(subind, &windowsize); /* read the book and generate the compacted list of piece ranges */ createrangelist(book, &numberlist_, apiece); /* read the parameters from clusterp */ /* PARAMETERS ARE READ ONCE AND ARE GLOBAL IN THE PROGRAM. THE NAMES ARE UNIQUE AND CAREFUL ATTENTION SHOULD BE PAID TO MAKE SURE THEY REMAIN SO. AFTER THE NAMES ARE READ, THEY ARE NOT MODIFIED IN ANY WAY. THIS SHOULD BE CAREFULLY WATCHED TO AVOID INADVERTENT CHANGES IN THE PARAMETER CONDITIONS */ readparameters(clusterp); /* place the heading lines on the output files */ if (writepairs) filehead(subind, pairs); if (writeclumps) filehead(subind, clumps); TEMP.f = stdout; *TEMP.name = '\0'; /* write the parameters to the output files */ writeparameters(&TEMP); if (writepairs) writeparameters(pairs); if (writeclumps) writeparameters(clumps); /* look at the book to find the maximum sequence length from aligned base */ maxminalignment(inst, book, &fromparam, &toparam); totallength = toparam - fromparam + 1; /* begin reading data from subind and pulling out numbers */ while (!BUFEOF(subind->f)) { /*writeln(output,'about to readdata');*/ /* read the data from the line of fout */ readdata(subind, &instnumber, &coord, &lastchar); /* go ahead and put the entry on the list and include all pairings */ completelist(findranges(instnumber, numberlist_), instnumber, coord, lastchar, &entryroot, &pairroot); } /* print out the list of sequence names */ if (writepairs) { printf("* about to print output file pairs\n"); showlist(pairs, windowsize, totallength, &pairroot, apiece); } /* pluck pairs off the top of pairroot and send them through clumppairs */ if (writeclumps) pairclumps(&pairroot, &clumproot); /* print the clumped list out */ if (writeclumps) { printf("* about to print output file clumps\n"); showclumpedlist(clumps, &clumproot, windowsize, totallength, fromparam, toparam, apiece); } } /* themain */ /* end module cluster.themain */ main(argc, argv) int argc; Char *argv[]; { PASCAL_MAIN(argc, argv); if (setjmp(_JL1)) goto _L1; inst.f = NULL; strcpy(inst.name, "inst"); subind.f = NULL; strcpy(subind.name, "subind"); pairs.f = NULL; strcpy(pairs.name, "pairs"); clusterp.f = NULL; strcpy(clusterp.name, "clusterp"); clumps.f = NULL; strcpy(clumps.name, "clumps"); book.f = NULL; strcpy(book.name, "book"); themain(&clusterp, &subind, &inst, &book, &pairs, &clumps); _L1: if (book.f != NULL) fclose(book.f); if (clumps.f != NULL) fclose(clumps.f); if (clusterp.f != NULL) fclose(clusterp.f); if (pairs.f != NULL) fclose(pairs.f); if (subind.f != NULL) fclose(subind.f); if (inst.f != NULL) fclose(inst.f); exit(EXIT_SUCCESS); } /* End. */