/* Output from p2c 1.21alpha-07.Dec.93, the Pascal-to-C translator */
/* From input file "dbmutate.p" */

#include <p2c/p2c.h>

/* dbmutate: mutate genbank database

   Tom Schneider
   NCI/FCRDC Bldg 469. Room 144
   P.O. Box B
   Frederick, MD 21702-1201
   (301) 846-5581 (-5532 for messages)
   permanent email: toms@alum.mit.edu
   toms@ncifcrf.gov
   http://www.lecb.ncifcrf.gov/~toms/

   National Cancer Institute
   Laboratory of Experimental and Computational Biology
*/

/* end of program */

/* begin module version */
#define version         1.90   /* of dbmutate.p 2000 January 3
1.90: 2000 January 3: Program deprecated.
1.85: 1999 April 2: markspots now works with VERY complex cases!
1.68: 1999 March 8: create minst instructions
1.56: 1999 March 6: prepare for absorbing changes into Delila:
      semicolon comment at end of line
1.55: 1999 March 4: markspots fully functional
1.43: 1999 Feb 12: invented markspots
1.42: 1999 Feb 11: Bug produced null character at end of sequence
      in procedure writesequence.
1.41: 1999 Feb 11: Retain the secondary ACCESSIONs in the dbout
1998 oct 19: remove bug that left garbage just after the ORIGIN line
origin 1996 October 5 */
#define updateversion   1.00   /* defines lowest acceptable current parameter file */
/* end module version */

/* begin module describe.dbmutate */
/*
name
   dbmutate: mutate genbank database

synopsis
   dbmutate(dbin: in, dbout: out, dbmutatep: in,
            markspots: out, minst: out, output: out)

files
   dbin: GenBank flat file format sequences.

   dbout: GenBank flat file format sequences, with mutations as specified
      by the parameters.

   dbmutatep: parameters to control the program.
      The first line must be the version number of the this program. This
      allows the program to recognize when the parameter file is old.

      A series of lines like this:

      K02402 g20633c

      This means that entry K02402 is to be grabbed, and the g at 20633 is
      to be changed to c. The output entry name will be "K02402.g20633c".
Multiple changes are allowed, separated by spaces: K02402 t1c g20633c Entries with no changes are allowed, they are just copied to dbout: K02402 To make a deletion, give the endpoints of the deletion range: M55114 d449,450 Both of the end points will be deleted. The numbers can be the same, to delete one base. To make an insertion or change, give the endpoints between which to REPLACE with a new string: M55114 i449,450tt The numbers cannot be the same. Use zero (0) to insert before the start of the sequence and a value larger than the sequence length to insert after the end of the sequence. Any number of spaces may be between the parts of each instruction, but the instructions must be one per line. Blank lines are skipped. Lines that begin with '*' are comments and are skipped. The parts of an instruction can be separated by spaces or by periods. The use of periods makes the notation consistent with the name given to the pieces generated. A semicolon indicates that the rest of the line is a comment. Program parameters can be adjusted by lines that begin with '@'. The form is '@ commandname value'. The adjustable parameters are: fromrange: the distance before the first base mutated to get in the delila instructions (see minst). torange: the distance after the first base mutated to get in the delila instructions (see minst). markspots: The locations to put marks for use by the lister program in the file marks. They are of the form: U 1055 0.0 1055 -20.0 0 (g->a) change where 1055 is the first coordinate given for a mutation. These can be concatenated with a file like marks.arrow to define the locations of mutations: cat marks.arrow markspots > marks The markspots are generated on the assumption that the user will want to display alternating pairs consisting of wild type sequence followed by mutant. The 'p' marks command, as defined in the lister program, is used to jump to the next piece. 
To use this mechanism start the dbmutatep with the GenBank entry for wild type sequences. Follow this by the mutations of that entry. (The program cannot handle more than one entry properly at this time.) For your delila instructions, write pairs of wild type sequence followed by mutant sequence. minst: delila instructions (inst) for grabbing the regions around the mutations. The from-to range will default to preset values (see technical notes) or can be adjusted with an "@" command in dbmutatep. output: messages to the user description Make mutation of GenBank sequences easy. Note that the copy function makes this program supersede dbpull (although this program is probably going to be much slower). Note that the insert function is fully capable of not only doing insertions but also changes and deletions. Beware that the numbering will be messed up with deletions; multiple deletions could be conflicting. THIS PROGRAM IS NOW DEPRECATED because Delila itself can make mutations. This program is still useful, however, for people not using the Delila system (shame on you) who wish to modify a GenBank entry. examples 1.70 version of dbmutate that this parameter file is designed for. * Lines that begin with '*' are a comment. * Substitute the second 10 bases of an entry K02402 i10,21ggggcccccc * Inserts before base 0 are considered to be at base 0, and ones after the * end of the sequence are at the end of the sequence. Here is an insert * from -20 to 1 of 10 bases followed by a deletion later: K02402 i-20,1ggggcccccc d61,70 * This kind of double insert and deletion of the same length is useful for * checking inserts and deletions by using the Unix diff program, because * only one line changes. * This one deletes the first 10 bases and makes a compensating insert: K02402 d-5,10.i20,21ggggcccccc * note that the instruction parts are separated by a period above. 
* Replace exactly 10 bases:
K02402 i10,21cccgggcccc
* Delete exactly 10 bases:
K02402 i10,21
* set the from-to range:
@ fromrange -25
@ torange +5

documentation

see also

   {Parameter file:} dbmutatep
   {Related Programs:} delila.p, dbbk.p, dbclean.p, dbpull.p, marks.arrow

author

   Thomas Dana Schneider

bugs

   * changesetmax should not be needed; replace by linked list

technical notes

   Constant changesetmax is the largest number of changes allowed per entry.

   Constant sequencemax is the largest length sequence that can be handled.

   Because the program creates a new accession name, it will strip
   away any secondary accession names.

   Default values for the from-to range are in constants deffromrange and
   deftorange.

*/
/* end module describe.dbmutate */

/* begin module const.dbmutate */
#define sequencemax     500000L   /* maximum sequence that can be handled (bp) */
#define deffromrange    (-50)   /* default from range */
#define deftorange      50   /* default to range */
/* end module const.dbmutate */

/* begin module book.const ***************************************************/
/* constants needed for book manipulations */
#define dnamax          10000000L   /* length of dna arrays */
#define namelength      100   /* maximum key name length */
#define linelength      80   /* maximum line readable in book */
/* end module book.const version = 1.01; (@ of testtime.p 1997 Jan 11 */

/* begin module changeset.const */
#define changesetmax    20   /* maximum number of changes allowed (per entry) */
#define insertmax       100   /* maximum insertion length allowed (bp) */
/* end module changeset.const */

/* begin module marspot.const */
#define insertupperbits  (-0.1)   /* upperbits for insertion symbol */
#define insertlowerbits  (-1.3)   /* lowerbits for insertion symbol */
#define deleteupperbits  (-0.1)   /* upperbits for deletion symbol */
#define deletelowerbits  (-1.3)   /* lowerbits for deletion symbol */
#define changeupperbits  (-1.3)   /* upperbits for change symbol */
#define changelowerbits  (-11.3)   /* lowerbits for change symbol */
#define displacement    0   /* amount to displace the mark backwards */
/* end module marspot.const */

/* begin module interact.const */
#define maxstring       150   /* the maximum string (size of string.letters) */
/* end module interact.const version = 4.21; (@ of prgmod.p 1997 October 22 */

/* begin module filler.const */
#define fillermax       50   /* the size of the filler array for a string */
/* end module filler.const version = 4.21; (@ of prgmod.p 1997 October 22 */

/* begin module interact.type */
typedef struct string {
  /* a string of characters: a fixed buffer plus an explicit length;
     the routines in this file do not rely on a terminating NUL */
  Char letters[maxstring];   /* the letters in the string */
  long length;   /* the number of characters in the string */
  long current;   /* the letter we are working on */
} string;
/* end module interact.type version = 4.21; (@ of prgmod.p 1997 October 22 */

/* begin module filler.type */
/* the following is an array used to fill a string. it is convenient to
   have it much shorter than the maxstring, so that it is easy to fill the
   string using procedure fillstring. the user must declare the value of
   constant fillermax.
*/
typedef Char filler[fillermax];
/* end module filler.type version = 4.21; (@ of prgmod.p 1997 October 22 */

/* begin module trigger.type */
typedef struct trigger {
  /* an object to be searched for in a stream of characters */
  string seek;   /* the characters looked for */
  long state;   /* how close to triggering we are (letters matched so far) */
  boolean skip;   /* trigger not found- skip the line */
  /* the trigger was found */
  boolean found;
} trigger;
/* end module trigger.type version = 4.21; (@ of prgmod.p 1997 October 22 */

/* begin module changeset.type */
typedef struct changedata {
  /* one mutation instruction as read from the parameter file */
  Char changetype;   /* the type of change: c(hange), i(nsertion), d(eletion) */
  Char baseold;   /* the old base given in a change instruction */
  Char basenew;   /* the new base given in a change instruction */
  double basecoo1;   /* the first coordinate */
  double basecoo2;   /* the second coordinate */
  long inserts;   /* number of bases to insert */
  Char insert[insertmax];   /* bases to insert; only the first "inserts" are used */
} changedata;

typedef struct changeset {
  /* the complete set of changes for an entry */
  changedata data[changesetmax];
  long number;   /* number of changes (entries of data in use) */
} changeset;
/* end module changeset.type */

/* begin module cystem.type */
/*zzzccc*/
/* define a coordinate system "cystem" */
typedef struct cystem {
  /* a coordinate system segment; segments are chained through next */
  double lower;   /* lower coordinate bound */
  double upper;   /* upper coordinate bound */
  struct cystem *next;   /* the next segment, NULL at the end of the list */
} cystem;
/* end module cystem.type */

/*
pieceptr = integer; (* a dummy type to keep pietoint happy *)
*/

/* begin module book.type ****************/
/* types needed for book manipulations */

typedef long chset[5];

/* types defined in book definition */
typedef Char alpha[namelength];   /* this is not alfa */

/* name is a left justified string with blanks following the characters */
typedef struct name {
  alpha letters;
  /* zero means an unspecified structure */
  /* NOTE(review): the translator left the comment above next to "letters";
     it presumably describes "length" - confirm against the Pascal source */
  char length;
} name;

typedef struct line {
  /* a line of characters; lines are chained through next */
  Char letters[linelength];
  char length;
  struct line *next;
} line;
typedef enum {
  plus, minus, dircomplement, dirhomologous
} direction;

typedef enum {
  linear, circular
} configuration;

typedef enum {
  on, off
} state;

typedef struct header {
  /* header of key */
  name keynam;   /* key name of structure */
  line *fulnam;   /* full name of structure */
  /* note key */
  line *note;
} header;

/* base types */
/* NOTE: these enumerators occupy the identifiers a, c, g and t at file
   scope, which is why parameters elsewhere are spelled a_, c_ and t_ */
typedef enum {
  a, c, g, t
} base;

typedef long dnarange;

/* p2c: dbmutate.p, line 355:
 * Note: Field width for seq assumes enum base has 4 elements [105] */
/* sized as one byte per four bases ((dnamax + 3) / 4 bytes) */
typedef uchar seq[(dnamax + 3) / 4];

typedef struct dnastring {
  seq part;
  dnarange length;
  struct dnastring *next;
} dnastring;

typedef struct orgkey {
  /* organism key */
  header hea;
  /* genetic map units */
  line *mapunit;
} orgkey;

typedef struct chrkey {
  /* chromosome key */
  header hea;
  double mapbeg;   /* number of genetic map beginning */
  /* number of genetic map ending */
  double mapend;
} chrkey;

typedef struct piekey {
  /* piece key */
  header hea;
  double mapbeg;   /* genetic map beginning */
  configuration coocon;   /* configuration (circular/linear) */
  direction coodir;   /* direction (+/-) relative to genetic map */
  long coobeg;   /* beginning nucleotide */
  long cooend;   /* ending nucleotide */
  configuration piecon;   /* configuration (circular/linear) */
  direction piedir;   /* direction (+/-) relative to coordinates */
  long piebeg;   /* beginning nucleotide */
  long pieend;   /* ending nucleotide */
} piekey;

typedef struct piece {
  piekey key;
  dnastring *dna;
} piece;

typedef struct reference {
  name pienam;   /* name of piece referred to */
  double mapbeg;   /* genetic map beginning */
  direction refdir;   /* direction relative to coordinates */
  long refbeg;   /* beginning nucleotide */
  long refend;   /* ending nucleotide */
} reference;

typedef struct genkey {
  /* gene key */
  header hea;
  reference ref;
} genkey;

typedef struct trakey {
  /* transcript key */
  header hea;
  reference ref;
} trakey;

typedef struct markey {
  /* marker key */
  header hea;
  reference ref;
  state sta;
  line *phenotype;
  struct marker *next;
}
markey; typedef struct marker { markey key; dnastring *dna; } marker; /* end module book.type version = 7.05; {of delmod.p 1999Mar17 tds/gds} */ typedef struct asequence { long length; /* length of the sequence */ Char sequence[sequencemax]; /* store the sequence */ } asequence; Static _TEXT dbin; /* file used by this program */ Static _TEXT dbout; /* file used by this program */ Static _TEXT dbmutatep; /* file used by this program */ Static _TEXT markspots; /* file used by this program */ Static _TEXT minst; /* file used by this program */ Static Char cha; /* see ichread */ Static jmp_buf _JL1; /* begin module halt */ Static Void halt() { /* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. */ printf(" program halt.\n"); longjmp(_JL1, 1); } /* end module halt version = 4.21; (@ of prgmod.p 1997 October 22 */ /* begin module package.trigger */ /* ************************************************************************ */ /* begin module interact.clearstring */ Static Void clearstring(ribbon) string *ribbon; { /* empty the string */ long index; /* to the ribbon */ for (index = 0; index < maxstring; index++) ribbon->letters[index] = ' '; ribbon->length = 0; ribbon->current = 0; } /* clearstring */ /* end module interact.clearstring version = 4.21; (@ of prgmod.p 1997 October 22 */ /* begin module interact.writestring */ Static Void writestring(tofile, s) _TEXT *tofile; string *s; { /* write the string s to file tofile, no writeln */ long i; /* index to s */ long FORLIM; FORLIM = s->length; for (i = 0; i < FORLIM; i++) putc(s->letters[i], tofile->f); } /* writestring */ /* end module interact.writestring version = 4.21; (@ of prgmod.p 1997 October 22 */ /* begin module filler.fillstring */ Static Void fillstring(s, a_) string *s; Char *a_; { /* this procedure 
makes it reasonably easy to fill the string s with characters. one calls the procedure as: */ /* 1 2 3 4 5 */ /* 12345678901234567890123456789012345678901234567890 */ /* fillstring(s, 'this-is-the-string '); the two comments make it easy to line the characters up. also, for this example, it was assumed that the length of filler as defined by the constant fillermax was 50. */ long length = fillermax; /* of the string without trailing blanks */ long index; /* of s */ clearstring(s); while (length > 1 && a_[length-1] == ' ') length--; if (length == 1 && a_[length-1] == ' ') { printf("fillstring: the string is empty\n"); halt(); } for (index = 0; index < length; index++) s->letters[index] = a_[index]; s->length = length; s->current = 1; } /* fillstring */ /* end module filler.fillstring version = 4.21; (@ of prgmod.p 1997 October 22 */ /* begin module filler.filltrigger */ Static Void filltrigger(t_, a_) trigger *t_; Char *a_; { /* fill the trigger t */ fillstring(&t_->seek, a_); } /* fillstring */ /* end module filler.filltrigger version = 4.21; (@ of prgmod.p 1997 October 22 */ /* begin module trigger.proc */ /* this module allows one to scan a series of characters, as from an array or a file, and to "trigger" or detect a simple string in the series. the advantage of the trigger is that several triggers can "observe" a stream of characters at once, each looking for a different thing. some other modules required: interact.const, interact.type */ Static Void resettrigger(t_) trigger *t_; { /* reset the trigger to ground state */ t_->state = 0; t_->skip = false; t_->found = false; } /* resettrigger */ Static Void testfortrigger(ch, t_) Char ch; trigger *t_; { /* look at the character ch. if it is part of the trigger (at the current trigger state), then the trigger state goes higher. if it is not part of the trigger then the trigger state is reset, skip is true and one should skip onward to find the trigger. if the trigger is found, found is true. 1996 Sep 12: Bug found! 
In the case of a trigger "ab", the program used to miss it for situations like "aab". This was because at the first a it would step up. Then it would see the second a and recognize that was not part of ab. It would fail to realize that it could be the start of a new one. The code now accounts for that possibility. */ t_->state++; /* writestring(list,seek); writeln(list,'testfortrigger seek.letters[',state:1,']:', seek.letters[state],' ch:',ch); */ if (t_->seek.letters[t_->state - 1] == ch) { t_->skip = false; if (t_->state == t_->seek.length) t_->found = true; else t_->found = false; return; } /* it failed. But wait! It could be the beginning of a NEW trigger string! */ if (t_->seek.letters[0] == ch) { t_->state = 1; t_->skip = false; t_->found = false; return; } t_->state = 0; t_->skip = true; t_->found = false; /* reset trigger */ } /* testfortrigger */ /* end module trigger.proc version = 4.21; (@ of prgmod.p 1997 October 22 */ /* begin module skipblanks */ Static Void skipblanks(thefile) _TEXT *thefile; { /* skip over blanks until a non-blank, or end of line, is found */ while ((P_peek(thefile->f) == ' ') & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipnonblanks(thefile) _TEXT *thefile; { /* skip over nonblanks until a blank, or end of line, is found */ while ((P_peek(thefile->f) != ' ') & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipcolumn(thefile) _TEXT *thefile; { /* skip over a data column */ skipblanks(thefile); skipnonblanks(thefile); } /* end module skipblanks version = 4.21; (@ of prgmod.p 1997 October 22 */ /* ************************************************************************ */ /* end module package.trigger version = 4.21; (@ of prgmod.p 1997 October 22 */ /* begin module equalstring */ Static boolean equalstring(a_, b) string a_, b; { /* test for equality between two strings at current positions */ long index; /* index to both strings */ boolean equal; /* are letters in a and b the same? 
*/ if (a_.length == b.length) { index = 1; do { equal = (a_.letters[index-1] == b.letters[index-1]); index++; } while (equal && index <= a_.length); return equal; } else return false; } /* equalstring */ /* end module equalstring version = 4.21; (@ of prgmod.p 1997 October 22 */ /* begin module onetoken */ Static Void onetoken(afile, buffer, gotten) _TEXT *afile; string *buffer; boolean *gotten; { /* getstring */ /* get a string from a file not using string calls. this lets one obtain lines from a file without interactive prompts */ long index = 0; /* of buffer */ boolean done = false; skipblanks(afile); clearstring(buffer); if (BUFEOF(afile->f)) { *gotten = false; return; } while (!P_eoln(afile->f) && index < maxstring && !done) { index++; buffer->letters[index-1] = getc(afile->f); if (buffer->letters[index-1] == '\n') buffer->letters[index-1] = ' '; if (buffer->letters[index-1] == ' ') { done = true; index--; } } buffer->length = index; buffer->current = 1; *gotten = true; } /* end module onetoken version = 4.21; (@ of prgmod.p 1997 October 22 */ /* begin module copyaline */ Static Void copyaline(fin, fout) _TEXT *fin, *fout; { /* copy a line from file fin to file fout */ while (!P_eoln(fin->f)) { putc(P_peek(fin->f), fout->f); getc(fin->f); } fscanf(fin->f, "%*[^\n]"); getc(fin->f); putc('\n', fout->f); } /* copyaline */ /* end module copyaline version = 4.21; (@ of prgmod.p 1997 October 22 */ /* begin module copyline */ Static Void copyline(fin, fout) _TEXT *fin, *fout; { /* copy a line from file fin to file fout but DO NOT CARRIAGE RETURN on the fout. Carriage return on the fin. 
*/ while (!P_eoln(fin->f)) { putc(P_peek(fin->f), fout->f); getc(fin->f); } fscanf(fin->f, "%*[^\n]"); getc(fin->f); } /* copyline */ /* end module copyline version = 4.21; (@ of prgmod.p 1997 October 22 */ /* ************************************************************************** */ /* ************************************************************************** */ /* ************************************************************************** */ /*zzzccc*/ /* begin module cystem.information */ /* Cystem stands for "coordinate system". It is a coordinate system for DNA, RNA or protein sequences that have undergone deletions or insertions. The original numbering is preserved as much as possible. For example, a continuous sequence from 1 to 100 would be noted as: (1 100) If bases 51 to 59 are deleted, the sequence has two parts: (1 50)(60 100) */ /* end module cystem.information */ /* begin module cystem.functions */ Static Void clearcystem(l, freecystem) cystem **l, *freecystem; { /* clear the cystem onto the free list */ cystem *lptr; if (*l == NULL) return; lptr = *l; (*l)->lower = 0.0; (*l)->upper = 0.0; *l = (*l)->next; lptr->next = freecystem; freecystem = lptr; } Static Void disposecystem(l) cystem **l; { /* dispose of the memory used by l */ cystem *hold; /* previous pointer */ while (*l != NULL) { hold = (*l)->next; Free(*l); *l = hold; } } Static Void getcystem(l, freecystem) cystem **l, *freecystem; { /* get a cystem from a free list or create one new */ if (freecystem != NULL) { *l = freecystem; freecystem = freecystem->next; } else *l = (cystem *)Malloc(sizeof(cystem)); (*l)->lower = 0.0; (*l)->upper = 0.0; (*l)->next = NULL; } Static Void startcystem(c_, l, u, freecystem) cystem **c_; long l, u; cystem **freecystem; { /* start the cystem c with l and u bounds */ cystem *WITH; getcystem(c_, *freecystem); WITH = *c_; WITH->lower = l; WITH->upper = u; } Static Void showcystem(f, r) _TEXT *f; double r; { /* show real number r to file f with decimals if they 
are not zero */ if (r - (long)r > 0) fprintf(f->f, "%1.1f", r); else fprintf(f->f, "%ld", (long)floor(r + 0.5)); } Static Void writecystem(f, c_) _TEXT *f; cystem *c_; { /* write the entire cystem c to file f */ cystem *p = c_; /* pointer to c */ while (p != NULL) { putc('(', f->f); if (p->lower == p->upper) showcystem(f, p->lower); else { showcystem(f, p->lower); putc(' ', f->f); showcystem(f, p->upper); } putc(')', f->f); /* write(f,'(', p^.lower:1:1, ' ', p^.upper:1:1, ')'); */ p = p->next; if (p == c_) { printf("circular\n"); halt(); } } } Static boolean insidecystem(l, c_) double l; cystem *c_; { /* is l inside the cystem element c? */ return (c_->lower <= l && l <= c_->upper); } Static cystem *locatecystem(c_, l) cystem **c_; long l; { /* locate position l in coordinate system c */ boolean done = false; /* done with a search */ cystem *p; /* pointer to c */ _TEXT TEMP; /* writeln(output,'locating ',l:1:1); */ p = *c_; while (!done) { if (insidecystem((double)l, p)) { done = true; break; } p = p->next; if (p->next == NULL) done = true; } if (p != NULL) return p; printf("cannot locate coordinate %ld in ", l); TEMP.f = stdout; *TEMP.name = '\0'; writecystem(&TEMP, *c_); putchar('\n'); halt(); return p; } Static Void findcystem(c_, l, k, location) cystem **c_; long l; Char *k; cystem **location; { /* see if c is in l, tell what happend in k: k = 'f': found, location pointer is valid k = 'b': l is before the cystem, location pointer is nil k = 'a': l is after the cystem, location pointer is at end of c k = 'm': l is in the middle but missing from the coordinate system. 
the cystem, location pointer nil */ boolean done = false; /* done with a search */ cystem *p; /* pointer to c */ *k = '?'; if (l < (*c_)->lower) { *k = 'b'; *location = NULL; return; } /*zzzccc*/ *k = 'f'; /* be optimistic */ *location = *c_; while (!done) { if (insidecystem((double)l, *location)) break; if ((*location)->next != NULL) { *location = (*location)->next; /* p2c: dbmutate.p: Note: Eliminated unused assignment statement [338] */ continue; } if (l > (*location)->upper) *k = 'a'; /* oh well!! */ else { *k = 'm'; *location = NULL; } done = true; } } Static Void deletecystem(c_, l, u, freecystem) cystem **c_; long l, u; cystem **freecystem; { /* delete in the cystem c from l to u inclusive */ /* zzzccc this is not completely written. "!" means incomplete code */ boolean done; /* done with a search */ cystem *p; /* pointer to c */ cystem *plower; /* pointer to c */ cystem *pupper; /* pointer to c */ cystem *pnewer; /* new segment */ cystem *holder; /* segment to remove */ _TEXT TEMP; printf("deleting from "); TEMP.f = stdout; *TEMP.name = '\0'; showcystem(&TEMP, (double)l); printf(" to "); TEMP.f = stdout; *TEMP.name = '\0'; showcystem(&TEMP, (double)u); printf(": "); /* find the location of the lower bound of the deletion */ plower = locatecystem(c_, l); pupper = locatecystem(c_, u); if (plower == pupper) { if (plower->lower == l && pupper->upper == u) { printf(" = =!"); /* find who points to this segment and then remove the segment */ if (plower == *c_) { /* it is the first segment */ holder = *c_; *c_ = (*c_)->next; clearcystem(&holder, *freecystem); return; } p = *c_; while (p->next->lower != l) p = p->next; holder = p->next; p->next = p->next->next; clearcystem(&holder, *freecystem); return; } /* search */ if (plower->lower != l && pupper->upper == u) { printf("<> = "); pupper->upper = l - 1.0; return; } if (plower->lower == l && pupper->upper != u) { printf(" =<>!"); return; } if (plower->lower == l || pupper->upper == u) return; /* if plower^.lower = l 
then begin plower^.lower := u end else if pupper^.upper = u then begin pupper^.upper := l end else if plower^.lower = pupper^.upper then begin halt; end else begin end */ printf("<><> "); /* split one segment in two */ getcystem(&pnewer, *freecystem); pnewer->next = pupper->next; plower->next = pnewer; pnewer->lower = u + 1.0; pnewer->upper = pupper->upper; plower->upper = l - 1.0; return; } printf(" "); /* remove cystems in between */ p = plower->next; while (p != pupper) { holder = p; p = p->next; clearcystem(&holder, *freecystem); } plower->upper = l - 1.0; pupper->lower = u + 1.0; } Static Void checkcystem(c_, spot) cystem *c_; long spot; { /* check the coordinate given */ Char k; /* location information according to findcystem */ cystem *location; /* location information according to findcystem */ _TEXT TEMP; findcystem(&c_, spot, &k, &location); printf("@ %ld k=%c ", spot, k); TEMP.f = stdout; *TEMP.name = '\0'; /*zzzccc*/ /* procedure findcystem(var c: cystemptr; l: integer; k: char; var location: cystemptr); */ writecystem(&TEMP, location); putchar('\n'); } /* end module cystem.functions */ /* begin module cystem.test */ Static Void testcystem(f) _TEXT *f; { /* test the cystem functions. Note that it is essential to dispose of the cystems afterward to avoid a memory failure. That is, pascal on Sun Sparc workstations does not seem to clear it when this procedure is completed and this can cause trouble for later procedures. 
*/ cystem *freecystem = NULL; /* the free list of cystems */ cystem *c_ = NULL; /* a cystem */ fprintf(f->f, "test of cystem -------------------------------------\n"); startcystem(&c_, 10L, 99L, &freecystem); writecystem(f, c_); putc('\n', f->f); deletecystem(&c_, 11L, 29L, &freecystem); writecystem(f, c_); putc('\n', f->f); deletecystem(&c_, 51L, 59L, &freecystem); writecystem(f, c_); putc('\n', f->f); deletecystem(&c_, 71L, 79L, &freecystem); writecystem(f, c_); putc('\n', f->f); deletecystem(&c_, 66L, 89L, &freecystem); writecystem(f, c_); putc('\n', f->f); deletecystem(&c_, 41L, 50L, &freecystem); writecystem(f, c_); putc('\n', f->f); deletecystem(&c_, 30L, 40L, &freecystem); writecystem(f, c_); putc('\n', f->f); checkcystem(c_, 9L); checkcystem(c_, 10L); checkcystem(c_, 11L); checkcystem(c_, 100L); checkcystem(c_, 65L); checkcystem(c_, 66L); disposecystem(&c_); disposecystem(&freecystem); /*zzzccc*/ /* deletecystem(c, 1, 10, freecystem); writecystem(f,c); writeln(f); deletecystem(c, 30, 34, freecystem); writecystem(f,c); writeln(f); deletecystem(c, 34, 41, freecystem); writecystem(f,c); writeln(f); deletecystem(c, 41, 41, freecystem); writecystem(f,c); writeln(f); */ fprintf(f->f, "test of cystem DONE --------------------------------\n"); /*zzzccc*/ /* halt; */ } /* end module cystem.test */ /*zzzccc*/ /* ************************************************************************** */ /* ************************************************************************** */ /* ************************************************************************** */ /* Dummy routines. These simulate the delila environment in which one must convert to internal coordinates. 
*/ Static long pietoint(p, pie) long p; piece *pie; { /* piece to internal coordinates */ piece *dummy = pie; /* dummy to satisfy useage of pie */ return p; } Static long inttopie(i, pie) long i; piece *pie; { /* internal coordinates to piece */ piece *dummy = pie; /* dummy to satisfy useage of pie */ return i; } Static long piecelength(pie) piece *pie; { /* piece length */ return 0; } /* begin module dbmutate.readchangeset */ Static Void nextnonblank(f) _TEXT *f; { /* locate the next non blank in the file or die gracefully in the attempt */ boolean done = false; /* found the non-blank */ while (!done) { skipblanks(f); /* now we are either end of line or at a character */ if (P_eoln(f->f)) { /* oh well, continue */ fscanf(f->f, "%*[^\n]"); getc(f->f); } else done = true; /* should be a character there */ if (BUFEOF(f->f)) { printf("unexpected end of file found in dbmutatep\n"); printf("you are missing part of an instruction\n"); halt(); } } } Static Void readchanges(f, c_) _TEXT *f; changeset *c_; { /* Read the base changes from f in the form 'g2343c'. */ changedata *WITH; WITH = &c_->data[c_->number - 1]; WITH->changetype = 'c'; nextnonblank(f); WITH->baseold = getc(f->f); if (WITH->baseold == '\n') WITH->baseold = ' '; nextnonblank(f); fscanf(f->f, "%lg", &WITH->basecoo1); WITH->basecoo2 = WITH->basecoo1; if (P_eoln(f->f)) { printf(": to make a change, a new base is required\n"); halt(); } nextnonblank(f); WITH->basenew = getc(f->f); if (WITH->basenew == '\n') WITH->basenew = ' '; if (WITH->baseold != 't' && WITH->baseold != 'g' && WITH->baseold != 'c' && WITH->baseold != 'a') printf("\nWARNING: old base usually should be a, c, g, t\n"); if (WITH->basenew != 't' && WITH->basenew != 'g' && WITH->basenew != 'c' && WITH->basenew != 'a') printf("\n WARNING: new base usually should be a, c, g, t\n"); WITH->inserts = 0; } Static Void readinsertion(f, c_) _TEXT *f; changeset *c_; { /* Read the insertion from f in the form 'i449,450tt'. 
*/ Char comma; /* the comma character */ changedata *WITH; WITH = &c_->data[c_->number - 1]; WITH->changetype = getc(f->f); if (WITH->changetype == '\n') WITH->changetype = ' '; fscanf(f->f, "%lg", &WITH->basecoo1); skipblanks(f); comma = getc(f->f); if (comma == '\n') comma = ' '; if (comma != ',') { printf(" comma expected between coordinates for insertion\n"); halt(); } fscanf(f->f, "%lg", &WITH->basecoo2); if (WITH->basecoo1 < 0) WITH->basecoo1 = 0.0; if (WITH->basecoo2 < 1) WITH->basecoo2 = 1.0; WITH->inserts = 0; while (P_peek(f->f) == 't' || P_peek(f->f) == 'g' || P_peek(f->f) == 'c' || P_peek(f->f) == 'a') { WITH->inserts++; if (WITH->inserts > insertmax) { printf( " no more than %ld insertion bases allowed, increase constant insertmax\n", (long)insertmax); halt(); } WITH->insert[WITH->inserts - 1] = getc(f->f); if (WITH->insert[WITH->inserts - 1] == '\n') WITH->insert[WITH->inserts - 1] = ' '; } /* The following check could be done in dbmutate, but it is not valid when working with a full coordinate system where it is possible for the fragment numbering to decrease. The check must be done elsewhere. if basecoo1 >= basecoo2 then begin writeln(output,' the first base, ',round(basecoo1):1, ', must be less than', ' the second base, ',round(basecoo2):1, ' for insertion'); halt end; */ } Static Void readdeletion(f, c_) _TEXT *f; changeset *c_; { /* Read the deletion from f in the form 'M55114 d449,450'. 
*/ Char comma; /* the comma character */ changedata *WITH; WITH = &c_->data[c_->number - 1]; WITH->changetype = getc(f->f); if (WITH->changetype == '\n') WITH->changetype = ' '; fscanf(f->f, "%lg", &WITH->basecoo1); skipblanks(f); comma = getc(f->f); if (comma == '\n') comma = ' '; if (comma != ',') { printf("comma expected between coordinates for deletion\n"); halt(); } fscanf(f->f, "%lg", &WITH->basecoo2); if (WITH->basecoo1 < 1) WITH->basecoo1 = 1.0; if (WITH->basecoo2 < 1) WITH->basecoo2 = 1.0; WITH->inserts = 0; if (WITH->basecoo1 > WITH->basecoo2) { printf( " the first base, %ld, must be less thanor equal to the second base, %ld for deletion\n", (long)floor(WITH->basecoo1 + 0.5), (long)floor(WITH->basecoo2 + 0.5)); halt(); } } Static Void readchangeset(changes, c_) _TEXT *changes; changeset *c_; { /* read in and change the sequence */ boolean done = false; /* we found a semicolon so we are done */ c_->number = 0; while (!P_eoln(changes->f) && !done) { skipblanks(changes); if (P_peek(changes->f) == ';') done = true; else if (P_peek(changes->f) == '.') { /* just move past it */ /* accept a period as part of the set, treat as blank */ getc(changes->f); } else { c_->number++; if (c_->number > changesetmax) { printf("Too many changes requested, increase changesetmax."); halt(); } if (P_peek(changes->f) == 't' || P_peek(changes->f) == 'g' || P_peek(changes->f) == 'c' || P_peek(changes->f) == 'a') readchanges(changes, c_); else if (P_peek(changes->f) == 'i') readinsertion(changes, c_); else if (P_peek(changes->f) == 'd') readdeletion(changes, c_); else { printf(" change must be identified by one of: acgtdi\n"); printf(" the illegal change character \"%c\" was found\n", P_peek(changes->f)); halt(); } } skipblanks(changes); /* prepare for the next one */ } } Static Void writechangeset(f, changes) _TEXT *f; changeset changes; { /* write the changeset to file f in shorthand notation */ long i; /* index to insertion */ long n; /* index to changes */ long FORLIM; 
changedata *WITH; long FORLIM1; FORLIM = changes.number; for (n = 1; n <= FORLIM; n++) { if (n > 1) putc('.', f->f); WITH = &changes.data[n-1]; switch (WITH->changetype) { case 'c': fprintf(f->f, "%c%ld%c", WITH->baseold, (long)floor(WITH->basecoo1 + 0.5), WITH->basenew); break; case 'i': fprintf(f->f, "i%ld,%ld", (long)floor(WITH->basecoo1 + 0.5), (long)floor(WITH->basecoo2 + 0.5)); FORLIM1 = WITH->inserts; for (i = 0; i < FORLIM1; i++) putc(WITH->insert[i], f->f); break; case 'd': fprintf(f->f, "d%ld,%ld", (long)floor(WITH->basecoo1 + 0.5), (long)floor(WITH->basecoo2 + 0.5)); break; } } } Static Void describechangeset(f, changes) _TEXT *f; changeset changes; { /* describe in English the changeset to file f */ long i; /* index to insertion */ long n; /* index to changes */ long FORLIM; changedata *WITH; long FORLIM1; if (changes.number == 0) { fprintf(f->f, "no changes"); return; } FORLIM = changes.number; for (n = 1; n <= FORLIM; n++) { if (n > 1) fprintf(f->f, ", "); WITH = &changes.data[n-1]; switch (WITH->changetype) { case 'c': fprintf(f->f, "at %ld %c->%c", (long)floor(WITH->basecoo1 + 0.5), WITH->baseold, WITH->basenew); break; case 'i': fprintf(f->f, "insert "); FORLIM1 = WITH->inserts; for (i = 0; i < FORLIM1; i++) putc(WITH->insert[i], f->f); fprintf(f->f, " between %ld and %ld", (long)floor(WITH->basecoo1 + 0.5), (long)floor(WITH->basecoo2 + 0.5)); break; case 'd': fprintf(f->f, "delete %ld to %ld", (long)floor(WITH->basecoo1 + 0.5), (long)floor(WITH->basecoo2 + 0.5)); break; } } } /* end module dbmutate.readchangeset */ /* begin module marksautomate */ Static Void marksautomate(markspots) _TEXT *markspots; { /* define the components necessary for markspots */ fprintf(markspots->f, "* markspots: define markings for the lister program\n"); fprintf(markspots->f, "* The standard marks.arrow must be used prior to this file.\n\n"); fprintf(markspots->f, "u\n"); fprintf(markspots->f, "/setmarkspotarrow{\n"); fprintf(markspots->f, "/bodycolor {black} def\n"); 
fprintf(markspots->f, "/strokecolor {black} def\n"); fprintf(markspots->f, "/BodyThick 0.30 fs def\n"); fprintf(markspots->f, "/HeadWidth 0.90 fs def\n"); fprintf(markspots->f, "/HeadLength 1.50 fs def\n"); fprintf(markspots->f, "} def\n"); fprintf(markspots->f, "setmarkspotarrow\n\n"); fprintf(markspots->f, "/change {%% tailx taily headx heady shift change\n"); fprintf(markspots->f, "%% the head of an arrow\n"); fprintf(markspots->f, "pop\n"); fprintf(markspots->f, "setmarkspotarrow\n"); fprintf(markspots->f, "fixedarrow\n"); fprintf(markspots->f, "} def\n\n"); fprintf(markspots->f, "/changeworra{%% tailx taily headx heady shift changeworra\n"); fprintf(markspots->f, "%% the tail of an arrow is a 'worra' (spelling backards)\n"); fprintf(markspots->f, "pop\n"); fprintf(markspots->f, "setmarkspotarrow\n"); fprintf(markspots->f, "fixedworra\n"); fprintf(markspots->f, "} def\n\n"); fprintf(markspots->f, "/insertion{%% tailx taily headx heady shift insertion\n"); fprintf(markspots->f, "%% an insertion is a green rectangle\n"); fprintf(markspots->f, "pop\n"); fprintf(markspots->f, "/bodycolor {lightgreen} def\n"); fprintf(markspots->f, "boundrectangle\n"); fprintf(markspots->f, "} def\n\n"); fprintf(markspots->f, "/deletion {%% tailx taily headx heady shift deletion\n"); fprintf(markspots->f, "%% a deletion is a red rectangle\n"); fprintf(markspots->f, "pop\n"); fprintf(markspots->f, "/bodycolor {lightred} def\n"); fprintf(markspots->f, "boundrectangle\n"); fprintf(markspots->f, "} def\n\n"); /*zzzyyy*/ fprintf(markspots->f, "/doubleY{%% tailx taily headx heady shift doubleY\n"); fprintf(markspots->f, "%% Two Y's connected at their bases indicate insertion\n"); fprintf(markspots->f, "pop\n"); fprintf(markspots->f, "/bodycolor {lightgreen} def\n"); fprintf(markspots->f, "fixeddoubleY\n"); fprintf(markspots->f, "%% NOT IMPLEMENTED\n"); fprintf(markspots->f, "} def\n\n"); fprintf(markspots->f, "!\n\n"); } typedef long position; /* make quicksort be happy but still standard 
*/ /* Local variables for sortchanges: */ struct LOC_sortchanges { changeset *sorted; } ; Local boolean lessthan(a_, b, LINK) long a_, b; struct LOC_sortchanges *LINK; { /* see quicksort */ return (LINK->sorted->data[a_-1].basecoo1 < LINK->sorted->data[b-1].basecoo1); } Local Void swap_(a_, b, LINK) long a_, b; struct LOC_sortchanges *LINK; { /* see quicksort */ changedata hold; hold = LINK->sorted->data[a_-1]; LINK->sorted->data[a_-1] = LINK->sorted->data[b-1]; LINK->sorted->data[b-1] = hold; /*;write(output,'a=',a:1,', b=',b:1); print (@ for testing */ } /* begin module quicksort */ Local Void quicksort(left, right, LINK) position left, right; struct LOC_sortchanges *LINK; { /* quick sort a list between positions left and right, into ascending order. a position is simply a scalar of the form 0..max. the array to be sorted is dimensioned 1..max. (the difference in the ranges is important to the correct operation of the sort...) two external routines are used: function lessthan(a, b: position): boolean is a generalized test for value-at-a < value-at-b. procedure swap(a, b: position) switches the items at positions a and b. since these routines are external, the procedure is general. this procedure taken from the book 'algorithms + data structures = programs' by niklaus wirth, prentice-hall, inc., englewood cliffs, n.j.(1976), pp. 
76-82 */ position lower = left; position upper; /* the positions looked at currently */ position center; /* the rough center of the region being sorted */ center = (left + right) / 2; upper = right; do { while (lessthan(lower, center, LINK)) lower++; while (lessthan(center, upper, LINK)) upper--; if (lower <= upper) { /* keep track of the center through the map: */ if (lower == center) center = upper; else if (upper == center) center = lower; swap_(lower, upper, LINK); lower++; upper--; } } while (lower <= upper); if (left < upper) quicksort(left, upper, LINK); if (lower < right) quicksort(lower, right, LINK); } /* end module marksautomate */ /* begin module sortchanges */ Static Void sortchanges(unsorted, sorted_) changeset unsorted, *sorted_; { /* sort the changeset unsorted and put the result into sorted. Since marks need to be in increasing position order (as currently defined in lister) it is nice to sort the changes for each piece. */ struct LOC_sortchanges V; /* end module quicksort version = 4.21; (@ of prgmod.p 1997 October 22 */ V.sorted = sorted_; *V.sorted = unsorted; quicksort(1L, V.sorted->number, &V); } /* Local variables for propagatechanges: */ struct LOC_propagatechanges { piece *pie; long shift; /* amount to shift a change */ } ; Local Void shiftit(x, LINK) changedata *x; struct LOC_propagatechanges *LINK; { /* shift the change */ /*writeln(output,'from basecoo1 =',round(basecoo1):1);*/ /*writeln(output,'from basecoo2 =',round(basecoo2):1);*/ /*zzz*/ x->basecoo1 = inttopie( pietoint((long)floor(x->basecoo1 + 0.5), LINK->pie) + LINK->shift, LINK->pie); x->basecoo2 = inttopie( pietoint((long)floor(x->basecoo2 + 0.5), LINK->pie) + LINK->shift, LINK->pie); /*writeln(output,' to basecoo1 =',round(basecoo1):1);*/ /*writeln(output,' to basecoo2 =',round(basecoo2):1);*/ } /* shiftit */ /* end module sortchanges */ /* begin module propagatechanges */ Static Void propagatechanges(cin, cout, wildtype, pie_) changeset cin, *cout; boolean wildtype; piece 
*pie_;
{
  /* propagate deletion and insertions through the changeset.  This
     simulates making the changes on the sequence and results in a changeset
     that refers to the ALTERED sequence.  Shifting must be done in internal
     coordinates, so the actual piece must be given.

     cin      - the changes as given (copied, not modified)
     cout     - receives the copy with shifted coordinates
     wildtype - selects which entries are shifted, see below
     pie_     - piece handed on to pietoint/inttopie

     For every insertion or deletion a shift is computed from its own
     coordinates.  When wildtype is true the shift is applied to the entries
     that come AFTER it in the list; otherwise the shift is negated and
     applied to the entries that come BEFORE it.  In both cases only entries
     whose (rounded) basecoo1 lies beyond the current change's location are
     moved: strictly beyond for an insertion, at or beyond for a deletion. */
  struct LOC_propagatechanges V;
  long m;   /* counter for the changes, current place (1-based) */
  long n;   /* counter for the changes, other places (0-based) */
  long location;   /* location of the current change, read before shifting */
  long FORLIM;
  changedata *WITH1;   /* the current change */
  long FORLIM1;
  changedata *WITH2;   /* assigned in the inner loops but never read in this part */

  V.pie = pie_;
  /* should sorting be done before propagating?  No, propagation
     must be in the order given. */
  *cout = cin;
  FORLIM = cout->number;
  for (m = 1; m <= FORLIM; m++) {
    location = pietoint((long)floor(cout->data[m-1].basecoo1 + 0.5), V.pie);
    WITH1 = &cout->data[m-1];
    switch (WITH1->changetype) {

    case 'c':
      /* base changes cause no downstream changes */
      break;

    case 'i':
      /* (second - first) - inserts - 1: the number of bases strictly between
	 the two coordinates minus the number of bases inserted */
      V.shift = pietoint((long)floor(WITH1->basecoo2 + 0.5), V.pie) - pietoint(
		  (long)floor(WITH1->basecoo1 + 0.5), V.pie) - WITH1->inserts - 1;
      if (wildtype) {
	FORLIM1 = cout->number;
	/* n starts at m, i.e. the entry after the current one (0-based) */
	for (n = m; n < FORLIM1; n++) {
	  WITH2 = &cout->data[n];
	  if (location <
	      pietoint((long)floor(cout->data[n].basecoo1 + 0.5), V.pie)) {
	    shiftit(&cout->data[n], &V);
	  }
	}
      } else {
	/* mutant */
	V.shift = -V.shift;
	/* n runs over the entries before the current one (0 .. m-2) */
	for (n = 0; n <= m - 2; n++) {
	  WITH2 = &cout->data[n];
	  if (location <
	      pietoint((long)floor(cout->data[n].basecoo1 + 0.5), V.pie))
	    shiftit(&cout->data[n], &V);
	}
      }
      break;

    case 'd':
      /* (second - first) + 1: the number of bases deleted, ends included */
      V.shift = pietoint((long)floor(WITH1->basecoo2 + 0.5), V.pie) -
		pietoint((long)floor(WITH1->basecoo1 + 0.5), V.pie) + 1;
      if (wildtype) {
	FORLIM1 = cout->number;
	/*
propagation affects all changes downstream */ for (n = m; n < FORLIM1; n++) { WITH2 = &cout->data[n]; /*do begin*/ if (location <= pietoint((long)floor(cout->data[n].basecoo1 + 0.5), V.pie)) { /*writeln(output,'delete: n =',n:1,' of ',changetype);*/ shiftit(&cout->data[n], &V); } } } else { V.shift = -V.shift; for (n = 0; n <= m - 2; n++) { WITH2 = &cout->data[n]; if (location <= pietoint((long)floor(cout->data[n].basecoo1 + 0.5), V.pie)) { /*writeln(output,'delete: n =',n:1,' of ',changetype);*/ shiftit(&cout->data[n], &V); } } } break; } } } /* end module propagatechanges */ /* begin module nwpietoint */ Static long nwpietoint(p, pie) long p; piece *pie; { /* no wrap version of pietoint */ /* p is a coordinate on the piece. we want to transform p into a number from 1 to n: an internal coordinate system for easy manipulation of piece coordinates */ long i; /* an intermediate value */ piekey *WITH; WITH = &pie->key; switch (WITH->piedir) { case plus: i = p - WITH->piebeg + 1; break; case minus: i = WITH->piebeg - p + 1; break; } /* writeln(output,'newpietoint ---------------'); if piedir = minus then writeln(output,'nwpietoint: pidir is minus') else writeln(output,'nwpietoint: pidir is plus'); writeln(output,'nwpietoint: p: ',p:1); writeln(output,'nwpietoint: pieend: ',pieend:1); writeln(output,'nwpietoint: i: ',i:1); */ /*zzzbbb*/ return i; } #define decbase 2 /* number of decimal places for bases */ #define widbase 6 /* width of places for bases */ #define decbits 2 /* number of decimal places for bits */ #define widbits 6 /* width of places for bits */ #define blackbar 0.05 /* black bar width in bases */ #define shiftdown 1 /* shift down the change mark arrow on wt sequence */ /* end module nwpietoint */ /* begin module writemarks */ Static Void writemarks(markspots, changes, insertupperbits_, insertlowerbits_, deleteupperbits_, deletelowerbits_, changeupperbits_, changelowerbits_, displacement_, pie, thenumber) _TEXT *markspots; changeset changes; double 
insertupperbits_, insertlowerbits_, deleteupperbits_, deletelowerbits_, changeupperbits_, changelowerbits_, displacement_;
piece *pie;
long thenumber;
{
  /* Write the marks to file markspots, at the locations defined.

     markspots        - file receiving the marks
     changes          - the changes to mark (passed by value)
     insertupperbits_ - upper bits value for the insertion symbol
     insertlowerbits_ - lower bits value for the insertion symbol
     deleteupperbits_ - upper bits value for the deletion symbol
     deletelowerbits_ - lower bits value for the deletion symbol
     changeupperbits_ - upper bits value for the change symbol
     changelowerbits_ - lower bits value for the change symbol
     displacement_    - number of bases to displace the mark backwards
     pie              - piece for these changes
     thenumber        - the piece number

     Writemarks works with two sequences, first is the wild-type
     sequence and second is the mutant sequence.

     Definition: This routine assumes that all previous actions have
     placed us onto the wild-type sequence.

     NOTE: the basecoo values changes get modified locally here but are
     not altered in the original copy.

     NOTE: displacement_ is overwritten when an insertion falls at either
     end of the piece, and the new value stays in effect for every mark
     written after that point. */
  long i;   /* counter for inserts */
  long n;   /* counter for the changes */
  double markplace;   /* the last mark place put into markspots */
  double protection;
  /* protect against postscript bomb when the position does
     not change if there are no bases in an insertion
     (ie it acts as a deletion)
     by adding a little bit to the second insertion position */
  changeset sorted;
  /* the changes sorted by the exact
     positions of the marks, must be done for both wild and mutant sequences */
  changeset unsorted;   /* the exact positions of the marks, unsorted */
  changeset propagated;   /* changes propagated through the changeset;
			      not used in the part of the routine shown here */
  long FORLIM;
  changedata *WITH;
  long FORLIM1;

  fprintf(markspots->f, "\n* piece #%ld ", thenumber);
  writechangeset(markspots, changes);
  putc('\n', markspots->f);

  /* ---- first do the wild type sequence ---- */
  propagatechanges(changes, &unsorted, true, pie);   /* wild type */

  FORLIM = unsorted.number;
  /* adjust the locations of the marks */
  for (n = 0; n < FORLIM; n++) {
    WITH = &unsorted.data[n];
    switch (WITH->changetype) {

    case 'c':
      /* wait until the next loop */
      break;

    case 'i':
      /* pull both ends inwards by (0.5 - blackbar) */
      protection = blackbar;
      WITH->basecoo1 += 0.5 - protection;
      WITH->basecoo2 += protection - 0.5;
      break;

    case 'd':
      /* mark deletion as a red bar on the wild type sequence:
	 widen by half a base on each side */
      WITH->basecoo1 -= 0.5;
      WITH->basecoo2 += 0.5;
      break;
    }
  }

  sortchanges(unsorted, &sorted);

  FORLIM = sorted.number;
  /* print the marks out */
  for (n = 0; n < FORLIM; n++) {
    WITH = &sorted.data[n];
    switch (WITH->changetype) {

    case 'c':
      /* only basecoo1 is displaced here; basecoo2 is printed as it stands,
	 and both bits values are lowered by shiftdown */
      markplace = WITH->basecoo1 - displacement_;
      WITH->basecoo1 = markplace;
      fprintf(markspots->f,
	      "U %*.*f %*.*f %*.*f %*.*f %*.*f (%c->%c) changeworra \n",
	      widbits, decbits, WITH->basecoo1, widbits, decbits,
	      changeupperbits_ - shiftdown, widbits, decbits, WITH->basecoo2,
	      widbits, decbits, changelowerbits_ - shiftdown, widbits,
	      decbits, displacement_, WITH->baseold, WITH->basenew);
      break;

    case 'i':
      /* mark insertion location on the wild type sequence; the mark is
	 written with the "deletion" symbol and the deletion bits values */
      /* handle the special cases of insertion at the ends of the sequence */
      if (nwpietoint((long)WITH->basecoo1, pie) == 0) {
	WITH->basecoo1 += 1.0;
	WITH->basecoo2 += 1.0;
	displacement_ = -1.8;   /* I don't know why this needs more */
      }
      if (nwpietoint((long)WITH->basecoo1, pie) > piecelength(pie)) {
	WITH->basecoo1 -= 1.0;
	WITH->basecoo2 -= 1.0;
	displacement_ = 0.0;
      }
      fprintf(markspots->f,
	      "U %*.*f %*.*f %*.*f %*.*f %*.*f (insert) deletion\n",
	      widbase, decbase, WITH->basecoo1, widbits, decbits,
	      deleteupperbits_, widbase, decbase, WITH->basecoo2, widbits,
	      decbits, deletelowerbits_, widbits, decbits, displacement_);
      break;

    case 'd':
      /* mark deletion as a red bar on the wild type sequence */
      fprintf(markspots->f, "U %*.*f %*.*f %*.*f %*.*f %*.*f (deletion",
	      widbase, decbase, WITH->basecoo1, widbits, decbits,
	      deleteupperbits_, widbase, decbase, WITH->basecoo2, widbits,
	      decbits, deletelowerbits_, widbits, decbits, displacement_);
      fprintf(markspots->f, ") deletion \n");
      break;
    }
  }

  fprintf(markspots->f, "p - skip ahead to mutated piece");
  fprintf(markspots->f, " #%ld\n", thenumber + 1);   /* anticipate its number */

  /* ---- then the mutant sequence ---- */
  propagatechanges(changes, &unsorted, false, pie);   /* mutant */

  FORLIM = unsorted.number;
  /* adjust the locations of the marks */
  for (n = 0; n < FORLIM; n++) {
    WITH = &unsorted.data[n];
    switch (WITH->changetype) {

    case 'c':
      /* both ends of the arrow sit at the displaced position */
      markplace = WITH->basecoo1 - displacement_;
      WITH->basecoo1 = markplace;
      WITH->basecoo2 = markplace;
      break;

    case 'i':
      /* mark insertion as a green bar on the mutant sequence.
	 Therefore forceforward before putting the mark, and
	 put the mark on the sequence itself */
      /* an insertion with no bases gets a bar of width 2*blackbar */
      if (WITH->inserts <= 0)
	protection = blackbar;
      else
	protection = 0.0;
      markplace = WITH->basecoo1 + 0.5;
      WITH->basecoo1 = markplace - protection;
      WITH->basecoo2 = markplace + WITH->inserts + protection;
      break;

    case 'd':
      /* a thin bar of width 2*blackbar centred half a base before basecoo1 */
      markplace = WITH->basecoo1;
      protection = blackbar;
      WITH->basecoo1 = markplace - 0.5 - protection;
      WITH->basecoo2 = markplace - 0.5 + protection;
      break;
    }
  }

  sortchanges(unsorted, &sorted);

  FORLIM = sorted.number;
  /* print the marks out */
  for (n = 0; n < FORLIM; n++) {
    WITH = &sorted.data[n];
    switch (WITH->changetype) {

    case 'c':
      fprintf(markspots->f,
	      "U %*.*f %*.*f %*.*f %*.*f %*.*f (%c->%c) change \n",
	      widbits, decbits, WITH->basecoo1, widbits, decbits,
	      changeupperbits_, widbits, decbits, WITH->basecoo2, widbits,
	      decbits, changelowerbits_, widbits, decbits, displacement_,
	      WITH->baseold, WITH->basenew);
      break;

    case 'i':
      /* handle the special cases of insertion at the ends of the sequence */
      if (nwpietoint((long)WITH->basecoo1, pie) == 0) {
	WITH->basecoo1 += 1.0;
	WITH->basecoo2 += 1.0;
	displacement_ = -1.8;   /* I don't know why this needs more */
      }
      if (nwpietoint((long)WITH->basecoo1, pie) > piecelength(pie)) {
	/* diagnostic output to stdout whenever this branch is taken */
	printf("nwpietoint(trunc(basecoo1),pie) = %ld\n",
	       nwpietoint((long)WITH->basecoo1, pie));
	printf("nwpietoint(trunc(basecoo2),pie) = %ld\n",
	       nwpietoint((long)WITH->basecoo2, pie));
	printf("piecelength(pie) = %ld\n", piecelength(pie));
	WITH->basecoo1 -= 1.0;
	WITH->basecoo2 -= 1.0;
displacement_ = 0.0; /*zzzbbb*/ } /*writeln(output,'HI THERE 4');*/ /* lowerbits */ fprintf(markspots->f, "U %*.*f %*.*f %*.*f %*.*f %*.*f (", widbase, decbase, WITH->basecoo1, widbits, decbits, insertupperbits_, widbase, decbase, WITH->basecoo2, widbits, decbits, insertlowerbits_, widbits, decbits, displacement_); /* protect against null postscript string. It's not clear that this helps. */ if (WITH->inserts == 0) putc(' ', markspots->f); else { FORLIM1 = WITH->inserts; for (i = 0; i < FORLIM1; i++) putc(WITH->insert[i], markspots->f); } fprintf(markspots->f, ") insertion\n"); break; /* mark insertion as a green bar on the mutant sequence. Therefore forceforward before putting the mark, and put the mark on the sequence itself */ case 'd': /* upperbits */ fprintf(markspots->f, "U %*.*f %*.*f %*.*f %*.*f %*.*f (delete) deletion\n", widbase, decbase, WITH->basecoo1, widbits, decbits, deleteupperbits_, widbase, decbase, WITH->basecoo2, widbits, decbits, deletelowerbits_, widbits, decbits, displacement_); break; /* lowerbits */ } } fprintf(markspots->f, "p - skip ahead to next piece\n"); } #undef decbase #undef widbase #undef decbits #undef widbits #undef blackbar #undef shiftdown /* end module writemarks */ /* begin module dbmutate.processcommand */ Static Void processcommand(dbmutatep, fromrange, torange) _TEXT *dbmutatep; long *fromrange, *torange; { /* process commands */ Char command; /* a command in dbmutatep */ command = getc(dbmutatep->f); /* skip the '@' */ if (command == '\n') command = ' '; skipblanks(dbmutatep); command = getc(dbmutatep->f); if (command == '\n') command = ' '; skipnonblanks(dbmutatep); printf("@ command: "); if (command == 't' || command == 'f') { switch (command) { case 'f': fscanf(dbmutatep->f, "%ld", fromrange); printf("fromrange set to %ld", *fromrange); break; case 't': fscanf(dbmutatep->f, "%ld", torange); printf("torange set to %ld", *torange); break; } } else printf("unidentifed command, \",command,\", ignored"); putchar('\n'); 
fscanf(dbmutatep->f, "%*[^\n]"); getc(dbmutatep->f); } /* Local variables for themain: */ struct LOC_themain { _TEXT *minst; trigger accession; /* trigger to find the ACCESSION pattern */ changeset changes; /* current set of changes */ trigger entryname; /* a mark for the accession number of an entry */ trigger entryend; /* a mark for the end of an entry */ boolean gotten; /* whether the entryname was found */ trigger locus; /* trigger to find the LOCUS pattern */ trigger origin; /* trigger to find the ORIGIN pattern */ trigger organism; /* trigger to find the ORGANISM pattern */ } ; Local Void readsequence(f, s, LINK) _TEXT *f; asequence *s; struct LOC_themain *LINK; { /* read in the sequence from f */ Char c_; /* character in dbin */ boolean done = false; /* end search */ long sindex = 0; /* index for the sequence */ _TEXT TEMP; /* read in the sequence */ resettrigger(&LINK->entryend); while (!done) { if (BUFEOF(f->f)) { printf("No end to entry "); TEMP.f = stdout; *TEMP.name = '\0'; writestring(&TEMP, &LINK->entryname.seek); printf("!\n"); halt(); } if (P_eoln(f->f)) { fscanf(f->f, "%*[^\n]"); getc(f->f); continue; } c_ = getc(f->f); if (c_ == '\n') c_ = ' '; testfortrigger(c_, &LINK->entryend); if (LINK->entryend.found) { done = true; fscanf(f->f, "%*[^\n]"); getc(f->f); continue; } /* store the character */ /* if c in ['a','c','g','t'] then begin */ /* allow any alphabetic, a-z */ if (c_ < 'a' || c_ > 'z') continue; /* else then writeln(output,'rejected character: ',c); */ sindex++; if (sindex > sequencemax) { TEMP.f = stdout; *TEMP.name = '\0'; writestring(&TEMP, &LINK->entryname.seek); printf(" sequence length is larger than the allowed %ld.\n", sequencemax); printf("Increase constant sequencemax.\n"); halt(); } s->sequence[sindex-1] = c_; } s->length = sindex; } Local Void writesequence(f, s, LINK) _TEXT *f; asequence s; struct LOC_themain *LINK; { /* write sequence s to file f */ long sindex = 0; /* index for the sequence */ /* 1999 Feb 11: the bug was 
that this was <= instead of < ! */ while (sindex < s.length) { sindex++; if (sindex % 60 == 1) fprintf(f->f, "%9ld", sindex); /* p2c: dbmutate.p, line 1867: * Note: Using % for possibly-negative arguments [317] */ if (sindex % 10 == 1) putc(' ', f->f); /* p2c: dbmutate.p, line 1870: * Note: Using % for possibly-negative arguments [317] */ putc(s.sequence[sindex-1], f->f); /* if sindex > 14540 then begin write(output,' length = ',length:1); write(output,' sindex = ',sindex:1); write(output,' ord = ',ord(sequence[sindex])); writeln(output); end; */ if (sindex % 60 == 0) putc('\n', f->f); /* p2c: dbmutate.p, line 1882: * Note: Using % for possibly-negative arguments [317] */ } if (sindex % 60 != 1) { putc('\n', f->f); /* writeln(f,'sindex = ', sindex:1); writeln(f,'sindex mod 60 = ', sindex:1); */ } /* p2c: dbmutate.p, line 1886: * Note: Using % for possibly-negative arguments [317] */ fprintf(f->f, "//\n\n"); } Local Void changesequence(changes, thesequence, LINK) changeset changes; asequence *thesequence; struct LOC_themain *LINK; { /* read in and change the sequence */ long i; /* counter for inserts */ long n; /* counter for the changes */ long shift; /* amount to shift a portion of the sequence */ long FORLIM; changedata *WITH1; long FORLIM1; FORLIM = changes.number; /* alter the sequence */ for (n = 0; n < FORLIM; n++) { WITH1 = &changes.data[n]; /* this is not relevant - handle ends if basecoo1 > length then begin writeln(output,'first base coordinate ',basecoo1:1, ' exceeds sequence length ',length:1); halt end; if changetype in ['d','i'] then if basecoo2 > length then begin writeln(output,'second base coordinate ',basecoo2:1, ' exceeds sequence length ',length:1); halt end; */ switch (WITH1->changetype) { case 'c': if (WITH1->baseold == WITH1->basenew) { printf("The initial and final bases are the same,\n"); printf("so you did not request any change!\n"); halt(); } if (WITH1->baseold != thesequence->sequence[(long)floor(WITH1->basecoo1 + 0.5) - 1]) { 
printf("The old base at %ld is NOT %c! It is %c.\n", (long)floor(WITH1->basecoo1 + 0.5), WITH1->baseold, thesequence->sequence[(long)floor(WITH1->basecoo1 + 0.5) - 1]); halt(); } thesequence->sequence[(long)floor(WITH1->basecoo1 + 0.5) - 1] = WITH1-> basenew; break; case 'i': /* writeln(output,inserts:1); */ if (WITH1->basecoo1 > thesequence->length) WITH1->basecoo1 = thesequence->length; if (WITH1->basecoo2 > thesequence->length) WITH1->basecoo2 = thesequence->length + 1.0; /*writeln(output,'old length ',length:1);*/ shift = WITH1->inserts - (long)floor(WITH1->basecoo2 + 0.5) + (long)floor(WITH1->basecoo1 + 0.5) + 1; /* length := length - shift; evidently the wrong direction */ thesequence->length += shift; /*writeln(output,'shift ',shift:1);*/ /*writeln(output,'new length ',length:1);*/ if (shift > 0) { /* insert */ /* shift the rest of the sequence out of the way */ if (thesequence->length > sequencemax) { printf( " Insertion of %ld bases would cause the sequence to exceed constant sequencemax (%ld)\n", WITH1->inserts, sequencemax); halt(); } FORLIM1 = (long)floor(WITH1->basecoo2 + 0.5) - 1; for (i = thesequence->length - 1; i >= FORLIM1; i--) thesequence->sequence[i] = thesequence->sequence[i - shift]; } /*writeln(output,'ins');*/ else if (shift < 0) { /* delete */ FORLIM1 = thesequence->length; /*writeln(output,'del');*/ for (i = (long)floor(WITH1->basecoo2 + 0.5) + shift - 1; i < FORLIM1; i++) thesequence->sequence[i] = thesequence->sequence[i - shift]; } FORLIM1 = WITH1->inserts; /* insert the new material */ for (i = 0; i < FORLIM1; i++) thesequence->sequence[(long)floor(WITH1->basecoo1 + 0.5) + i] = WITH1->insert[i]; /* (* overwrite for debugging *) for i := 1 to inserts do sequence[round(basecoo1) + i] := '!'; */ /* fix //*/ break; case 'd': /* delete inclusively from basecoo1 to basecoo2 */ /* writeln(output,'DELETE'); sequence[basecoo1] := 'A'; sequence[basecoo2] := 'B'; */ if (WITH1->basecoo1 < 0) WITH1->basecoo1 = 1.0; if (WITH1->basecoo2 < 0) 
WITH1->basecoo2 = 1.0; if (WITH1->basecoo1 > thesequence->length) WITH1->basecoo1 = thesequence->length + 1.0; if (WITH1->basecoo2 > thesequence->length) WITH1->basecoo2 = thesequence->length + 1.0; shift = (long)floor(WITH1->basecoo2 + 0.5) - (long)floor(WITH1->basecoo1 + 0.5) + 1; thesequence->length -= shift; FORLIM1 = thesequence->length; for (i = (long)floor(WITH1->basecoo1 + 0.5) - 1; i < FORLIM1; i++) thesequence->sequence[i] = thesequence->sequence[i + shift]; break; } } } Local Void startinst(minst, version_, LINK) _TEXT *minst; double version_; struct LOC_themain *LINK; { /* start the instruction file */ if (*minst->name != '\0') { if (minst->f != NULL) minst->f = freopen(minst->name, "w", minst->f); else minst->f = fopen(minst->name, "w"); } else { if (minst->f != NULL) rewind(minst->f); else minst->f = tmpfile(); } if (minst->f == NULL) _EscIO2(FileNotFound, minst->name); SETUPBUF(minst->f, Char); fprintf(minst->f, "title \"dbmutate %4.2f\";\n", version_); printf("title \"dbmutate %4.2f\";\n", version_); } /* Local variables for instorgchr: */ struct LOC_instorgchr { struct LOC_themain *LINK; _TEXT *minst; string organismgenus, organismspecies; } ; Local Void givename(LINK) struct LOC_instorgchr *LINK; { /* just the name please with ; at end */ putc(LINK->organismgenus.letters[0], LINK->minst->f); putc('.', LINK->minst->f); writestring(LINK->minst, &LINK->organismspecies); fprintf(LINK->minst->f, ";\n"); } Local Void instorgchr(minst_, organismgenus_, organismspecies_, LINK) _TEXT *minst_; string organismgenus_, organismspecies_; struct LOC_themain *LINK; { /* organism genus name */ /* organism species name */ /* define the organism into the mutation inst file */ struct LOC_instorgchr V; V.LINK = LINK; V.minst = minst_; V.organismgenus = organismgenus_; V.organismspecies = organismspecies_; if (LINK->changes.number <= 0) return; fprintf(V.minst->f, "\norganism "); givename(&V); fprintf(V.minst->f, "chromosome "); givename(&V); putc('\n', V.minst->f); } 
/* Local variables for instget: */ struct LOC_instget { struct LOC_themain *LINK; _TEXT *minst; changeset changes; long fromrange, torange; } ; Local Void sign(minst, range, LINK) _TEXT *minst; long range; struct LOC_instget *LINK; { /* make a + sign on the positive range */ if (range > 0) putc('+', minst->f); } Local Void dotheget(LINK) struct LOC_instget *LINK; { fprintf(LINK->minst->f, "get from % .1E ", LINK->changes.data[0].basecoo1); sign(LINK->minst, LINK->fromrange, LINK); fprintf(LINK->minst->f, "%ld to same ", LINK->fromrange); sign(LINK->minst, LINK->torange, LINK); fprintf(LINK->minst->f, "%ld;\n", LINK->torange); } Local Void instget(minst_, piecename, changes_, fromrange_, torange_, LINK) _TEXT *minst_; string piecename; changeset changes_; long fromrange_, torange_; struct LOC_themain *LINK; { /* define the piece and get instructions to file minst. For the given piece (piecename) write the changes for the range from-to. */ struct LOC_instget V; V.LINK = LINK; V.minst = minst_; V.changes = changes_; V.fromrange = fromrange_; V.torange = torange_; if (V.changes.number <= 0) return; fprintf(V.minst->f, "(* "); describechangeset(V.minst, V.changes); fprintf(V.minst->f, " *)\n"); fprintf(V.minst->f, "name \"wildtype\";\n"); fprintf(V.minst->f, "piece "); writestring(V.minst, &piecename); fprintf(V.minst->f, "; (* wild type *)\n"); dotheget(&V); fprintf(V.minst->f, "name \""); writestring(V.minst, &piecename); putc(' ', V.minst->f); describechangeset(V.minst, V.changes); fprintf(V.minst->f, "\";\n"); fprintf(V.minst->f, "piece "); writestring(V.minst, &piecename); putc('.', V.minst->f); writechangeset(V.minst, V.changes); fprintf(V.minst->f, "; (* mutant *)\n"); dotheget(&V); } Local Void dolocus(dbin, dbout, markspots, entryname, changes, found, fromrange, torange, thenumber, LINK) _TEXT *dbin, *dbout, *markspots; trigger entryname; changeset changes; boolean *found; long *fromrange, *torange, *thenumber; struct LOC_themain *LINK; { /* look through a 
locus and find the current entryname. If it is the right one, process it and return flag found as true. */ string anaccessionname; /* accession name of an entry */ string alocusname; /* accession name of an entry */ Char c_; /* character in dbin */ long entrylength; /* length of sequence in an entry */ _TEXT holdfile; /* file for holding the middle of an entry */ string organismgenus; /* organism genus name */ string organismspecies; /* organism species name */ boolean done = false; /* end search */ asequence sequence; /* a sequence */ changeset sortedchanges; /* changes sorted for writing out */ piece *pie; /* dummy to satisfy propagatechanges */ piece *dummy; /* dummy to satisfy useage of pie */ _TEXT TEMP; holdfile.f = NULL; *holdfile.name = '\0'; if (*holdfile.name != '\0') { if (holdfile.f != NULL) holdfile.f = freopen(holdfile.name, "w", holdfile.f); else holdfile.f = fopen(holdfile.name, "w"); } else { if (holdfile.f != NULL) rewind(holdfile.f); else holdfile.f = tmpfile(); } if (holdfile.f == NULL) _EscIO2(FileNotFound, holdfile.name); SETUPBUF(holdfile.f, Char); /* get the locus name and determine the length */ onetoken(dbin, &alocusname, &LINK->gotten); fscanf(dbin->f, "%ld", &entrylength); clearstring(&organismgenus); clearstring(&organismspecies); /* find the accession name */ resettrigger(&LINK->entryend); *found = false; while (!done) { if (P_eoln(dbin->f)) { fscanf(dbin->f, "%*[^\n]"); getc(dbin->f); putc('\n', holdfile.f); if (BUFEOF(dbin->f)) done = true; /*writeln(output);*/ } else { c_ = getc(dbin->f); if (c_ == '\n') c_ = ' '; putc(c_, holdfile.f); /*write(output,c);*/ testfortrigger(c_, &LINK->entryend); if (LINK->entryend.found) done = true; testfortrigger(c_, &LINK->accession); if (LINK->accession.found) { onetoken(dbin, &anaccessionname, &LINK->gotten); if (LINK->gotten) { /* There are two paths here: the entry was found - in which case we copy and mutate or it was not found - in which case we skip to end of entry. 
*/ done = true; if (equalstring(anaccessionname, entryname.seek)) { *found = true; /* complete hold file */ fprintf(holdfile.f, " "); writestring(&holdfile, &entryname.seek); /* Write the changes to the new accession name: */ if (changes.number > 0) putc('.', holdfile.f); writechangeset(&holdfile, changes); if (changes.number > 0) { fprintf(markspots->f, "\n* dbmutatep requested piece %ld\n", *thenumber); fprintf(markspots->f, "* final display pieces %ld and %ld\n", *thenumber * 2 - 3, *thenumber * 2 - 2); fprintf(markspots->f, "* "); writestring(markspots, &entryname.seek); putc('\n', markspots->f); } else { fprintf(markspots->f, "\n* The wild type piece in dbmutatep, "); writestring(markspots, &entryname.seek); fprintf(markspots->f, " does not have marks in markspots\n"); } pie = NULL; dummy = pie; /* keep compiler happy */ writemarks(markspots, sortedchanges, insertupperbits, insertlowerbits, deleteupperbits, deletelowerbits, changeupperbits, changelowerbits, (double)displacement, pie, *thenumber); /* THIS KEEPS ALL SECONDARY ACCESSION NAMES:*/ putc(' ', holdfile.f); copyaline(dbin, &holdfile); /* find the organism name */ resettrigger(&LINK->organism); done = false; while (!done) { if (BUFEOF(dbin->f)) { printf("No "); TEMP.f = stdout; *TEMP.name = '\0'; writestring(&TEMP, &LINK->organism.seek); printf(" to entry "); TEMP.f = stdout; *TEMP.name = '\0'; writestring(&TEMP, &entryname.seek); printf("!\n"); halt(); } if (P_eoln(dbin->f)) { fscanf(dbin->f, "%*[^\n]"); getc(dbin->f); putc('\n', holdfile.f); continue; } c_ = getc(dbin->f); if (c_ == '\n') c_ = ' '; putc(c_, holdfile.f); testfortrigger(c_, &LINK->organism); if (!LINK->organism.found) continue; done = true; onetoken(dbin, &organismgenus, &LINK->gotten); if (!LINK->gotten) { copyaline(dbin, &holdfile); continue; } onetoken(dbin, &organismspecies, &LINK->gotten); if (!LINK->gotten) continue; /* write(output,'genus: '); writestring(output,organismgenus); write(output,' species: '); 
writestring(output,organismspecies); writeln(output); */ fprintf(holdfile.f, " "); writestring(&holdfile, &organismgenus); putc(' ', holdfile.f); writestring(&holdfile, &organismspecies); instorgchr(LINK->minst, organismgenus, organismspecies, LINK); } instget(LINK->minst, entryname.seek, changes, *fromrange, *torange, LINK); /* find the origin of the sequence */ resettrigger(&LINK->origin); done = false; while (!done) { if (BUFEOF(dbin->f)) { printf("No "); TEMP.f = stdout; *TEMP.name = '\0'; writestring(&TEMP, &LINK->origin.seek); printf(" to entry "); TEMP.f = stdout; *TEMP.name = '\0'; writestring(&TEMP, &entryname.seek); printf("!\n"); halt(); } if (P_eoln(dbin->f)) { fscanf(dbin->f, "%*[^\n]"); getc(dbin->f); putc('\n', holdfile.f); continue; } c_ = getc(dbin->f); if (c_ == '\n') c_ = ' '; putc(c_, holdfile.f); testfortrigger(c_, &LINK->origin); if (LINK->origin.found) { done = true; copyaline(dbin, &holdfile); } } readsequence(dbin, &sequence, LINK); changesequence(changes, &sequence, LINK); /* dump the results out */ writestring(dbout, &LINK->locus.seek); fprintf(dbout->f, " "); writestring(dbout, &alocusname); fprintf(dbout->f, " %6ld", sequence.length); if (*holdfile.name != '\0') { if (holdfile.f != NULL) holdfile.f = freopen(holdfile.name, "r", holdfile.f); else holdfile.f = fopen(holdfile.name, "r"); } else rewind(holdfile.f); if (holdfile.f == NULL) _EscIO2(FileNotFound, holdfile.name); RESETBUF(holdfile.f, Char); while (!BUFEOF(holdfile.f)) copyaline(&holdfile, dbout); /* write out the sequence */ writesequence(dbout, sequence, LINK); } } /* write(output,' found: '); writestring(output,anaccessionname); writeln(output); */ } } resettrigger(&entryname); testfortrigger(c_, &entryname); if (BUFEOF(dbin->f)) done = true; } if (!*found) { /* it is safest to skip to the end of the entry to avoid incorrect triggerings from the current entry. 
*/ resettrigger(&LINK->entryend); done = false; while (!done) { if (BUFEOF(dbin->f)) { printf("No end to entry "); TEMP.f = stdout; *TEMP.name = '\0'; writestring(&TEMP, &entryname.seek); printf("!\n"); halt(); } if (P_eoln(dbin->f)) { fscanf(dbin->f, "%*[^\n]"); getc(dbin->f); continue; } c_ = getc(dbin->f); if (c_ == '\n') c_ = ' '; testfortrigger(c_, &LINK->entryend); if (LINK->entryend.found) { done = true; fscanf(dbin->f, "%*[^\n]"); getc(dbin->f); } } } if (holdfile.f != NULL) fclose(holdfile.f); } Local Void locateentry(dbin, dbout, markspots, entryname, changes, fromrange, torange, thenumber, LINK) _TEXT *dbin, *dbout, *markspots; trigger entryname; changeset changes; long *fromrange, *torange, *thenumber; struct LOC_themain *LINK; { /* locate the entry in dbin */ boolean done = false; /* end search */ Char c_; /* a character in dbin */ _TEXT TEMP; if (*dbin->name != '\0') { if (dbin->f != NULL) dbin->f = freopen(dbin->name, "r", dbin->f); else dbin->f = fopen(dbin->name, "r"); } else rewind(dbin->f); if (dbin->f == NULL) _EscIO2(FileNotFound, dbin->name); RESETBUF(dbin->f, Char); resettrigger(&LINK->locus); while (!done) { /*if eoln(dbin) then writeln(output);*/ if (BUFEOF(dbin->f)) { printf("Could not locate entry "); TEMP.f = stdout; *TEMP.name = '\0'; writestring(&TEMP, &entryname.seek); printf("!\n"); halt(); } if (P_eoln(dbin->f)) { fscanf(dbin->f, "%*[^\n]"); getc(dbin->f); continue; } c_ = getc(dbin->f); if (c_ == '\n') c_ = ' '; /*write(output,c);*/ testfortrigger(c_, &LINK->locus); if (LINK->locus.found) dolocus(dbin, dbout, markspots, entryname, changes, &done, fromrange, torange, thenumber, LINK); } } /* end module dbmutate.processcommand */ /* begin module dbmutate.themain */ Static Void themain(dbin, dbout, dbmutatep, markspots, minst_) _TEXT *dbin, *dbout, *dbmutatep, *markspots, *minst_; { /* the main procedure of the program */ struct LOC_themain V; double parameterversion; /* parameter version number */ long piecesoutput = 0; /* count of 
the number of output pieces */ long fromrange = deffromrange, torange = deftorange; /* range of delila instructions */ _TEXT TEMP; V.minst = minst_; printf("dbmutate %4.2f\n", version); if (*dbmutatep->name != '\0') { if (dbmutatep->f != NULL) dbmutatep->f = freopen(dbmutatep->name, "r", dbmutatep->f); else dbmutatep->f = fopen(dbmutatep->name, "r"); } else rewind(dbmutatep->f); if (dbmutatep->f == NULL) _EscIO2(FileNotFound, dbmutatep->name); RESETBUF(dbmutatep->f, Char); fscanf(dbmutatep->f, "%lg%*[^\n]", ¶meterversion); getc(dbmutatep->f); if (parameterversion < updateversion) { printf("You have an old parameter file!\n"); halt(); } /* 1 2 3 4 5 */ /* 12345678901234567890123456789012345678901234567890 */ filltrigger(&V.locus, "LOCUS "); filltrigger(&V.accession, "ACCESSION "); filltrigger(&V.origin, "ORIGIN "); filltrigger(&V.organism, "ORGANISM "); filltrigger(&V.entryend, "// "); if (*dbout->name != '\0') { if (dbout->f != NULL) dbout->f = freopen(dbout->name, "w", dbout->f); else dbout->f = fopen(dbout->name, "w"); } else { if (dbout->f != NULL) rewind(dbout->f); else dbout->f = tmpfile(); } if (dbout->f == NULL) _EscIO2(FileNotFound, dbout->name); SETUPBUF(dbout->f, Char); if (*markspots->name != '\0') { if (markspots->f != NULL) markspots->f = freopen(markspots->name, "w", markspots->f); else markspots->f = fopen(markspots->name, "w"); } else { if (markspots->f != NULL) rewind(markspots->f); else markspots->f = tmpfile(); } if (markspots->f == NULL) _EscIO2(FileNotFound, markspots->name); SETUPBUF(markspots->f, Char); fprintf(markspots->f, "* dbmutate %4.2f\n", version); marksautomate(markspots); startinst(V.minst, version, &V); while (!BUFEOF(dbmutatep->f)) { skipblanks(dbmutatep); if ((P_peek(dbmutatep->f) == '*') | P_eoln(dbmutatep->f)) { /* skip comments and blank lines */ fscanf(dbmutatep->f, "%*[^\n]"); getc(dbmutatep->f); continue; } if (P_peek(dbmutatep->f) == '@') { processcommand(dbmutatep, &fromrange, &torange); continue; } onetoken(dbmutatep, 
&V.entryname.seek, &V.gotten); if (!V.gotten) continue; piecesoutput++; TEMP.f = stdout; *TEMP.name = '\0'; writestring(&TEMP, &V.entryname.seek); readchangeset(dbmutatep, &V.changes); putchar(' '); TEMP.f = stdout; *TEMP.name = '\0'; describechangeset(&TEMP, V.changes); fscanf(dbmutatep->f, "%*[^\n]"); getc(dbmutatep->f); putchar('\n'); locateentry(dbin, dbout, markspots, V.entryname, V.changes, &fromrange, &torange, &piecesoutput, &V); } } /* end module dbmutate.themain */ main(argc, argv) int argc; Char *argv[]; { PASCAL_MAIN(argc, argv); if (setjmp(_JL1)) goto _L1; minst.f = NULL; strcpy(minst.name, "minst"); markspots.f = NULL; strcpy(markspots.name, "markspots"); dbmutatep.f = NULL; strcpy(dbmutatep.name, "dbmutatep"); dbout.f = NULL; strcpy(dbout.name, "dbout"); dbin.f = NULL; strcpy(dbin.name, "dbin"); /* testcystem(output); {zzzccc} */ themain(&dbin, &dbout, &dbmutatep, &markspots, &minst); _L1: if (dbin.f != NULL) fclose(dbin.f); if (dbout.f != NULL) fclose(dbout.f); if (dbmutatep.f != NULL) fclose(dbmutatep.f); if (markspots.f != NULL) fclose(markspots.f); if (minst.f != NULL) fclose(minst.f); exit(EXIT_SUCCESS); } /* End. */