/* Output from p2c 1.21alpha-07.Dec.93, the Pascal-to-C translator */ /* From input file "sebo.p" */ #include /* sebo: search for aligned book sequences in another book Dr. Thomas D. Schneider National Institutes of Health National Cancer Institute Center for Cancer Research Nanobiology Program Molecular Information Theory Group Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu (use only if first address fails) http://www.ccrnp.ncifcrf.gov/~toms/ module libraries: delmod */ /* end of program */ /* begin module version */ #define version 1.46 /* of sebo.p 2009 Mar 03 2009 Mar 03: 1.46: correct bug from Blake 2008 Jan 16: 1.45: add space before 'times in the target' 2006 Oct 26: 1.44: correct reading params 2006 Oct 26: 1.43: implement rangecontrol 2006 Oct 26: 1.42: allow output instructions to match original fragment 2004 Sep 9: 1.41: upgrade for GPC 2004 Apr 30: 1.40: spelling correction, guarantee 2003 Aug 25: 1.39: bug: output at every base for book alignment 2002 Aug 27: 1.38: Do not halt if probe is larger than target sequence 2001 May 7: 1.37: report total number of probes found. 2001 Mar 26: 1.34: note in bugs on shift parameter 2001 Mar 23: 1.33: count targets and probes 2001 Mar 23: 1.32: clean up after memory leak solved. 2001 Mar 23: 1.31: hunt for a memory leak 2000 Nov 8: 1.25: upgrade documentation and fix writeinstructions bflip 2000 jun 27: 1.20: names of instout default to previous names 2000 jun 27: 1.18: add examples 2000 jun 27: 1.17: fully functional 2000 jun 26: 1.14: alert parameter 2000 jun 26: 1.12: allow mismatches 2000 jun 26: 1.11: fully functional: found that Ecoli ahpC is next to dsbG! 2000 jun 26: 1.04: barely functional 2000 jun 24: 1.01: reads target and book. 2000 jun 24: 1.00: origin */ #define updateversion 1.21 /* defines lowest acceptable current parameter file */ /* end module version */ /* begin module describe.sebo */ /* name sebo: search for aligned book sequences in another book synopsis sebo(inst: in, book: in, target: in, sebop: in, instout: out, list: out, output: out) files inst: Delila instructions to create a book. book: The Delila book created with inst, containing fragments to search in target. target: Target book to search, usually a whole genome. The sequence is treated as a circle to guarantee correct results on circular genomes. The incorrect results on linear chromosomes will be quite rare because of the telomeres. sebop: parameters to control the program. The file must contain the following parameters, one per line: 1. parameterversion: The version number of the program. This allows the user to be warned if an old parameter file is used. 2. If the first character of the second line is 'f' (for 'first') then the sequences are always aligned by their first base. 'i' then the sequences are aligned by the delila instructions. If the inst file is empty, alignment is forced to the 'b' mode. 'b' (for 'internal') then the alignment is on the internal zero of the book's sequence. This option is to be used when "default coordinate zero" is used in the Delila instructions. 3. fromdo, todo: (integer) range of aligned book to use in the search. 4. rangecontrol, frominst, toinst: (char, integer, integer): Range of Delila instructions output to instout. If range control, the first character on the line is: 'a' then the instructions will give the ORIGINAL fragment in absolute coordinates. 'r' then the instructions will give the ORIGINAL fragment in relative coordinates. For any other initial character, the instructions will give the range frominst to toinst. 5. mismatches: (integer) number of mismatches allowed. 6. alertinterval: (integer) Many genomes are large and a search may take a while, making the program seem to be doing nothing. To observe that the program is running, set this parameter to a number of bases. Sebo will write out the location it is currently searching every alertinterval bases. Nonpositive values disable the feature. instout: revised inst giving Delila instructions for finding the fragments specified in inst/book in the target. Full names of pieces from the book (such as those assigned by Delila instructions of the form "name 'abinddingsite';") will default to previous full names if there are any. This follows the convention of blank name on the alist being the previous name. list: display of aligned locations found. output: messages to the user description Nowadays it is often the case that old GenBank entries become fused into a single genome. It would be nice to be able to automatically convert old delila instructions to the new genome. examples ******************************************************************************** Example 1 ----- sebo.inst: title "An OxyR site"; {Example instruction file for sebo } { number only the pieces, starting at 1 } default numbering piece; default numbering 1; default out-of-range reduce-range; organism E.coli; chromosome E.coli; name "ahpC"; piece D13187; { ECOD13187 }; get from 116 -200 to 116 +200 direction +; ----- sebop: 1.21 version of sebo that this parameter file is designed for. i f: first base, i: inst, b: book alignment -30 30 fromdo todo: range of aligned book to use in the search. -200 200 frominst toinst: range of delila instructions output to instout 2 number of mismatches allowed 500000 alertinterval (bases) ----- target: E. coli genome U00096 ----- instout: title "sebo 1.21 search with book"; { sebop parameters: i: use the alignedbase from the book -30 30 range of book alignment used for search -200 200 range of resulting output instructions 2 mismatches allowed 500000 alert interval } organism E.coli; chromosome E.coli; piece U00096; { the target sequence is 4639221 bases } { original source probe: } { name ""; piece D13187; get from 1 to 316 direction +; } { final target instructions: } name "ahpC"; get from 638089 -200 to same +200 direction +; ----- list: sebo 1.21 search with book"; sebop parameters: i: use the alignedbase from the book -30 30 range of book alignment used for search -200 200 range of resulting output instructions 2 mismatches allowed 500000 alert interval Target sequence: piece U00096; get from 1 to 4639221 direction +; { 4639221 bases } Searching target with probe: name "ahpC"; piece D13187; get from 1 to 316 direction +; --------------------- +++++++++++++++++++++ 322222222221111111111--------- +++++++++111111111122222222223 0987654321098765432109876543210123456789012345678901234567890 ttacgaaggttgtaaggtaaaacttatcgatttgataatggaaacgcattaccggaatcgg probe X X differences ttacgaaggttgtaaggtaaaacttatcgatttgataatggaaacgcattagccgaatcgg target ^ found at 638089 on the target ******************************************************************************** Example 2 Suppose that you have some sequences in a file and one would like to locate them on the chromosome of an organism in the form of Delila instructions. Here is how to create the input files for sebo: 1. Convert the sequences into a Delila book using one of the programs rawbk, makebk, dbbk, or mkdb (see the web page mentioned below). This gives you the file 'book' in directory 1. 2. In another directory (2) build a Delila library for your organism (this is also described on that web page). 3. Use delila to extract both the sequence and the complement, for example: title "Delila instructions for Bacteriophage T4 and its complement"; organism c.T4; chromosome c.T4; piece AF158101; get all piece; get all piece direction -; 4. Move or link the resulting book into the file 'target' in Directory 1. 5. Then set up the sebop for searching without Delila instructions (since no delila instructions were used to make the book): 1.21 version of sebo that this parameter file is designed for. f f: first base, i: inst, b: book alignment 0 40 fromdo todo: range of aligned book to use in the search. 0 40 frominst toinst: range of delila instructions output to instout 10 number of mismatches allowed 500000 alertinterval (bases) 6. Finally use sebo to create the instout. You may have to allow for some mismatches. These instructions should allow you to extract the sequences from the organism database 7. Sebo reports its results by adding lots of comments to instout in the form {}. If you want to remove them, (so you have a clean set of instructions) you can use the nocom program. You can remove the extra blanks with noblank. In unix: nocom < instout | sed -e 's/{}//g' | noblank > instclean You can also remove redundant delila instructions for organism, chromosome and piece by hand, especially if there is only one piece in the target. ******************************************************************************** documentation see also {Sequence extraction program:} delila.p {Simple search engine:} search.p {More details on alignment types:} alist.p {example sebop parameter file:} sebop {example inst file:} sebo.inst {Programs for conversion of raw sequences into a Delila Book:} rawbk.p makebk.p dbbk.p mkdb.p {Conversion of a book of sequences into a Delila Library:} http://www.lecb.ncifcrf.gov/~toms/delilalibraries.html {Remove the comments created by sebo:} nocom.p {Remove the blank lines created by sebo:} noblank.p {Shift the instructions to whereever one wants:} instshift.p author Thomas Dana Schneider bugs technical notes * The org/chr/pie names written to delila instructions come from the current target sequence. * Delila instructions for sites frequently come in pairs: get from 163 -200 to 163 +200 direction +; get from 163 +200 to 163 -200 direction -; So the target will be searched in both orientations. * constant dnamax is set to a large value to increase the speed of the search. * a nice parameter to have would allow the user to shift the coordinate of the zero into any position they want in the piece. Fortunately this can be done with the instshift program, so this is not high priority. The base that one specifies would become the zero base of the Delila instructions. * 2001 April 17: if the fromdo/todo range given for the book is outside of the piece in the book (in aligned searching), then the book sequence will not be found in the target. */ /* end module describe.sebo */ /* 2001 Mar 25 * memory leak information: The memory leak can be seen with the SIZE parameter of the top program. The value of SIZE continuously increases as the program runs. Memory leak occurs under gpc and sun compilers. The freedna variable always has 0 links during the loop! The cleardna routine is never called, no halt there. The clearpiece routine IS called, halt there halts. The call to clearpiece is from getocp. Why are no links going onto the freedna list at that point? I have demonstrated that there is indeed a memory leak! By cranking up the size of the dna segments from 5000000 (5e6) to 100000000 (1e8) I got sebo to find a bunch of targets and THEN to crash: Ran out of memory Trace/Breakpoint Trap Since it should reuse the arrays, this should not happen unless the arrays are being wasted (memory is allocated but lost and then the next sequence uses more memory). Also, top shows it going up more rapidly. dnamax = 100000000; { causes crash} Now to find out why it is loosing the dna links and not reusing them. It was rather subtle. I was looking at a routine called getocp (get organism, chromosome, piece) which DOES clear the DNA with each use. To my surprise sebo was not using getocp. But the program can also use alignments, depending on the sebop. Indeed, you had set the sebop to use delila instructions, but had left the inst file EMPTY. This caused the program to use the alignement routines to read the sequences. But that routine does not clear the DNA and so there was an overflow. So I have put a halt in; sebo 1.31 i: use the alignedbase from the book 75 125 range of book alignment used for search -100 100 range of resulting output instructions 2 mismatches allowed 50000 alert interval The sebop requests instructions to be used but the inst file is empty! program halt. However these routines are called: align getpiece brpiece brdna getdna So getdna is probably guilty! Here is the original code: if freedna<>nil then begin IT NEVER REACHES HERE l:=freedna; freedna:=freedna^.next end else new(l); l^.length:=0; l^.next:=nil Fortunately the bug was simple. I used getocp which automatically takes care of memory. But for the align routine, it can't do that and so the sebo program has to. I forgot to clearpiece(Bpie); Adding this solved the problem. */ /* LOCK begin module book.const */ /* constants needed for book manipulations */ /* dnamax = 1024; (* length of dna arrays *) */ #define dnamax 5000000L /* length of dna arrays INCREASE TO GENOME SIZE FOR SPEED */ #define namelength 100 /* maximum key name length */ #define linelength 80 /* maximum line readable in book */ /* LOCK end module book.const version = 7.49; {of delmod.p 2000 June 26} */ /* begin module string.const */ #define maxstring 2000 /* the maximum string */ /* end module string.const version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* begin module filler.const */ #define fillermax 50 /* the size of the filler array for a string */ /* end module filler.const version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* begin module string.type */ /* pointer to a string */ typedef struct string { /* a string of characters */ Char letters[maxstring]; /* the letters in the string */ long length; /* the number of characters in the string */ long current; /* the letter we are working on */ Char *next; /* the next string in a series */ } string; /* end module string.type version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* begin module filler.type */ /* the following is an array used to fill a string. it is convenient to have it much shorter than the maxstring, so that it is easy to fill the string using procedure fillstring. the user must declare the value of constant fillermax. */ typedef Char filler[fillermax]; /* end module filler.type version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* begin module trigger.type */ typedef struct trigger { /* an object to be searched for */ string seek; /* the characters looked for */ long state; /* how close to triggering we are */ boolean skip; /* trigger not found- skip the line */ /* the trigger was found */ boolean found; } trigger; /* end module trigger.type version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* begin module book.type */ /* types needed for book manipulations */ typedef long chset[5]; /* types defined in book definition */ typedef Char alpha[namelength]; /* this is not alfa */ /* name is a left justified string with blanks following the characters */ typedef struct name { alpha letters; /* zero means an unspecified structure */ char length; } name; typedef struct line { /* a line of characters */ Char letters[linelength]; char length; struct line *next; } line; typedef enum { plus, minus, dircomplement, dirhomologous } direction; typedef enum { linear, circular } configuration; typedef enum { on, off } state; typedef struct header { /* header of key */ name keynam; /* key name of structure */ line *fulnam; /* full name of structure */ /* note key */ line *note; } header; /* begin module base.type */ /* define the four nucleotide bases */ typedef enum { a, c, g, t } base; /* end module base.type version = 7.67; {of delmod.p 2004 Sep 8} */ /* sequence types */ typedef long dnarange; /* p2c: sebo.p, line 472: * Note: Field width for seq assumes enum base has 4 elements [105] */ typedef uchar seq[(dnamax + 3) / 4]; typedef struct dnastring { seq part; dnarange length; struct dnastring *next; } dnastring; typedef struct orgkey { /* organism key */ header hea; /* genetic map units */ line *mapunit; } orgkey; typedef struct chrkey { /* chromosome key */ header hea; double mapbeg; /* number of genetic map beginning */ /* number of genetic map ending */ double mapend; } chrkey; typedef struct piekey { /* piece key */ header hea; double mapbeg; /* genetic map beginning */ configuration coocon; /* configruation (circular/linear) */ direction coodir; /* direction (+/-) relative to genetic map */ long coobeg; /* beginning nucleotide */ long cooend; /* ending nucleotide */ configuration piecon; /* configruation (circular/linear) */ direction piedir; /* direction (+/-) relative to coordinates */ long piebeg; /* beginning nucleotide */ long pieend; /* ending nucleotide */ } piekey; typedef struct piece { piekey key; dnastring *dna; } piece; typedef struct reference { name pienam; /* name of piece referred to */ double mapbeg; /* genetic map beginning */ direction refdir; /* direction relative to coordinates */ long refbeg; /* beginning nucleotide */ long refend; /* ending nucleotide */ } reference; typedef struct genkey { /* gene key */ header hea; reference ref; } genkey; typedef struct trakey { /* transcript key */ header hea; reference ref; } trakey; typedef struct markey { /* marker key */ header hea; reference ref; state sta; line *phenotype; struct marker *next; } markey; typedef struct marker { markey key; dnastring *dna; } marker; /* end module book.type version = 7.67; {of delmod.p 2004 Sep 8} */ Static _TEXT inst, book; /* file used by this program */ Static _TEXT target; /* file used by this program */ Static _TEXT sebop; /* file used by this program */ Static _TEXT list; /* file used by this program */ Static _TEXT instout; /* file used by this program */ /* begin module book.var */ /* ************************************************************************ */ /* global variables needed for book manipulations */ /* free storage: */ Static line *freeline; /* unused lines */ Static dnastring *freedna; /* unused dnas */ Static boolean readnumber; /* whether to read a number from the notes, or to read in the notes */ Static long number; /* the number of the item just read */ Static boolean numbered; /* true when the item just read is numbered */ Static boolean skipunnum; Static jmp_buf _JL1; /* a control variable to allow skipping of un-numbered items in the book */ /* ************************************************************************ */ /* end module book.var version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module halt */ Static Void halt() { /* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. */ printf(" program halt.\n"); longjmp(_JL1, 1); } /* end module halt version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module copyaline */ Static Void copyaline(fin, fout) _TEXT *fin, *fout; { /* copy a line from file fin to file fout */ while (!P_eoln(fin->f)) { putc(P_peek(fin->f), fout->f); getc(fin->f); } fscanf(fin->f, "%*[^\n]"); getc(fin->f); putc('\n', fout->f); } /* copyaline */ /* end module copyaline version = 7.67; {of delmod.p 2004 Sep 8} */ /******************************************************************************/ /******************************************************************************/ /******************************************************************************/ /* begin module package.trigger */ /* ************************************************************************ */ /* begin module clearstring */ Static Void clearstring(ribbon) string *ribbon; { /* empty the string */ long index; /* to the ribbon */ for (index = 0; index < maxstring; index++) ribbon->letters[index] = ' '; ribbon->length = 0; ribbon->current = 0; } /* clearstring */ Static Void initializestring(ribbon) string *ribbon; { /* start the string with a nil pointer. This routine should be called before doing linked list work. This allows the standard string routines to clear the string without killing the pointer. */ clearstring(ribbon); ribbon->next = NULL; } /* initializestring */ /* end module clearstring version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* begin module writestring */ Static Void writestring(tofile, s) _TEXT *tofile; string *s; { /* write the string s to file tofile, no writeln */ long i; /* index to s */ long FORLIM; FORLIM = s->length; for (i = 0; i < FORLIM; i++) putc(s->letters[i], tofile->f); } /* writestring */ /* end module writestring version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* begin module filler.fillstring */ Static Void fillstring(s, a_) string *s; Char *a_; { /* this procedure makes it reasonably easy to fill the string s with characters. one calls the procedure as: */ /* 1 2 3 4 5 */ /* 12345678901234567890123456789012345678901234567890 */ /* fillstring(s, 'this-is-the-string '); the two comments make it easy to line the characters up. also, for this example, it was assumed that the length of filler as defined by the constant fillermax was 50. */ long length = fillermax; /* of the string without trailing blanks */ long index; /* of s */ clearstring(s); while (length > 1 && a_[length-1] == ' ') length--; if (length == 1 && a_[length-1] == ' ') { printf("fillstring: the string is empty\n"); halt(); } for (index = 0; index < length; index++) s->letters[index] = a_[index]; s->length = length; s->current = 1; } /* fillstring */ /* end module filler.fillstring version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* begin module filler.filltrigger */ Static Void filltrigger(t_, a_) trigger *t_; Char *a_; { /* fill the trigger t */ fillstring(&t_->seek, a_); } /* fillstring */ /* end module filler.filltrigger version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* begin module trigger.proc */ /* this module allows one to scan a series of characters, as from an array or a file, and to "trigger" or detect a simple string in the series. the advantage of the trigger is that several triggers can "observe" a stream of characters at once, each looking for a different thing. some other modules required: interact.const, interact.type */ Static Void resettrigger(t_) trigger *t_; { /* reset the trigger to ground state */ t_->state = 0; t_->skip = false; t_->found = false; } /* resettrigger */ Static Void testfortrigger(ch, t_) Char ch; trigger *t_; { /* look at the character ch. if it is part of the trigger (at the current trigger state), then the trigger state goes higher. if it is not part of the trigger then the trigger state is reset, skip is true and one should skip onward to find the trigger. if the trigger is found, found is true. 1996 Sep 12: Bug found! In the case of a trigger "ab", the program used to miss it for situations like "aab". This was because at the first a it would step up. Then it would see the second a and recognize that was not part of ab. It would fail to realize that it could be the start of a new one. The code now accounts for that possibility. */ t_->state++; /* writestring(list,seek); writeln(list,'testfortrigger seek.letters[',state:1,']:', seek.letters[state],' ch:',ch); */ if (t_->seek.letters[t_->state - 1] == ch) { t_->skip = false; if (t_->state == t_->seek.length) t_->found = true; else t_->found = false; return; } /* it failed. But wait! It could be the beginning of a NEW trigger string! */ if (t_->seek.letters[0] == ch) { t_->state = 1; t_->skip = false; t_->found = false; return; } t_->state = 0; t_->skip = true; t_->found = false; /* reset trigger */ } /* testfortrigger */ /* end module trigger.proc version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* begin module skipblanks */ /* 2003 July 31: tab is considered a blank character */ Static boolean isblank(c_) Char c_; { /* is the character c blank or tab? */ return (c_ == ' ' || c_ == '\t'); } Static Void skipblanks(thefile) _TEXT *thefile; { /* skip over blanks until a non-blank, or end of line, is found */ while (isblank(P_peek(thefile->f)) & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipnonblanks(thefile) _TEXT *thefile; { /* skip over nonblanks until a blank, or end of line, is found */ while ((!isblank(P_peek(thefile->f))) & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipcolumn(thefile) _TEXT *thefile; { /* skip over a data column */ skipblanks(thefile); skipnonblanks(thefile); } /* end module skipblanks version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* ************************************************************************ */ /* end module package.trigger version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /******************************************************************************/ /******************************************************************************/ /******************************************************************************/ /* begin module package.align */ /* ************************************************************************ */ /* begin module package.getpiece */ /* ************************************************************************ */ /* begin module package.brpiece */ /* ************************************************************************ */ /* begin module book.basis */ /* procedures needed for book manipulations */ /* get procedures should be used for all linked lists of records */ Static Void getline(l) line **l; { /* obtain a line from the free line list or by making a new one */ if (freeline != NULL) { *l = freeline; freeline = freeline->next; } else *l = (line *)Malloc(sizeof(line)); (*l)->length = 0; (*l)->next = NULL; } Static Void getdna(l) dnastring **l; { if (freedna != NULL) { *l = freedna; freedna = freedna->next; } else *l = (dnastring *)Malloc(sizeof(dnastring)); (*l)->length = 0; (*l)->next = NULL; } /* clear procedures should be called each time the records are no longer needed failure to do this may result in a stack overflow. */ Static Void clearline(l) line **l; { /* return a line to the free line list */ line *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = freeline; freeline = lptr; } Static Void writeline(afile, l, carriagereturn) _TEXT *afile; line *l; boolean carriagereturn; { /* write a line to a file, with carriage return if carriagereturn is true. */ long index; /* index to characters in l */ long FORLIM; FORLIM = l->length; for (index = 0; index < FORLIM; index++) putc(l->letters[index], afile->f); if (carriagereturn) putc('\n', afile->f); } Static Void showfreedna() { /* show the freedna list */ long counter = 0; /* count of freedna list */ dnastring *l; /* pointer into freedna list */ l = freedna; while (l != NULL) { counter++; printf("%ld", counter); printf(", length = %ld\n", l->length); /* This is illegal according to gpc because one cannot write a pointer to a text file. It can be unearthed for debugging. write(output, ', pointer id: ',l:1); */ l = l->next; } } Static Void cleardna(l) dnastring **l; { /* clear the dna strutures to the free list */ dnastring *lptr; if (*l == NULL) return; lptr = *l; *l = (*l)->next; lptr->next = freedna; freedna = lptr; } Static Void clearheader(h) header *h; { /* clear the header h (remove lines to free storage) */ clearline(&h->fulnam); while (h->note != NULL) clearline(&h->note); } Static Void clearpiece(p) piece **p; { /* clear the dna of the piece */ while ((*p)->dna != NULL) cleardna(&(*p)->dna); clearheader(&(*p)->key.hea); } Static base chartobase(ch) Char ch; { /* convert a character into a base */ base Result; switch (ch) { case 'a': Result = a; break; case 'c': Result = c; break; case 'g': Result = g; break; case 't': Result = t; break; } return Result; } Static Char basetochar(ba) base ba; { /* convert a base into a character */ Char Result; switch (ba) { case a: Result = 'a'; break; case c: Result = 'c'; break; case g: Result = 'g'; break; case t: Result = 't'; break; } return Result; } Static base complement(ba) base ba; { /* take the complement of ba */ base Result; switch (ba) { case a: Result = t; break; case c: Result = g; break; case g: Result = c; break; case t: Result = a; break; } return Result; } Static Char chomplement(b) Char b; { /* create the character complement of base b. I must be getting hungry! */ return (basetochar(complement(chartobase(b)))); } Static long pietoint(p, pie) long p; piece *pie; { /* p is a coordinate on the piece. we want to transform p into a number from 1 to n: an internal coordinate system for easy manipulation of piece coordinates */ /* Note: the dirhomologous and dircomplement are treated as plus and minus directions, which MIGHT NOT BE RIGHT! */ long i; /* an intermediate value */ piekey *WITH; WITH = &pie->key; switch (WITH->piedir) { case dirhomologous: case plus: if (p >= WITH->piebeg) i = p - WITH->piebeg + 1; else i = p - WITH->coobeg + WITH->cooend - WITH->piebeg + 2; break; case dircomplement: case minus: if (p <= WITH->piebeg) i = WITH->piebeg - p + 1; else i = WITH->cooend - p + WITH->piebeg - WITH->coobeg + 2; break; } return i; } Static long inttopie(i, pie) long i; piece *pie; { /* i is in the range 1 to some maximum. it is an internal coordinate system for the program. we want to do a coordinate transformation to obtain a value in the range of the piece called pie: i=1 corresponds to piebeg and i=its maximum corresponds to pieend */ /* Note: the dirhomologous and dircomplement are treated as plus and minus directions, which MIGHT NOT BE RIGHT! */ long p; /* an intermediate value */ piekey *WITH; WITH = &pie->key; switch (WITH->piedir) { case dirhomologous: case plus: p = WITH->piebeg + i - 1; if (p > WITH->cooend) { if (WITH->coocon == circular) p += WITH->coobeg - WITH->cooend - 1; } break; case dircomplement: case minus: p = WITH->piebeg - i + 1; if (p < WITH->coobeg) { if (WITH->coocon == circular) p += WITH->cooend - WITH->coobeg + 1; } break; } return p; } Static long piecelength(pie) piece *pie; { /* return the length of the dna in pie */ return (pietoint(pie->key.pieend, pie)); } /* end module book.basis version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.getto */ Static Char getto(thefile, theline, ch) _TEXT *thefile; long *theline; long *ch; { /* search the file for a character in the first line which is a member of the set ch. Note: on 1999 March 10 the definition of this function was cleaned up. Instead of putting thefile on the line AFTER the charcter ch has been found, it puts thefile ON the line. Other routines like brdna and brpiece have to move to the next line themselves. This makes getto give the OBJECT. */ Char achar = ' '; /* a character in thefile */ boolean done = false; /* done finding achar */ while (!done) { if (BUFEOF(thefile->f)) { done = true; break; } achar = P_peek(thefile->f); if (P_inset(achar, ch)) { done = true; break; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } if (P_inset(achar, ch)) return achar; else { return ' '; /* The old method - while (not(achar in ch)) and (not eof(thefile)) do begin readln(thefile,achar); theline := succ(theline) end; if (achar in ch) then getto:=achar else getto:=' ' */ } } /* end module book.getto version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.skipstar */ Static Void skipstar(thefile) _TEXT *thefile; { /* skip start of line (or star = '*'). */ if (BUFEOF(thefile->f)) { printf(" procedure skipstar: end of book found\n"); halt(); return; } if (P_peek(thefile->f) != '*') { printf(" procedure skipstar: bad book\n"); printf(" \"*\" expected as first character on the line, but \"%c\" was found\n", P_peek(thefile->f)); halt(); } getc(thefile->f); /* skip the star */ if (P_peek(thefile->f) != ' ') { /* skip the blank */ printf(" procedure skipstar: bad book\n"); printf(" \"* \" expected on a line but \"*%c\" was found\n", P_peek(thefile->f)); halt(); } getc(thefile->f); } /* skipstar */ /* end module book.skipstar version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brreanum */ Static Void brreanum(thefile, theline, reanum) _TEXT *thefile; long *theline; double *reanum; { /* read a real number from the file */ skipstar(thefile); fscanf(thefile->f, "%lg%*[^\n]", reanum); getc(thefile->f); (*theline)++; } /* end module book.brreanum version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brnumber */ Static Void brnumber(thefile, theline, num) _TEXT *thefile; long *theline, *num; { /* read a number from the file */ skipstar(thefile); fscanf(thefile->f, "%ld%*[^\n]", num); getc(thefile->f); (*theline)++; } /* end module book.brnumber version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brname */ Static Void brname(thefile, theline, nam) _TEXT *thefile; long *theline; name *nam; { /* read a name from the file */ long i; /* an index to the name */ Char c_; /* a character read */ skipstar(thefile); nam->length = 0; do { nam->length++; c_ = getc(thefile->f); if (c_ == '\n') c_ = ' '; nam->letters[nam->length - 1] = c_; } while (!(P_eoln(thefile->f) || nam->length >= namelength || nam->letters[nam->length - 1] == ' ')); if (nam->letters[nam->length - 1] == ' ') nam->length--; if (nam->length < namelength) { for (i = nam->length; i < namelength; i++) nam->letters[i] = ' '; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } /* brname */ /* end module book.brname version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brline */ Static Void brline(thefile, theline, l) _TEXT *thefile; long *theline; line **l; { /* read a line from the file */ long i = 0; Char acharacter; skipstar(thefile); while (!P_eoln(thefile->f)) { i++; acharacter = getc(thefile->f); if (acharacter == '\n') acharacter = ' '; (*l)->letters[i-1] = acharacter; } (*l)->length = i; (*l)->next = NULL; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } /* end module book.brline version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brdirect */ Static Void brdirect(thefile, theline, direct) _TEXT *thefile; long *theline; direction *direct; { /* read a direction */ Char ch; skipstar(thefile); fscanf(thefile->f, "%c%*[^\n]", &ch); getc(thefile->f); if (ch == '\n') ch = ' '; (*theline)++; if (ch == '+') *direct = plus; else *direct = minus; } /* end module book.brdirect version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brconfig */ Static Void brconfig(thefile, theline, config) _TEXT *thefile; long *theline; configuration *config; { /* read a configuration */ Char ch; skipstar(thefile); fscanf(thefile->f, "%c%*[^\n]", &ch); getc(thefile->f); if (ch == '\n') ch = ' '; (*theline)++; if (ch == 'l') *config = linear; else *config = circular; } /* end module book.brconfig version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brnotenumber */ Static Void brnotenumber(thefile, theline, note) _TEXT *thefile; long *theline; line **note; { /* book note reading to obtain the number of the object. the procedure returns the value of the number as a global. (this is not such a good practice, but we are stuck with it for now.) */ *note = NULL; numbered = false; number = 0; /* force number to zero if there is no number at all */ /* the next character is n or * depending on whether there are notes */ if (P_peek(thefile->f) != 'n') return; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; if (P_peek(thefile->f) != 'n') { skipstar(thefile); if (!P_eoln(thefile->f)) { if (P_peek(thefile->f) == '#') { numbered = true; getc(thefile->f); /* move past the number symbol */ fscanf(thefile->f, "%ld", &number); } } do { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } while (P_peek(thefile->f) != 'n'); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; return; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } /* brnotenumber */ /* end module book.brnotenumber version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brnote */ Static Void brnote(thefile, theline, note) _TEXT *thefile; long *theline; line **note; { /* read note key */ line *newnote; /* the new note */ line *previousnote; /* the last line of the notes */ *note = NULL; if (P_peek(thefile->f) != 'n') /* enter note */ return; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; if (P_peek(thefile->f) != 'n') { /* abort null note (n/n) */ getline(note); newnote = *note; while (P_peek(thefile->f) != 'n') { /* wait until end of note */ brline(thefile, theline, &newnote); previousnote = newnote; /* get next note */ getline(&newnote->next); newnote = newnote->next; } /* last note was not used, so: */ clearline(&newnote); previousnote->next = NULL; fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; return; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; } /* brnote */ /* end module book.brnote version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brheader */ Static Void brheader(thefile, theline, hea) _TEXT *thefile; long *theline; header *hea; { /* read the header of a key. */ fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* move past the object name - new definition 1999 Mar 13 */ (*theline)++; /*bbb*/ /* read key name */ brname(thefile, theline, &hea->keynam); /* read full name */ getline(&hea->fulnam); brline(thefile, theline, &hea->fulnam); /* read note key */ if (readnumber) brnotenumber(thefile, theline, &hea->note); else brnote(thefile, theline, &hea->note); } /* end module book.brheader version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.copyheader */ Static Void copyheader(fromhea, tohea) header fromhea, *tohea; { /* copy the header fromhea into tohea. Note that the linked objects are NOT copied, but merely pointed to. */ memcpy(tohea->keynam.letters, fromhea.keynam.letters, sizeof(alpha)); tohea->keynam.length = fromhea.keynam.length; tohea->note = fromhea.note; tohea->fulnam = fromhea.fulnam; } /* end module book.copyheader version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brpiekey */ Static Void brpiekey(thefile, theline, pie) _TEXT *thefile; long *theline; piekey *pie; { /* read piece key, track the line number */ brheader(thefile, theline, &pie->hea); brreanum(thefile, theline, &pie->mapbeg); brconfig(thefile, theline, &pie->coocon); brdirect(thefile, theline, &pie->coodir); brnumber(thefile, theline, &pie->coobeg); brnumber(thefile, theline, &pie->cooend); brconfig(thefile, theline, &pie->piecon); brdirect(thefile, theline, &pie->piedir); brnumber(thefile, theline, &pie->piebeg); brnumber(thefile, theline, &pie->pieend); } /* end module book.brpiekey version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brdna */ Static Void brdna(thefile, theline, dna) _TEXT *thefile; long *theline; dnastring **dna; { /* read in dna from thefile, track the line */ /* note: if the dna were circularized, by linking the last dnastring to the first, then the cleardna routine could not clear properly, and would loop forever... there is no reason to do that, since a simple mod function will allow one to access the circle. */ Char ch; dnastring *workdna; long SET[5]; long TEMP; getdna(dna); workdna = *dna; ch = getto(thefile, theline, P_addset(P_expset(SET, 0L), 'd')); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); (*theline)++; ch = getc(thefile->f); /* skipstar */ if (ch == '\n') ch = ' '; while (ch == '*') { ch = getc(thefile->f); /* skip blank */ if (ch == '\n') ch = ' '; do { ch = getc(thefile->f); if (ch == '\n') ch = ' '; if (ch == 't' || ch == 'g' || ch == 'c' || ch == 'a') { if (workdna->length == dnamax) { getdna(&workdna->next); workdna = workdna->next; } workdna->length++; TEMP = workdna->length - 1; P_clrbits_B(workdna->part, TEMP, 1, 3); P_putbits_UB(workdna->part, TEMP, (int)chartobase(ch), 1, 3); } } while (!P_eoln(thefile->f)); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* go to next line */ (*theline)++; ch = getc(thefile->f); /* ch is either '*' or 'd' */ if (ch == '\n') ch = ' '; } fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* read past the d */ (*theline)++; } /* end module book.brdna version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brpiece */ Static Void brpiece(thefile, theline, pie) _TEXT *thefile; long *theline; piece **pie; { /* read in a piece, change theline to reflect the lines traversed */ /* readln(thefile); (* move past the word 'piece' - new definition 1999 Mar 13 *) theline := succ(theline); (* BUG: was below! *) bbb*/ brpiekey(thefile, theline, &(*pie)->key); if (numbered || !skipunnum) brdna(thefile, theline, &(*pie)->dna); fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* move past the word 'piece' - new definition 1999 Mar 13 */ (*theline)++; } /* end module book.brpiece version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brinit */ Static Void brinit(book, theline) _TEXT *book; long *theline; { /* check that the book is ok to read, and set up the global variables for br routines */ /* halt if the book is bad (first word is 'halt') or the first character is not * */ if (*book->name != '\0') { if (book->f != NULL) book->f = freopen(book->name, "r", book->f); else book->f = fopen(book->name, "r"); } else rewind(book->f); if (book->f == NULL) _EscIO2(FileNotFound, book->name); RESETBUF(book->f, Char); if (!BUFEOF(book->f)) { /* check for the date line */ if (P_peek(book->f) != '*') { if (P_peek(book->f) != 'h') printf(" this is not the first line of a book:\n"); else printf(" bad book:\n"); putchar(' '); while (!(P_eoln(book->f) | BUFEOF(book->f))) { putchar(P_peek(book->f)); getc(book->f); } putchar('\n'); halt(); } } else { printf(" book is empty\n"); halt(); } /* initialize free storage */ freeline = NULL; freedna = NULL; readnumber = true; /* usually we read in numbers for items */ number = 0; /* arbitrary value */ numbered = false; /* the piece has no number (none yet read in) */ skipunnum = false; *theline = 1; } /* brinit */ /* end module book.brinit version = 7.67; {of delmod.p 2004 Sep 8} */ /* ************************************************************************ */ /* end module package.brpiece version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.getpiece */ Static Void getpiece(thefile, theline, pie) _TEXT *thefile; long *theline; piece **pie; { /* move to and read in the next piece in the book */ Char ch; long SET[5]; ch = getto(thefile, theline, P_addset(P_expset(SET, 0L), 'p')); /* get to the next p(iece) in the book */ if (ch != ' ') { brpiece(thefile, theline, pie); /* 1999 june 2: removed this: ch:=getto(thefile,theline,['p']); (* read to end of p *) */ /* bbb - now done in brpiece readln(thefile); (* read past piece *) theline := succ(theline); */ } else clearpiece(pie); } /* end module book.getpiece version = 7.67; {of delmod.p 2004 Sep 8} */ /* ************************************************************************ */ /* end module package.getpiece version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module findblank */ Static Void findblank(afile) _TEXT *afile; { /* read a file to find the next blank character */ Char ch; do { ch = getc(afile->f); if (ch == '\n') ch = ' '; } while (ch != ' '); } /* end module findblank version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module findnonblank */ Static Void findnonblank(afile, ch) _TEXT *afile; Char *ch; { /* find the next non blank character in a file, return it in ch. */ *ch = ' '; while (!BUFEOF(afile->f) && *ch == ' ') { *ch = getc(afile->f); if (*ch == '\n') *ch = ' '; if (P_eoln(afile->f)) { fscanf(afile->f, "%*[^\n]"); getc(afile->f); } } } #define maximumrange 10000 /* if the alignment point is more than this distance from the piece ends, the program halts in an attempt to catch the alignment bug... 1991 Jan 11 It appears that the rewrite of the code has removed the bug, but the check will be kept. */ #define semicolon ';' /* end of delila instruction */ /* Local variables for align: */ struct LOC_align { _TEXT *inst; Char ch; /* a character in inst */ trigger endcomment; /* trigger to find '*-)' (ignore the dash!) */ trigger endcurly; /* trigger to find comments: '}' */ } ; /* procedure rd(var f: text; var ch: char); (* read ch from f allowing inspection of the result *) begin read(f,ch); write(output,ch); write(list,ch); write(output,'<',ch,'>'); end; procedure rdln(var f: text); (* readln f allowing inspection of the result *) begin readln(f); writeln(output); writeln(list); end; */ Local Void skipcomment(f, LINK) _TEXT *f; struct LOC_align *LINK; { /* skip an entire comment */ boolean comment = true; /* true means we are inside a comment */ /* skip to end of comment */ resettrigger(&LINK->endcomment); while (comment) { if (BUFEOF(f->f)) { printf("A comment does not end!\n"); halt(); } if (P_eoln(f->f)) { fscanf(f->f, "%*[^\n]"); getc(f->f); continue; } /* rdln(f) */ LINK->ch = getc(f->f); if (LINK->ch == '\n') LINK->ch = ' '; testfortrigger(LINK->ch, &LINK->endcomment); if (LINK->endcomment.found) { comment = false; /*write(output,'<'); rd(f,ch); write(output,'>');*/ } } } Local Void skipcurly(f, LINK) _TEXT *f; struct LOC_align *LINK; { /* skip an entire comment made by {}*/ boolean comment = true; /* true means we are inside a comment */ /* skip to end of comment */ resettrigger(&LINK->endcurly); while (comment) { if (BUFEOF(f->f)) { printf("A comment does not end!\n"); halt(); } if (P_eoln(f->f)) { fscanf(f->f, "%*[^\n]"); getc(f->f); continue; } /* rdln(f) */ LINK->ch = getc(f->f); if (LINK->ch == '\n') LINK->ch = ' '; testfortrigger(LINK->ch, &LINK->endcurly); if (LINK->endcurly.found) { comment = false; /*write(output,'<'); rd(f,ch); write(output,'>');*/ } } } Local Void skipquote(quote, LINK) trigger quote; struct LOC_align *LINK; { /* skip an entire quote of either the ' or " persuasion */ Char kind; /* the kind of quote, ' or " */ kind = quote.seek.letters[0]; /*writeln(output,'skipquote ',kind);*/ do { findnonblank(LINK->inst, &LINK->ch); /* get to the quote */ } while (!((LINK->ch == kind) | BUFEOF(LINK->inst->f))); if (LINK->ch != kind) { printf("end of quote starting with %c not found\n", kind); halt(); } } /* end module findnonblank version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module align.align */ Static Void align(inst_, book, theline, pie, length, alignedbase) _TEXT *inst_, *book; long *theline; piece **pie; long *length, *alignedbase; { /* documentation on align is in module info.align and delman.use.aligned.books. 1996 Sep 12: The routine now uses the trigger functions found in prgmod. The bug in the oldalign routine (that it misses the end of comments that end in a series of astrisks) has been fixed. It now checks that the piece corresponds to the book. */ struct LOC_align V; long p; /* index to a piece name */ long p1; /* another index to a piece name */ boolean done = false; /* done finding an aligning get */ long thebase; /* the base read in */ boolean indefault = false; /* true when within a default statement. These can contain the word 'piece', which must be ignored. */ trigger gettrigger; /* trigger to find 'get' */ trigger defaulttrigger; /* trigger to find 'default' */ trigger nametrigger; /* trigger to find 'name' */ trigger piecetrigger; /* trigger to find 'piece' */ trigger settrigger; /* trigger to find 'set' */ trigger begincomment; /* trigger to find '(-*' (ignore the dash!) */ trigger begincurly; /* trigger to find comments: '{' */ trigger quote1trigger; /* trigger to find single quote ' */ trigger quote2trigger; /* trigger to find double quote " */ name *WITH; long FORLIM; V.inst = inst_; filltrigger(&defaulttrigger, "default "); filltrigger(&gettrigger, "get "); filltrigger(&nametrigger, "name "); filltrigger(&piecetrigger, "piece "); filltrigger(&settrigger, "set "); filltrigger(&begincomment, "(* "); filltrigger(&V.endcomment, "*) "); filltrigger(&begincurly, "{ "); filltrigger(&V.endcurly, "} "); filltrigger("e1trigger, "' "); filltrigger("e2trigger, "\" "); resettrigger(&defaulttrigger); resettrigger(&gettrigger); resettrigger(&nametrigger); resettrigger(&piecetrigger); resettrigger(&settrigger); resettrigger(&begincomment); resettrigger(&begincurly); resettrigger("e1trigger); resettrigger("e2trigger); if (BUFEOF(book->f)) /* if there is still more to the book ... */ return; getpiece(book, theline, pie); /* read in the piece */ if (BUFEOF(book->f)) /* if we found a piece ... */ return; *length = pietoint((*pie)->key.pieend, *pie); /* calculate piece length */ /* now find in inst the next occurance of 'get' */ while (!done) { if (BUFEOF(V.inst->f)) { /* no instructions? */ *alignedbase = 1; /* simply align by the first base */ done = true; break; } if (P_eoln(V.inst->f)) { fscanf(V.inst->f, "%*[^\n]"); getc(V.inst->f); continue; } /*then rdln(inst)*/ V.ch = getc(V.inst->f); if (V.ch == '\n') V.ch = ' '; testfortrigger(V.ch, &begincomment); testfortrigger(V.ch, &begincurly); if (begincomment.found || begincurly.found) { if (V.ch == '*') { skipcomment(V.inst, &V); resettrigger(&begincomment); } else { resettrigger(&begincurly); skipcurly(V.inst, &V); } continue; } testfortrigger(V.ch, &gettrigger); if (gettrigger.found) { findnonblank(V.inst, &V.ch); /* get to "from" */ findblank(V.inst); /* get past "from" */ fscanf(V.inst->f, "%ld", &thebase); /* read in the alignedbase */ /*writeln(output);writeln(output,'thebase = ',thebase:1);*/ *alignedbase = pietoint(thebase, *pie); /*writeln(output,'alignedbase=',alignedbase:1);*/ done = true; } testfortrigger(V.ch, "e1trigger); if (quote1trigger.found) skipquote(quote1trigger, &V); testfortrigger(V.ch, "e2trigger); if (quote2trigger.found) skipquote(quote2trigger, &V); testfortrigger(V.ch, &defaulttrigger); if (defaulttrigger.found) { indefault = true; resettrigger(&defaulttrigger); } if (V.ch == semicolon) indefault = false; testfortrigger(V.ch, &settrigger); if (settrigger.found) { indefault = true; resettrigger(&settrigger); } if (V.ch == semicolon) indefault = false; /* check that piece names are correct */ testfortrigger(V.ch, &piecetrigger); if (indefault) continue; if (!piecetrigger.found) continue; skipblanks(V.inst); /* get to name */ WITH = &(*pie)->key.hea.keynam; FORLIM = WITH->length; for (p = 1; p <= FORLIM; p++) { V.ch = getc(V.inst->f); if (V.ch == '\n') V.ch = ' '; if (WITH->letters[p-1] != V.ch) { printf("The piece name in the book: \n"); /* p2c: sebo.p, line 1660: Note: * Format for packed-array-of-char will work only if width < length [321] */ printf("%.*s\n", WITH->length, WITH->letters); printf("does not match the inst file piece name:\n"); /* write the letters that matched: */ for (p1 = 0; p1 <= p - 2; p1++) putchar(WITH->letters[p1]); /* write the offending letter: */ putchar(V.ch); /* get the rest of the name and show it: */ done = P_eoln(V.inst->f); while (!done) { done = P_eoln(V.inst->f); if (done) break; V.ch = getc(V.inst->f); if (V.ch == '\n') V.ch = ' '; if (V.ch == ' ' || V.ch == ';') done = true; if (!done) putchar(V.ch); } putchar('\n'); /* mark the first letter that does not match: */ for (p1 = 1; p1 < p; p1++) putchar(' '); printf("^\n"); halt(); /* we are not inside a comment */ } } } /*rd(inst,ch);*/ if (*alignedbase > -maximumrange && *alignedbase <= *length + maximumrange) return; printf(" In procedure align:\n"); printf(" read in base was %ld\n", thebase); printf(" in internal coordinates: %ld\n", *alignedbase); printf(" maximum range was %ld\n", (long)maximumrange); printf(" piece length was %ld\n", *length); WITH = &(*pie)->key.hea.keynam; /* p2c: sebo.p, line 1704: Note: * Format for packed-array-of-char will work only if width < length [321] */ printf(" piece name: %.*s\n", WITH->length, WITH->letters); printf(" piece number: %ld\n", number); printf(" aligned base is too far away... see the code\n"); halt(); } #undef maximumrange #undef semicolon /* end module align.align version = 7.67; {of delmod.p 2004 Sep 8} */ /* ************************************************************************ */ /* end module package.align version = 7.67; {of delmod.p 2004 Sep 8} */ /******************************************************************************/ /* begin module align.withinalignment */ Static boolean withinalignment(alignedposition, alignedbase, length) long alignedposition, alignedbase, length; { /* this function tells one if an aligned position, relative to an aligned base in a piece of some length is within the piece. */ long p; /* the position on the piece */ p = alignedposition + alignedbase; return (p > 0 && p <= length); } /* end module align.withinalignment version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.getbase */ Static base getbase(position, pie) long position; piece *pie; { /* Get a base from the position (internal coordinates) of the piece. Protection is made against positions outside the piece. In the case of circles it would be convenient to wrap around when requests are off the end. So the routine will do a modular wrap for positions outside the range 1 to the length. This is a new feature as of 2000 March 22. */ dnastring *workdna; /* pointer to the dna part of pie */ long p; /* current count of bases into the workdna */ long spot; /* the last base of the dna part */ long thelength; /* the length of the piece */ /* writeln(output,'NEW getbase: position=',position:1,'^^^^^^^^^^^^^^^^^^^^'); */ /* handle cases of position out of range by circular wrapping */ thelength = piecelength(pie); while (position < 1) position += thelength; while (position > thelength) position -= thelength; workdna = pie->dna; p = workdna->length; while (position > p) { /* writeln(output,' workdna^.length=',workdna^.length:1); */ workdna = workdna->next; if (workdna == NULL) { printf("error in function getbase!\n"); halt(); } p += workdna->length; } /* writeln(output,'p=',p:1); */ if (true) { spot = workdna->length - p + position; /* writeln(output,'spot=',spot:1); showdnasegment(output,workdna, spot); */ if (spot <= 0) { printf("error in getbase, spot (= %ld) must be positive\n", spot); halt(); } if (spot > workdna->length) { printf("error in getbase, spot (=%ld) must be less than length (=%ld)\n", spot, workdna->length); halt(); } /* writeln(output,'base = ', workdna^.part[spot]); */ return ((base)P_getbits_UB(workdna->part, spot - 1, 1, 3)); } printf("error in getbase: request off end of piece\n"); halt(); } /* end module book.getbase version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.name */ Static Void clearname(n) name *n; { /* clear the name n */ long i; /* index to piece name */ n->length = 0; for (i = 0; i < namelength; i++) n->letters[i] = ' '; } Static Void writename(f, n) _TEXT *f; name n; { /* write the name n to file f */ long i; /* index to piece name */ for (i = 0; i < n.length; i++) putc(n.letters[i], f->f); } Static Void copyname(a_, b) name a_, *b; { /* copy name a to name b */ long i; /* index to piece name */ for (i = 0; i < a_.length; i++) b->letters[i] = a_.letters[i]; b->length = a_.length; } Static boolean equalname(a_, b) name a_, b; { /* is name a equal to name b? */ long i = 1; /* index to piece name */ boolean same = true; /* temporary variable to hold the answer */ /* what optimism!! */ if (b.length != a_.length) return false; while (same && i <= a_.length) { same = (b.letters[i-1] == a_.letters[i-1]); i++; /* p2c: sebo.p: Note: Eliminated unused assignment statement [338] */ } return same; } /* end module book.name version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brorgkey */ Static Void brorgkey(thefile, theline, org) _TEXT *thefile; long *theline; orgkey *org; { /* read organism key */ /*bbb*/ brheader(thefile, theline, &org->hea); getline(&org->mapunit); brline(thefile, theline, &org->mapunit); } /* end module book.brorgkey version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.brchrkey */ Static Void brchrkey(thefile, theline, chr) _TEXT *thefile; long *theline; chrkey *chr; { /* read chromosome key */ /*bbb*/ brheader(thefile, theline, &chr->hea); brreanum(thefile, theline, &chr->mapbeg); brreanum(thefile, theline, &chr->mapend); } /* end module book.brchrkey version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.getocp */ Static Void getocp(thefile, theline, org, orgchange, orgopen, chr, chrchange, chropen, pie, piechange, pieopen) _TEXT *thefile; long *theline; orgkey *org; boolean *orgchange, *orgopen; chrkey *chr; boolean *chrchange, *chropen; piece **pie; boolean *piechange, *pieopen; { /* Get the next piece and its organism and chromosome keys. The three change variables indicate whether or not a new organism, chromosome or piece name was found. If a piece is not found, then pieopen will be false. orgopen, chropen and pieopen are used by getocp to tell when it has entered an organism, chromosome or piece. All booleans should be set to false initially. There should be one triplet for each book read. It is important to initialize ALL variables, including pie: orgchange := false; orgopen := false; chrchange := false; chropen := false; piechange := false; pieopen := false; pie := nil; theline := 0; 1999 June 2 The book reading routines now treat data objects more precisely. Rather than test for eof, the endo of book occurs when pieopen is returned as false. A book reading loop now looks like this: repeat getocp(book, theline, org, orgchange, orgopen, chr, chrchange, chropen, pie, piechange, pieopen); writeln(output,'pieopen: ',pieopen); if pieopen then begin writeln(output,'piece at line: ',theline:1); end; until not pieopen; */ Char ch = 'a'; chrkey newchr; orgkey neworg; piece *newpie; long SET[5]; while (ch != 'p' && ch != ' ') { P_addset(P_expset(SET, 0L), 'o'); P_addset(SET, 'c'); ch = getto(thefile, theline, P_addset(SET, 'p')); if (ch == ' ') { *pieopen = false; break; } switch (ch) { case 'o': if (*orgopen) { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* move past the word 'organism' - new definition 1999 Mar 13 */ *orgopen = false; /* close organism */ } else { brorgkey(thefile, theline, &neworg); if (strncmp(neworg.hea.keynam.letters, org->hea.keynam.letters, sizeof(alpha)) && neworg.hea.keynam.length != org->hea.keynam.length) { /* writeln(output,'--------orgchanged!'); write (output,'--------old org:"', org.hea.keynam.letters); writeln(output, '" ', org.hea.keynam.length:1); write (output,'--------new org:"',neworg.hea.keynam.letters); writeln(output, '" ',neworg.hea.keynam.length:1); */ /*ccc*/ *orgchange = true; copyheader(neworg.hea, &org->hea); /* move the mapunit over to the org! */ org->mapunit = neworg.mapunit; clearline(&neworg.mapunit); } else *orgchange = false; *orgopen = true; } break; case 'c': if (*chropen) { fscanf(thefile->f, "%*[^\n]"); getc(thefile->f); /* move past the word 'chromosome' - new definition 1999 Mar 13 */ *chropen = false; /* close chromosome */ } else { brchrkey(thefile, theline, &newchr); if (strncmp(newchr.hea.keynam.letters, chr->hea.keynam.letters, sizeof(alpha)) && newchr.hea.keynam.length != chr->hea.keynam.length) { /* writeln(output,'--------chrchanged!'); write (output,'--------old chr:"', chr.hea.keynam.letters); writeln(output, '" ', chr.hea.keynam.length:1); write (output,'--------new chr:"',newchr.hea.keynam.letters); writeln(output, '" ',newchr.hea.keynam.length:1); */ *chrchange = true; copyheader(newchr.hea, &chr->hea); /* move the map range over to the chr! */ chr->mapbeg = newchr.mapbeg; chr->mapend = newchr.mapend; } else *chrchange = false; *chropen = true; } break; case 'p': if (*pieopen) { *pieopen = false; /* close last piece */ ch = 'a'; /* prevent falling out of the loop */ } else { newpie = (piece *)Malloc(sizeof(piece)); brpiece(thefile, theline, &newpie); if (*pie == NULL) *piechange = true; else { if (strncmp(newpie->key.hea.keynam.letters, (*pie)->key.hea.keynam.letters, sizeof(alpha)) && newpie->key.hea.keynam.length != (*pie)->key.hea.keynam.length) *piechange = true; else *piechange = false; } *pieopen = true; /* we always have to switch over to the new piece, because although the name may be the same, the DNA sequence could be different. That is, the book may contain two pieces with the same name, and we want to be sure to search the new one, not the old one. */ if (*pie != NULL) { clearpiece(pie); /* save the links */ Free(*pie); /* close up shop */ } *pie = newpie; } break; } } } /* origin: search version = 6.39 */ /* end module book.getocp version = 7.67; {of delmod.p 2004 Sep 8} */ /* ************************************************************************ */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module book.iwname */ Static Void iwname(thefile, thename) _TEXT *thefile; name thename; { /* write the name to the file */ long c_; for (c_ = 0; c_ < thename.length; c_++) putc(thename.letters[c_], thefile->f); } /* end module book.iwname version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.iworg */ Static Void iworg(afile, org) _TEXT *afile; orgkey org; { /* write an organism specification. no writeln is done to allow write orgchr to do this. */ fprintf(afile->f, "organism "); iwname(afile, org.hea.keynam); putc(';', afile->f); } /* end module book.iworg version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.iwchr */ Static Void iwchr(afile, chr) _TEXT *afile; chrkey chr; { /* write an chromosome specification. no writeln is done to allow write orgchr to do this. */ fprintf(afile->f, "chromosome "); iwname(afile, chr.hea.keynam); putc(';', afile->f); } /* end module book.iwchr version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.iwpie */ Static Void iwpie(afile, pie) _TEXT *afile; piekey pie; { /* write a piece specification */ fprintf(afile->f, "piece "); iwname(afile, pie.hea.keynam); fprintf(afile->f, ";\n"); } /* end module book.iwpie version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.iworgchr */ Static Void iworgchr(afile, org, orgchange, orgopen, chr, chrchange, chropen) _TEXT *afile; orgkey org; boolean orgchange, orgopen; chrkey chr; boolean chrchange, chropen; { /* write both organism and chromosome specifications, based on whether the organism or chromosome changed (orgchange and chrchange) and whether they are currently open (orgopen, chropen). See getocp in the br routines. */ if (orgchange && orgopen) iworg(afile, org); if (orgchange && chrchange && orgopen && chropen) putc(' ', afile->f); if (chrchange && chropen) iwchr(afile, chr); if (orgchange && orgopen || chrchange && chropen) putc('\n', afile->f); } /* Local variables for iwget: */ struct LOC_iwget { _TEXT *afile; piece *pie; long pieceplace, insttype; } ; /* Local variables for iwposition: */ struct LOC_iwposition { struct LOC_iwget *LINK; } ; Local Void iwrelative(relative, LINK) long relative; struct LOC_iwposition *LINK; { if (relative >= 0) fprintf(LINK->LINK->afile->f, " +%ld", relative); else if (relative < 0) fprintf(LINK->LINK->afile->f, " %ld", relative); } Local Void iwposition(relative, sameallowed, LINK) long relative; boolean sameallowed; struct LOC_iwget *LINK; { /* write the */ struct LOC_iwposition V; V.LINK = LINK; if (LINK->insttype == 1 && sameallowed) fprintf(LINK->afile->f, " same"); else fprintf(LINK->afile->f, " %ld", LINK->pieceplace); switch (LINK->pie->key.piedir) { case plus: iwrelative(relative, &V); break; case minus: iwrelative(-relative, &V); break; } } /* end module book.iworgchr version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.iwget */ Static Void iwget(afile_, pie_, fromplace, pieceplace_, toplace, flip, insttype_, carriagereturn) _TEXT *afile_; piece *pie_; long fromplace, pieceplace_, toplace; boolean flip; long insttype_; boolean carriagereturn; { /* print a get delila instruction in the orientation of pie, from fromplace to toplace pieceplace. +/- direction; If flip is false, the piece direction is as on the piece, if it is true, the it is the opposite direction. insttype: instruction type. insttype=1 means the form get from p -/+f to same +/-t dir +/-; insttype=2 means the form get from p1 -/+f to p2 +/-t dir +/-; where p, p1 and p2 are locations carriagereturn: if true, add a carriage return to the end of the line. */ struct LOC_iwget V; V.afile = afile_; V.pie = pie_; V.pieceplace = pieceplace_; V.insttype = insttype_; fprintf(V.afile->f, "get from"); iwposition(fromplace, false, &V); fprintf(V.afile->f, " to"); iwposition(toplace, true, &V); fprintf(V.afile->f, " direction"); switch (V.pie->key.piedir) { case dirhomologous: case plus: /* handle case, may not be right */ switch (flip) { case false: fprintf(V.afile->f, " +"); break; case true: fprintf(V.afile->f, " -"); break; } break; case dircomplement: case minus: /* handle case, may not be right */ switch (flip) { case false: fprintf(V.afile->f, " -"); break; case true: fprintf(V.afile->f, " +"); break; } break; } putc(';', V.afile->f); if (carriagereturn) putc('\n', V.afile->f); } /* end module book.iwget version = 7.67; {of delmod.p 2004 Sep 8} */ /* begin module book.iwgetsimple */ Static Void iwgetsimple(afile, pie, fromplace, toplace, flip, carriagereturn) _TEXT *afile; piece *pie; long fromplace, toplace; boolean flip, carriagereturn; { /* iwget */ /* print a get delila instruction in the orientation of pie, from fromplace to toplace pieceplace. +/- direction; If flip is false, the piece direction is as on the piece, if it is true, the it is the opposite direction. Format: get from [pieceplace+fromplace] to [pieceplace+ toplace] +/- direction; where [] means to precompute the value. carriagereturn: if true, add a carriage return to the end of the line. */ fprintf(afile->f, "get from"); fprintf(afile->f, " %ld", fromplace); fprintf(afile->f, " to"); fprintf(afile->f, " %ld", toplace); fprintf(afile->f, " direction"); switch (pie->key.piedir) { case dirhomologous: case plus: /* handle case, may not be right */ switch (flip) { case false: fprintf(afile->f, " +"); break; case true: fprintf(afile->f, " -"); break; } break; case dircomplement: case minus: /* handle case, may not be right */ switch (flip) { case false: fprintf(afile->f, " -"); break; case true: fprintf(afile->f, " +"); break; } break; } putc(';', afile->f); if (carriagereturn) putc('\n', afile->f); } /* Local variables for numberdigit: */ struct LOC_numberdigit { long number, place; /* the exponent of logplace */ long myabsolute; /* the absolute value of number */ Char acharacter; /* the character to be returned */ } ; Local Void digit(LINK) struct LOC_numberdigit *LINK; { /* extract a digit at the place position */ long tenplace; /* ten times place */ long z; /* an intermediate value */ long d; /* the digit extracted */ tenplace = LINK->place * 10; z = LINK->myabsolute - LINK->myabsolute / tenplace * tenplace; if (LINK->place == 1) d = z; else d = z / LINK->place; switch (d) { case 0: LINK->acharacter = '0'; break; case 1: LINK->acharacter = '1'; break; case 2: LINK->acharacter = '2'; break; case 3: LINK->acharacter = '3'; break; case 4: LINK->acharacter = '4'; break; case 5: LINK->acharacter = '5'; break; case 6: LINK->acharacter = '6'; break; case 7: LINK->acharacter = '7'; break; case 8: LINK->acharacter = '8'; break; case 9: LINK->acharacter = '9'; break; } } /* digit */ Local Void sign(LINK) struct LOC_numberdigit *LINK; { /* put a negative sign out or a positive sign */ if (LINK->number < 0) LINK->acharacter = '-'; else LINK->acharacter = '+'; } /* sign */ /* end module book.iwgetsimple version = 7.67; {of delmod.p 2004 Sep 8} */ /* ************************************************************************ */ /* ************************************************************************ */ /* ************************************************************************ */ /* begin module package.numbar */ /* ************************************************************************ */ /* begin module numberdigit */ Static Char numberdigit(number_, logplace) long number_, logplace; { /* return the digit at the place value ('logplace') position of number. example: numberdigit(13625, 3) = 3 numberdigit(13625, 4) = 1 2000 July 30 'myabsolute' replaced 'absolute', which is apparently a keyword for GPC. The name is kept to keep the code looking similar to its origin. */ struct LOC_numberdigit V; long count; /* used to make place */ V.number = number_; V.place = 1; for (count = 1; count <= logplace; count++) V.place *= 10; if (V.number == 0) { if (V.place == 1) V.acharacter = '0'; else V.acharacter = ' '; return V.acharacter; } V.myabsolute = labs(V.number); if (V.myabsolute < V.place / 10) { V.acharacter = ' '; return V.acharacter; } if (V.myabsolute >= V.place) digit(&V); else sign(&V); return V.acharacter; } /* numberdigit */ #define ln10 2.30259 /* natural log of 10 - for conversion to log base 10 */ #define epsilon 0.00001 /* a small number to correct log base 10 errors */ /* end module numberdigit version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* begin module numbersize */ Static long numbersize(n) long n; { /* calculate amount of space to be reserved for the integer n */ long size; /* intermediate result */ if (n == 0) return 1; else { /* account for minus sign */ size = (long)(log((double)labs(n)) / ln10 + epsilon) + 1; /* the 1 is for the last digit */ /* the epsilon assures that we do not lose a place due to roundoff. eg, sometimes log base 10 of 10 would be 0.9999 instead of 1, and we would not do it right... note: this will fail for very large numbers on the order of 1/epsilon. */ if (n < 0) size++; return size; } } /* numbersize */ #undef ln10 #undef epsilon /* end module numbersize version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* begin module numberbar */ Static long firstlastmax(firstnumber, lastnumber) long firstnumber, lastnumber; { /* compute the sizes of firstnumber and lastnumber (including + or - sign) and then determine which number is larger */ long firstlines; /* number of lines needed for firstumber */ long lastlines; /* number of lines needed for lastnumber */ firstlines = numbersize(firstnumber); if (firstnumber > 0) /* add one more for + sign */ firstlines++; lastlines = numbersize(lastnumber); if (lastnumber > 0) /* add one more for + sign */ lastlines++; if (firstlines > lastlines) return firstlines; else return lastlines; } Static Void numberbar(afile, spaces, firstnumber, lastnumber, linesused) _TEXT *afile; long spaces, firstnumber, lastnumber, *linesused; { /* write a bar of numbers to a file, with several spaces before. the number of lines used is returned */ long logplace; /* the log of the digit being looked at */ long number; /* the current number being written */ long spacecount; /* count of spaces */ /* 2000 June 24: This code was not sufficient to deal with the sign correctly. The numbersize routine now does *not* give the + sign (which makes it useful for other purposes) so we have to account for it here now. if abs(firstnumber) > abs(lastnumber) then linesused:= numbersize(firstnumber) else linesused:= numbersize(lastnumber); */ *linesused = firstlastmax(firstnumber, lastnumber); /* writeln(output,'numberbar says linesused = ',linesused:1); for logplace:=linesused-1 downto 0 do begin 1999 July 15: this is changed to linesused since numbersize now accounts for the sign: for logplace := linesused downto 0 do begin 2000 June 24: back to the old code ... */ for (logplace = *linesused - 1; logplace >= 0; logplace--) { for (spacecount = 1; spacecount <= spaces; spacecount++) putc(' ', afile->f); for (number = firstnumber; number <= lastnumber; number++) fputc(numberdigit(number, logplace), afile->f); putc('\n', afile->f); } } /* end module numberbar version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* ************************************************************************ */ /* end module package.numbar version = 4.86; (@ of prgmod.p 2004 Sep 8 */ /* begin module space */ Static Void space(afile, spaces) _TEXT *afile; long spaces; { /* space over some amount to file afile. */ long spacecount; /* count of spaces */ for (spacecount = 1; spacecount <= spaces; spacecount++) putc(' ', afile->f); } /* end module space */ /* begin module sebo.readparameters */ Static Void readparameters(sebop, parameterversion, alignmenttype, fromdo, todo, rangecontrol, frominst, toinst, mismatches, alertinterval) _TEXT *sebop; double *parameterversion; Char *alignmenttype; long *fromdo, *todo; Char *rangecontrol; long *frominst, *toinst, *mismatches, *alertinterval; { /* the parameter file to read */ /* read the parameters as defined in the main routine. */ if (*sebop->name != '\0') { if (sebop->f != NULL) sebop->f = freopen(sebop->name, "r", sebop->f); else sebop->f = fopen(sebop->name, "r"); } else rewind(sebop->f); if (sebop->f == NULL) _EscIO2(FileNotFound, sebop->name); RESETBUF(sebop->f, Char); fscanf(sebop->f, "%lg%*[^\n]", parameterversion); getc(sebop->f); /* writeln(output,'parameterversion:', parameterversion:40:38); writeln(output,'updateversion:', updateversion:40:38); */ if ((long)floor(100 * *parameterversion + 0.5) < (long)floor(100 * updateversion + 0.5)) { printf("You have an old parameter file!\n"); halt(); } fscanf(sebop->f, "%c%*[^\n]", alignmenttype); getc(sebop->f); if (*alignmenttype == '\n') *alignmenttype = ' '; fscanf(sebop->f, "%ld%ld%*[^\n]", fromdo, todo); getc(sebop->f); if (*fromdo > *todo) { printf("fromdo (%ld) must be less than todo (%ld)\n", *fromdo, *todo); halt(); } if (P_peek(sebop->f) == 'r' || P_peek(sebop->f) == 'a') { fscanf(sebop->f, "%c%*[^\n]", rangecontrol); getc(sebop->f); if (*rangecontrol == '\n') *rangecontrol = ' '; *frominst = 0; *toinst = 0; } else { fscanf(sebop->f, "%ld%ld%*[^\n]", frominst, toinst); getc(sebop->f); if (*frominst > *toinst) { printf("frominst (%ld) must be less than toinst (%ld)\n", *frominst, *toinst); halt(); } } fscanf(sebop->f, "%ld%*[^\n]", mismatches); getc(sebop->f); if (*mismatches < 0) { printf("mismatches must be non-negative\n"); halt(); } fscanf(sebop->f, "%ld%*[^\n]", alertinterval); getc(sebop->f); } #define wid 10 /* width of output numbers */ /* end module sebo.readparameters */ /* begin module sebo.writeparameters */ Static Void writeparameters(afile, alignmenttype, fromdo, todo, rangecontrol, frominst, toinst, mismatches, alertinterval) _TEXT *afile; Char alignmenttype; long fromdo, todo; Char rangecontrol; long frominst, toinst, mismatches, alertinterval; { /* write the parameters to afile. */ switch (alignmenttype) { case 'f': fprintf(afile->f, "f: force alignment on first base\n"); break; case 'i': fprintf(afile->f, "i: use the alignedbase from the book\n"); break; case 'b': fprintf(afile->f, "b: use the internal book\n"); break; } fprintf(afile->f, "%*ld %*ld range of book alignment used for search\n", wid, fromdo, wid, todo); if (rangecontrol != 'r' && rangecontrol != 'a') fprintf(afile->f, "%*ld %*ld range of resulting output instructions\n", wid, frominst, wid, toinst); else { fprintf(afile->f, "%c%*c%*c", rangecontrol, wid, ' ', wid, ' '); if (rangecontrol == 'r') fprintf(afile->f, " RELATIVE"); if (rangecontrol == 'a') fprintf(afile->f, " ABSOLUTE"); fprintf(afile->f, " instructions\n"); } fprintf(afile->f, "%*ld %*c mismatches allowed\n", wid, mismatches, wid, ' '); fprintf(afile->f, "%*ld %*c alert interval\n", wid, alertinterval, wid, ' '); } #undef wid /* end module sebo.writeparameters */ /* begin module identifypiece */ Static Void identifypiece(afile, Ppie, carriagereturn) _TEXT *afile; piece **Ppie; boolean carriagereturn; { /* identify the piece Ppie by delila instructions; write result to afile. carriagereturn: if true, add a carriage return to the end of the line. example: piece U00096; get from 637551 to 638151 direction +; */ boolean Pflip; /* is the piece flipped? */ header *WITH; WITH = &(*Ppie)->key.hea; if (WITH->fulnam != NULL) { if (WITH->fulnam->length > 0) { fprintf(afile->f, "name \""); writeline(afile, WITH->fulnam, false); fprintf(afile->f, "\"; "); } } Pflip = ((*Ppie)->key.piedir != (*Ppie)->key.coodir); fprintf(afile->f, "piece "); iwname(afile, (*Ppie)->key.hea.keynam); fprintf(afile->f, "; "); if (Pflip) /* with cr */ iwgetsimple(afile, *Ppie, inttopie(1L, *Ppie), inttopie(piecelength(*Ppie), *Ppie), !Pflip, false); else iwgetsimple(afile, *Ppie, inttopie(1L, *Ppie), inttopie(piecelength(*Ppie), *Ppie), Pflip, false); /* with cr */ if (carriagereturn) putc('\n', afile->f); } /* Local variables for themain: */ struct LOC_themain { _TEXT *instout, *list; long alertinterval; /* interval in bases to tell the user we are still hard at work */ long alignedbase; /* the aligning base of Bpie */ Char alignmenttype; /* 'f' means alignment by First internal coordinate base, 'b' means alignment by Book, 'i' means alignment by Instructions */ boolean done; /* done searching this Bpie against this Tpie? */ boolean found; /* the sequence was found in the target? */ long fromdo, todo; /* user defined range to use for searching */ long frominst, toinst; /* range of resulting instout instructions */ long hits; /* number of probes found in target */ long indent; /* amount to indent book pieces on display */ long insttype; /* type of instructions to write out; see iwget */ long l; /* aligned position */ long linesused; /* line count from numberbar */ long mismatches; /* number of mismatches allowed */ long misses; /* number of mismatches found so far */ long probenumber; /* count of probes */ Char rangecontrol; /* control of output range. 'a': absolute coordinates; 'r': relative; other: frominst to toinst */ long targetnumber; /* count of targets */ line *thefulnam; /* the current name to use in instout. It will default to the previous name. */ long totalprobes; /* total number of probes found in target */ piece *Bpie; long Blength; /* length of Bpie */ base Bbase; /* a base in the book sequence */ long Bposition; /* position on Bpie */ piece *Tpie; long Tlength; /* length of Tpie */ base Tbase; /* a base in the target book sequence */ long Tposition; /* position on Tpie */ } ; Local Void showalignment(fout, t_, LINK) _TEXT *fout; long t_; struct LOC_themain *LINK; { /* show the alignment to file fout for coordinate t */ long index; /* index to the book sequence */ long FORLIM; /* show the probe sequence */ putc('\n', fout->f); numberbar(fout, LINK->indent, LINK->fromdo, LINK->todo, &LINK->linesused); space(fout, LINK->indent); FORLIM = LINK->todo; for (index = LINK->fromdo; index <= FORLIM; index++) { if (withinalignment(index, LINK->alignedbase, LINK->Blength)) { LINK->Bbase = getbase(index + LINK->alignedbase, LINK->Bpie); fputc(basetochar(LINK->Bbase), fout->f); } else putc(' ', fout->f); } fprintf(fout->f, " probe\n"); /* show the probe-target differences */ if (LINK->misses > 0) { space(fout, LINK->indent); FORLIM = LINK->todo; for (index = LINK->fromdo; index <= FORLIM; index++) { if (withinalignment(index, LINK->alignedbase, LINK->Blength) & withinalignment(index, t_, LINK->Tlength)) { if (getbase(index + LINK->alignedbase, LINK->Bpie) == getbase(index + t_, LINK->Tpie)) putc(' ', fout->f); else putc('X', fout->f); } else putc(' ', fout->f); } fprintf(fout->f, " differences\n"); } /* show the target sequence if it differs from the probe */ if (LINK->misses > 0) { space(fout, LINK->indent); FORLIM = LINK->todo; for (index = LINK->fromdo; index <= FORLIM; index++) { if (withinalignment(index, t_, LINK->Tlength)) { LINK->Tbase = getbase(index + t_, LINK->Tpie); fputc(basetochar(LINK->Tbase), fout->f); } else putc(' ', fout->f); } fprintf(fout->f, " target\n"); } /* show the zero base location */ space(fout, LINK->indent); space(fout, labs(LINK->fromdo)); fprintf(fout->f, "^ found at %ld", inttopie(t_, LINK->Tpie)); fprintf(fout->f, " on the target\n\n"); } /* showalignment */ Local Void copyline(a_, b, LINK) line *a_, **b; struct LOC_themain *LINK; { /* copy the contents of line a to b */ long index; /* index to characters in l */ long FORLIM; FORLIM = a_->length; for (index = 0; index < FORLIM; index++) (*b)->letters[index] = a_->letters[index]; (*b)->length = a_->length; } Local Void writeinstructions(instout, t_, LINK) _TEXT *instout; long t_; struct LOC_themain *LINK; { /* write instructions out for coordinate t */ fprintf(instout->f, "\n{Searching target %ld with probe %ld: \n", LINK->targetnumber, LINK->probenumber); /* writeln(instout,' }'); */ /* 2009 Mar 3: this code gave entirely wrong coordinates - 8065429 on the E. coli genome! - replace with identifypiece below. writeln(instout,'{ BUBBA original source probe: }'); */ /* write(instout,'{'); if Bpie^.key.hea.fulnam^.length <> 0 then begin write(instout,' name "'); writeline(instout, Bpie^.key.hea.fulnam, false); write(instout,'";'); end; write(instout,' piece '); iwname(instout, Bpie^.key.hea.keynam); write(instout,'; '); if Bflip then iwgetsimple(instout, Bpie, inttopie(Bpie^.key.pieend,Bpie), inttopie(Bpie^.key.piebeg,Bpie), Bflip, false) (* with cr *) else iwgetsimple(instout, Bpie, inttopie(Bpie^.key.piebeg,Bpie), inttopie(Bpie^.key.pieend,Bpie), Bflip, false); (* with cr *) */ /* 2009 Mar 3 search target as reported to output */ identifypiece(instout, &LINK->Bpie, false); fprintf(instout->f, " }\n"); fprintf(instout->f, "{ final target instructions: }\n"); if (LINK->Bpie->key.hea.fulnam->length != 0) { fprintf(instout->f, "name \""); writeline(instout, LINK->Bpie->key.hea.fulnam, false); fprintf(instout->f, "\"; "); } /*original: iwget(instout, Tpie, frominst, inttopie(t, Tpie), toinst, Tflip, insttype, false); This gave the wrong instruction in the case that the target was flipped. Force it to be correct: */ if (LINK->rangecontrol != 'r' && LINK->rangecontrol != 'a') { iwget(instout, LINK->Tpie, LINK->frominst, inttopie(t_, LINK->Tpie), LINK->toinst, false, LINK->insttype, true); return; } switch (LINK->rangecontrol) { case 'a': /* absolute instructions */ /* may need orientation stuff here */ iwget(instout, LINK->Tpie, LINK->frominst, inttopie(t_, LINK->Tpie), LINK->frominst + piecelength(LINK->Bpie) - 1, false, 2L, true); break; case 'r': /* relative is from the given coordinate */ /* relative instructions */ iwget(instout, LINK->Tpie, 0L, inttopie(t_, LINK->Tpie), piecelength(LINK->Bpie) - 1, false, LINK->insttype, true); /* writeln(instout); write(instout,' {'); writeln(instout,'frominst: ', frominst:1); writeln(instout,' toinst: ', toinst:1); if Tflip then writeln(instout,'Tflip is true') else writeln(instout,'Tflip is false'); if Bflip then writeln(instout,' Bflip is true') else writeln(instout,' Bflip is false'); write(instout,' }'); */ break; /* base 0 counts as one */ } } /* writeinstructions */ Local Void announceTarget(fout, LINK) _TEXT *fout; struct LOC_themain *LINK; { /* announce the Target sequence to file fout */ fprintf(fout->f, "\nTarget sequence: \n"); identifypiece(fout, &LINK->Tpie, false); fprintf(fout->f, " { %ld bases }\n", LINK->Tlength); } /* announceTarget */ Local Void announceProbe(fout, LINK) _TEXT *fout; struct LOC_themain *LINK; { /* announce the Probe sequence to file fout */ space(fout, LINK->indent); fprintf(fout->f, "Searching target %ld with probe %ld: \n", LINK->targetnumber, LINK->probenumber); identifypiece(fout, &LINK->Bpie, true); } /* announceProbe */ Local Void usingpreviousname(fout, LINK) _TEXT *fout; struct LOC_themain *LINK; { /* make an announcement to fout about the probe name */ fprintf(fout->f, "{ Using a previous probe full name: \""); writeline(fout, LINK->thefulnam, false); fprintf(fout->f, "\" as the full name for the next probe. }\n"); } /* usingpreviousname */ Local Void reporthits(fout, LINK) _TEXT *fout; struct LOC_themain *LINK; { /* make an announcement to fout of the number of probes found */ if (LINK->hits == 0) { fprintf(fout->f, "{ probe"); if (LINK->thefulnam->length != 0) { fprintf(fout->f, " \""); writeline(fout, LINK->thefulnam, false); putc('"', fout->f); } fprintf(fout->f, " was not found in the target. }\n"); return; } if (LINK->hits <= 1) return; fprintf(fout->f, "{ probe \""); writeline(fout, LINK->thefulnam, false); fprintf(fout->f, "\" was found %ld times in the target. }\n", LINK->hits); } /* reporthits */ Local Void dosearch(LINK) struct LOC_themain *LINK; { /* we now have the book piece and the target, so we can search */ long t_; /* position on Tpie that we are checking against Bposition */ long FORLIM; _TEXT TEMP; switch (LINK->alignmenttype) { case 'f': /* force alignment on first base */ LINK->alignedbase = 1; break; case 'i': /* use the alignedbase from the inst */ break; case 'b': /* use the internal book */ LINK->alignedbase = pietoint(0L, LINK->Bpie); break; } LINK->hits = 0; FORLIM = LINK->Tlength; /* writeln(output,'Blength = ',Blength:1,' ================='); writeln(output,'fromdo = ',fromdo:1,' ================='); writeln(output,'todo = ',todo:1,' ================='); */ for (t_ = 1; t_ <= FORLIM; t_++) { if (LINK->alertinterval > 0) { if (t_ % LINK->alertinterval == 0) printf("... %ld\n", inttopie(t_, LINK->Tpie)); /* p2c: sebo.p, line 2748: * Note: Using % for possibly-negative arguments [317] */ } LINK->done = false; LINK->found = true; LINK->l = LINK->fromdo; LINK->misses = 0; while (LINK->l <= LINK->todo && LINK->found) { /* location on B of the base to compare: */ LINK->Bposition = LINK->l + LINK->alignedbase; /* make sure we are using only the book sequence! */ if (LINK->Bposition >= 1 && LINK->Bposition <= LINK->Blength) { /* location on T of the base to compare: */ LINK->Tposition = t_ + LINK->l; /* circular wrap for target: */ if (LINK->Tposition < 1) LINK->Tposition += LINK->Tlength; if (LINK->Tposition > LINK->Tlength) LINK->Tposition -= LINK->Tlength; LINK->Tbase = getbase(LINK->Tposition, LINK->Tpie); LINK->Bbase = getbase(LINK->Bposition, LINK->Bpie); if (LINK->Bbase != LINK->Tbase) { LINK->misses++; if (LINK->misses > LINK->mismatches) LINK->found = false; } } LINK->l++; } if (LINK->found) { TEMP.f = stdout; *TEMP.name = '\0'; showalignment(&TEMP, t_, LINK); showalignment(LINK->list, t_, LINK); writeinstructions(LINK->instout, t_, LINK); LINK->hits++; LINK->totalprobes += LINK->hits; } } TEMP.f = stdout; *TEMP.name = '\0'; reporthits(&TEMP, LINK); reporthits(LINK->list, LINK); reporthits(LINK->instout, LINK); } /* dosearch */ /* end module identifypiece */ /* begin module sebo.themain */ Static Void themain(inst, book, target, sebop, instout_, list_) _TEXT *inst, *book, *target, *sebop, *instout_, *list_; { /* the main procedure of the program */ struct LOC_themain V; double parameterversion; /* parameter version number */ boolean Bflip; /* probe direction = target direction? */ boolean Tflip; /* target pie = target coo? */ /* book pieces of DNA to search with */ long Btheline; /* location in the book */ orgkey Borg; boolean Borgchange = false, Borgopen = false; chrkey Bchr; boolean Bchrchange = false, Bchropen = false, Bpiechange = false, Bpieopen = false; /* target piece to search */ long Ttheline; /* location in the target book */ orgkey Torg; boolean Torgchange = false, Torgopen = false; chrkey Tchr; boolean Tchrchange = false, Tchropen = false, Tpiechange = false, Tpieopen = false; _TEXT TEMP; V.instout = instout_; V.list = list_; printf("sebo %4.2f\n", version); readparameters(sebop, ¶meterversion, &V.alignmenttype, &V.fromdo, &V.todo, &V.rangecontrol, &V.frominst, &V.toinst, &V.mismatches, &V.alertinterval); TEMP.f = stdout; *TEMP.name = '\0'; writeparameters(&TEMP, V.alignmenttype, V.fromdo, V.todo, V.rangecontrol, V.frominst, V.toinst, V.mismatches, V.alertinterval); /* solution to memory leak 2001 Mar 23: */ if (V.alignmenttype == 'i') { if (*inst->name != '\0') { if (inst->f != NULL) inst->f = freopen(inst->name, "r", inst->f); else inst->f = fopen(inst->name, "r"); } else rewind(inst->f); if (inst->f == NULL) _EscIO2(FileNotFound, inst->name); RESETBUF(inst->f, Char); if (BUFEOF(inst->f)) { printf("The sebop requests instructions to be used\n"); printf("but the inst file is empty!\n"); halt(); } } if (*V.instout->name != '\0') { if (V.instout->f != NULL) V.instout->f = freopen(V.instout->name, "w", V.instout->f); else V.instout->f = fopen(V.instout->name, "w"); } else { if (V.instout->f != NULL) rewind(V.instout->f); else V.instout->f = tmpfile(); } if (V.instout->f == NULL) _EscIO2(FileNotFound, V.instout->name); SETUPBUF(V.instout->f, Char); fprintf(V.instout->f, "title \"sebo %4.2f search with book\";\n", version); fprintf(V.instout->f, "{ sebop parameters:\n"); writeparameters(V.instout, V.alignmenttype, V.fromdo, V.todo, V.rangecontrol, V.frominst, V.toinst, V.mismatches, V.alertinterval); fprintf(V.instout->f, "}\n"); V.insttype = 1; if (*V.list->name != '\0') { if (V.list->f != NULL) V.list->f = freopen(V.list->name, "w", V.list->f); else V.list->f = fopen(V.list->name, "w"); } else { if (V.list->f != NULL) rewind(V.list->f); else V.list->f = tmpfile(); } if (V.list->f == NULL) _EscIO2(FileNotFound, V.list->name); SETUPBUF(V.list->f, Char); fprintf(V.list->f, "sebo %4.2f search with book\";\n", version); fprintf(V.list->f, "sebop parameters:\n"); writeparameters(V.list, V.alignmenttype, V.fromdo, V.todo, V.rangecontrol, V.frominst, V.toinst, V.mismatches, V.alertinterval); brinit(target, &Ttheline); V.Tpie = (piece *)Malloc(sizeof(piece)); V.Bpie = (piece *)Malloc(sizeof(piece)); V.linesused = 0; V.indent = 3; getline(&V.thefulnam); V.targetnumber = 0; V.totalprobes = 0; while (!BUFEOF(target->f)) { printf("\nreading in target book ...\n"); getocp(target, &Ttheline, &Torg, &Torgchange, &Torgopen, &Tchr, &Tchrchange, &Tchropen, &V.Tpie, &Tpiechange, &Tpieopen); printf("... done\n"); iworgchr(V.instout, Torg, Torgchange, Torgopen, Tchr, Tchrchange, Tchropen); if (BUFEOF(target->f)) { printf("No further targets.\n"); continue; } V.targetnumber++; V.Tlength = piecelength(V.Tpie); fprintf(V.instout->f, "piece "); writename(V.instout, V.Tpie->key.hea.keynam); fprintf(V.instout->f, "; { the target sequence is %ld bases }\n", V.Tlength); TEMP.f = stdout; *TEMP.name = '\0'; announceTarget(&TEMP, &V); announceTarget(V.list, &V); if (*inst->name != '\0') { if (inst->f != NULL) inst->f = freopen(inst->name, "r", inst->f); else inst->f = fopen(inst->name, "r"); } else rewind(inst->f); if (inst->f == NULL) _EscIO2(FileNotFound, inst->name); RESETBUF(inst->f, Char); brinit(book, &Btheline); V.probenumber = 0; while (!BUFEOF(book->f)) { switch (V.alignmenttype) { case 'i': align(inst, book, &Btheline, &V.Bpie, &V.Blength, &V.alignedbase); break; case 'b': case 'f': getocp(book, &Btheline, &Borg, &Borgchange, &Borgopen, &Bchr, &Bchrchange, &Bchropen, &V.Bpie, &Bpiechange, &Bpieopen); V.Blength = piecelength(V.Bpie); break; } if (BUFEOF(book->f)) break; V.probenumber++; /* Determine relative orientation (flip). We want to report instructions in instout that are how to grab the given fragment from the Target, irrespective of how it was obtained from the probe library. So we only care if the target pie is the complement of the coo. */ Tflip = (V.Tpie->key.piedir != V.Tpie->key.coodir); Bflip = (V.Bpie->key.piedir != V.Bpie->key.coodir); /* if Tflip then writeln(output,'Tflip is true') else writeln(output,'Tflip is false'); if Bflip then writeln(output,'Bflip is true') else writeln(output,'Bflip is false'); */ /* write(output,'Bpie^.key.hea.fulnam= "'); writeline(output,Bpie^.key.hea.fulnam, false); writeln(output,'"'); write(output,'thefulnam = "'); writeline(output,thefulnam, false); writeln(output,'"'); */ /* this is the memory of the non-empty previous probe name: */ if (V.Bpie->key.hea.fulnam->length != 0) copyline(V.Bpie->key.hea.fulnam, &V.thefulnam, &V); else if (V.thefulnam->length != 0) { /* retrieve the previous memory */ TEMP.f = stdout; *TEMP.name = '\0'; usingpreviousname(&TEMP, &V); usingpreviousname(V.list, &V); usingpreviousname(V.instout, &V); copyline(V.thefulnam, &V.Bpie->key.hea.fulnam, &V); } TEMP.f = stdout; *TEMP.name = '\0'; /* announce the book */ announceProbe(&TEMP, &V); announceProbe(V.list, &V); if (V.Tlength < V.Blength) { printf("Target length is less than probe in book.\n"); /* halt; */ } else dosearch(&V); /* solution to memory leak 2001 Mar 23: */ clearpiece(&V.Bpie); } } /* read in the target piece */ printf("\nprobes: %ld\n", V.probenumber); printf("total probes found: %ld\n", V.totalprobes); fprintf(V.list->f, "total probes found: %ld\n", V.totalprobes); fprintf(V.instout->f, "{total probes found: %ld}\n", V.totalprobes); printf("targets: %ld\n", V.targetnumber); printf("sebo search completed\n"); } /* end module sebo.themain */ main(argc, argv) int argc; Char *argv[]; { PASCAL_MAIN(argc, argv); if (setjmp(_JL1)) goto _L1; instout.f = NULL; strcpy(instout.name, "instout"); list.f = NULL; strcpy(list.name, "list"); sebop.f = NULL; strcpy(sebop.name, "sebop"); target.f = NULL; strcpy(target.name, "target"); book.f = NULL; strcpy(book.name, "book"); inst.f = NULL; strcpy(inst.name, "inst"); themain(&inst, &book, &target, &sebop, &instout, &list); _L1: if (inst.f != NULL) fclose(inst.f); if (book.f != NULL) fclose(book.f); if (target.f != NULL) fclose(target.f); if (sebop.f != NULL) fclose(sebop.f); if (list.f != NULL) fclose(list.f); if (instout.f != NULL) fclose(instout.f); exit(EXIT_SUCCESS); } /* End. */