program bk2seq(inst, book, bk2seqp, alprop, protseq, output);
(* converts a book to a sequence

   by Mike Stephens
   modified by Thomas Schneider

   Dr. Thomas D. Schneider
   National Institutes of Health
   National Cancer Institute
   Center for Cancer Research Nanobiology Program
   Molecular Information Theory Group
   Frederick, Maryland  21702-1201
   toms@ncifcrf.gov
   permanent email: toms@alum.mit.edu
   http://www.ccrnp.ncifcrf.gov/~toms/

   module libraries: delman, delmods, prgmods
*)

label 1; (* end of program *)

const
(* begin module version *)
version = 1.13; (* of bk2seq.p 2008 Jul 29
2008 Jul 29, 1.13: upgrade for triggers
2008 Jul 29, 1.12: maximumrange increased in minmaxalignment.
1997 Mar 17, 1.11: functional
origin 1992 August 18 *)
(* end module version *)

(* begin module describe.bk2seq *)
(*
name
   bk2seq: takes a book file and makes it a protseq file.

synopsis
   bk2seq(inst: in, book: in, bk2seqp: in, alprop: out, protseq: out,
          output: out)

files
   inst: delila instructions of the form 'get from 56 -5 to 56 +10;'
      (This file may be empty, in which case the sequences will be
      aligned by their 5' ends.)

   book: the book generated by delila using inst

   bk2seqp: parameters to control the program.
      If the file is empty, the protseq format is generated.
      If the file begins with the letter 'f', fasta format is generated.

   alprop: parameter file to control the alpro program so that alpro uses
      the correct alignment coordinates.

   protseq: the output file, usable by alpro and other programs.

   output: messages to the user

description
   This program simply takes a file in book format and converts it to
   protseq format.  It was written mainly as an interface between books
   and those programs such as alpro and align that require protseq
   format.  Fasta format can be used by other programs.

documentation
   none

author
   R. Michael Stephens

bugs
   User defined range is not yet implemented.

technical notes
   symbolsperline is a constant that determines the maximum number of
   symbols written per line to the protseq.
*)
(* end module describe.bk2seq *)

symbolsperline = 70; (* the maximum number of symbols written per line
                        to the protseq *)

(* begin module book.const *)
(* constants needed for book manipulations *)
dnamax = 1024; (* length of dna arrays *)
namelength = 100; (* maximum key name length *)
linelength = 200; (* maximum line readable in book *)
(* end module book.const version = 7.73; {of delmod.p 2008 Jul 29} *)

(* PostScript constants *)
pwid = 8; (* width in character places to print PostScript numbers *)
pdec = 5; (* decimal places to print PostScript numbers *)
pdecolor = 4; (* decimal places for color descriptions
                 (5 WILL CAUSE NeWS 1.1 TO BOMB) *)

(* module filler.const *)
fillermax = 50; (* the size of the filler array for a string *)
(* module filler.const from prgmod.p 4.20 *)

(* begin module string.const *)
maxstring = 2000; (* the maximum string *)
(* end module string.const version = 4.86; (@ of prgmod.p 2004 Sep 8 *)

type
(* begin module book.type *)
(* types needed for book manipulations *)
chset = set of 'a'..'z';
(* types defined in book definition *)
alpha = packed array[1..namelength] of char; (* this is not alfa *)
(* name is a left justified string with blanks following the characters *)
name = record
   letters: alpha;
   length: 0..namelength (* zero means an unspecified structure *)
end;
lineptr = ^line;
line = record (* a line of characters *)
   letters: packed array [1..linelength] of char;
   length: 0..linelength;
   next: lineptr
end;
direction = (plus, minus, dircomplement, dirhomologous);
configuration = (linear, circular);
state = (on, off);
header = record (* header of key *)
   keynam: name; (* key name of structure *)
   fulnam: lineptr; (* full name of structure *)
   note: lineptr (* note key *)
end;

(* begin module base.type *)
(* define the four nucleotide bases *)
base = (a,c,g,t);
(* end module base.type version = 7.73; {of delmod.p 2008 Jul 29} *)

(* sequence types *)
(* a sequence is held as a linked chain of dnastring records, each
   carrying up to dnamax bases in part[1..length] *)
dnaptr = ^dnastring;
dnarange = 0..dnamax;
seq = packed array[1..dnamax] of base;
dnastring = record
   part: seq;
   length: dnarange;
   next: dnaptr
end;
orgkey = record (* organism key *)
   hea: header;
   mapunit: lineptr (* genetic map units *)
end;
chrkey = record (* chromosome key *)
   hea: header;
   mapbeg: real; (* number of genetic map beginning *)
   mapend: real (* number of genetic map ending *)
end;
pieceptr = ^piece;
piekey = record (* piece key *)
   hea: header;
   mapbeg: real; (* genetic map beginning *)
   coocon: configuration; (* configuration (circular/linear) *)
   coodir: direction; (* direction (+/-) relative to genetic map *)
   coobeg: integer; (* beginning nucleotide *)
   cooend: integer; (* ending nucleotide *)
   piecon: configuration; (* configuration (circular/linear) *)
   piedir: direction; (* direction (+/-) relative to coordinates *)
   piebeg: integer; (* beginning nucleotide *)
   pieend: integer; (* ending nucleotide *)
end;
piece = record
   key: piekey;
   dna: dnaptr
end;
reference = record
   pienam : name; (* name of piece referred to *)
   mapbeg : real; (* genetic map beginning *)
   refdir : direction; (* direction relative to coordinates *)
   refbeg : integer; (* beginning nucleotide *)
   refend : integer; (* ending nucleotide *)
end;
genkey = record (* gene key *)
   hea : header;
   ref : reference;
end;
trakey = record (* transcript key *)
   hea : header;
   ref : reference;
end;
markerptr = ^marker;
markey = record (* marker key *)
   hea : header;
   ref : reference;
   sta : state;
   phenotype : lineptr;
   next : markerptr;
end;
marker = record
   key : markey;
   dna : dnaptr;
end;
(* end module book.type version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module string.type *)
stringptr = ^string; (* pointer to a string *)
string = record (* a string of characters *)
   letters: array[1..maxstring] of char; (* the letters in the string *)
   length: integer; (* the number of characters in the string *)
   current: integer; (* the letter we are working on *)
   next: stringptr; (* the next string in a series *)
end;
(* end module string.type version = 4.86; (@ of prgmod.p 2004 Sep 8 *)

(* module
filler.type *)
(* the following is an array used to fill a string.  it is convenient to
have it much shorter than the maxstring, so that it is easy to fill the
string using procedure fillstring.  the user must declare the value of
constant fillermax. *)
filler = packed array[1..fillermax] of char;
(* module filler.type from prgmod.p 4.20 *)

(* module trigger.type *)
(* note: the field called state below is an integer counter; it is
   distinct from the enumerated type state = (on, off) of the book
   types. *)
trigger = record (* an object to be searched for *)
   seek: string; (* the characters looked for *)
   state: integer; (* how close to triggering we are *)
   skip: boolean; (* trigger not found- skip the line *)
   found: boolean (* the trigger was found *)
end;
(* module trigger.type from prgmod.p 4.20 *)

var
   inst, (* the delila instructions required by the align procedures *)
   book, (* the book to be aligned *)
   bk2seqp, (* control parameters for the bk2seq program *)
   alprop, (* control for the alpro program *)
   protseq: (* the output of the program *)
      text;

   (* variables used by the align routines: *)
   apiece: pieceptr;
   length, alignedbase: integer;
   fromparam, toparam: integer;

   fromdo, todo: integer; (* user defined range to use *)
   userrange: boolean; (* if true, the user defined range is used *)
   paging: char; (* defines whether to do pages. If 'n' no pages are done *)

   index: integer; (* aligned space index *)
   orgopen: boolean; (* used by procedures maxname and printname *)
   nametype: char; (* the type of name to look for in namebook.
                      it can be 'g', 't', or 'p'.
*)
   namewidth: integer; (* the amount of space to allocate to names *)
   b: char; (* a base to print out *)
   column: integer; (* counts the column of the printout so that color
                       PostScript won't have really huge lines *)

(* begin module book.var *)
(* ************************************************************************ *)
(* global variables needed for book manipulations *)
(* free storage: *)
   freeline: lineptr; (* unused lines *)
   freedna: dnaptr; (* unused dnas *)

   readnumber: boolean; (* whether to read a number from the notes,
                           or to read in the notes *)
   number: integer; (* the number of the item just read *)
   numbered: boolean; (* true when the item just read is numbered *)
   skipunnum: boolean; (* a control variable to allow skipping of
                          un-numbered items in the book *)
(* ************************************************************************ *)
(* end module book.var version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module halt *)
procedure halt;
(* report the halt on output and jump to label 1.  the program must
   declare "label 1;" and place "1:" in front of its final "end."
   this is the only goto in the delila system. *)
begin
   writeln(output,' program halt.');
   goto 1
end;
(* end module halt version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module skipblanks *)
(* 2003 July 31: tab is considered a blank character *)
function isblank(c: char): boolean;
(* is the character c blank or tab?
*)
begin
   isblank := c in [' ', chr(9)]
end;

procedure skipblanks(var thefile: text);
(* advance thefile while its window holds a blank or tab; stop at the
   first other character or at the end of the line *)
var
   scanning: boolean; (* true while there is still something to skip *)
begin
   scanning := true;
   while scanning do
      if isblank(thefile^) and not eoln(thefile)
      then get(thefile)
      else scanning := false
end;

procedure skipnonblanks(var thefile: text);
(* advance thefile while its window holds something other than a blank
   or tab; stop at the first blank or at the end of the line *)
var
   scanning: boolean; (* true while there is still something to skip *)
begin
   scanning := true;
   while scanning do
      if not (isblank(thefile^) or eoln(thefile))
      then get(thefile)
      else scanning := false
end;

procedure skipcolumn(var thefile: text);
(* step over one data column: leading blanks, then the column text *)
begin
   skipblanks(thefile);
   skipnonblanks(thefile)
end;
(* end module skipblanks version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module copyaline *)
procedure copyaline(var fin, fout: text);
(* transfer the remainder of the current line of fin to fout, then
   finish the line on both files *)
var
   ch: char; (* the character being transferred *)
begin (* copyaline *)
   while not eoln(fin) do begin
      read(fin, ch);
      write(fout, ch)
   end;
   readln(fin);
   writeln(fout)
end; (* copyaline *)
(* end module copyaline version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module interact.clearstring *)
(* begin module clearstring *)
procedure clearstring(var ribbon: string);
(* blank every letter of ribbon and zero its length and current
   position.  the next pointer is left alone. *)
var
   position: integer; (* place in ribbon.letters *)
begin (* clearstring *)
   ribbon.length := 0;
   ribbon.current := 0;
   for position := maxstring downto 1 do
      ribbon.letters[position] := ' '
end; (* clearstring *)

procedure initializestring(var ribbon: string);
(* prepare ribbon for linked list work: an empty string whose next
   pointer is nil.  clearstring alone never touches the pointer, so
   this must be called once before the list is used. *)
begin (* initializestring *)
   ribbon.next := nil;
   clearstring(ribbon)
end; (* initializestring *)
(* end module clearstring version = 4.86; (@ of prgmod.p 2004 Sep 8 *)
(* end module interact.clearstring version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module filler.fillstring *)
procedure fillstring(var s: string; a: filler);
(* this procedure makes it reasonably easy to fill the string s with
characters.
one calls the procedure as: *)
(*                      1         2         3         4         5 *)
(*             12345678901234567890123456789012345678901234567890 *)
(*
fillstring(s, 'this-is-the-string                                ');
   the two comments make it easy to line the characters up.  also, for this
example, it was assumed that the length of filler as defined by the constant
fillermax was 50.

   s is cleared first.  trailing blanks of a are dropped, the remaining
letters are copied to the front of s, s.length is set to their count and
s.current is set to 1.  if a holds nothing but blanks a message is written
to output and the program halts. *)
var
   length: integer; (* of the string without trailing blanks *)
   index: integer; (* of s *)
begin (* fillstring *)
   clearstring(s);
   length := fillermax;
   while (length > 1) and (a[length] = ' ') do length := pred(length);
   if (length = 1) and (a[length] = ' ') then begin
      writeln(output, 'fillstring: the string is empty');
      halt
   end;

   for index := 1 to length do s.letters[index] := a[index];
   s.length := length;
   s.current := 1
end; (* fillstring *)
(* end module filler.fillstring version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module filler.filltrigger *)
procedure filltrigger(var t: trigger; a: filler);
(* fill the trigger t: the letters of a (without trailing blanks) become
   the characters the trigger looks for.  state, skip and found are not
   touched; call resettrigger before using the trigger. *)
begin (* filltrigger *)
   fillstring(t.seek,a)
end; (* filltrigger *)
(* end module filler.filltrigger version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module trigger.proc *)
(* this module allows one to scan a series of characters, as from an
array or a file, and to "trigger" or detect a simple string in the series.
the advantage of the trigger is that several triggers can "observe" a
stream of characters at once, each looking for a different thing.
some other modules required: interact.const, interact.type *)

procedure resettrigger(var t: trigger);
(* reset the trigger to ground state *)
begin (* resettrigger *)
   with t do begin
      state := 0;
      skip := false;
      found := false
   end
end; (* resettrigger *)

procedure testfortrigger(ch: char; var t: trigger);
(* look at the character ch.  if it is part of the trigger (at the
current trigger state), then the trigger state goes higher.  if it is
not part of the trigger then the trigger state is reset, skip is true
and one should skip onward to find the trigger.
if the trigger is found, found is true.

1996 Sep 12: Bug found!  In the case of a trigger "ab", the program used
to miss it for situations like "aab".  This was because at the first a it
would step up.  Then it would see the second a and recognize that was not
part of ab.  It would fail to realize that it could be the start of a new
one.  The code now accounts for that possibility.

Two further corrections:

   1. when a failed character restarts the match at the first letter,
      found is now true if the trigger is only one letter long.
      previously a one letter trigger (such as a quote mark) that had
      fired and was not reset could not fire again until the stream
      passed through a character that put it back to state 0.

   2. state is never compared beyond seek.length.  previously a trigger
      that had fired and was not reset went on to "match" the blank
      padding that clearstring leaves after the last letter whenever the
      stream held blanks.

note: only a restart at the first letter is attempted.  a trigger whose
beginning repeats inside itself (for example "aab" in "aaab") can still be
missed. *)
var
   matched: boolean; (* ch is the letter expected at the new state *)
   restart: boolean; (* ch is the first letter of the trigger *)
begin (* testfortrigger *)
   with t do begin
      state := succ(state);

      (* the tests are nested rather than joined with "and" so that
         seek.letters is never indexed beyond the trigger, whether or not
         the compiler evaluates boolean expressions completely *)
      matched := false;
      if state <= seek.length then
         if seek.letters[state] = ch then matched := true;

      if matched then begin
         skip := false;
         found := (state = seek.length)
      end
      else begin
         (* it failed.  But wait!  It could be the beginning
            of a NEW trigger string! *)
         restart := false;
         if seek.length >= 1 then
            if seek.letters[1] = ch then restart := true;

         if restart then begin
            state := 1;
            skip := false;
            (* a one letter trigger is complete at its first letter *)
            found := (seek.length = 1)
         end
         else begin (* reset trigger *)
            state := 0;
            skip := true;
            found := false
         end
      end
   end
end; (* testfortrigger *)
(* end module trigger.proc version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module package.align *)
(* ************************************************************************ *)

(* begin module package.getpiece *)
(* ************************************************************************ *)

(* begin module package.brpiece *)
(* ************************************************************************ *)

(* begin module book.basis *)
(* procedures needed for book manipulations *)

(* get procedures should be used for all linked lists of records *)

procedure getline(var l: lineptr);
(* obtain a line from the free line list or by making a new one.
   the line is returned with length 0 and next nil. *)
begin
   if freeline<>nil then begin
      l:=freeline;
      freeline:=freeline^.next
   end
   else new(l);
   l^.length:=0;
   l^.next:=nil
end;

procedure getdna(var l: dnaptr);
(* obtain a dnastring from the free dna list or by making a new one.
   the record is returned with length 0 and next nil. *)
begin
   if freedna<>nil then begin
      l:=freedna;
      freedna:=freedna^.next
   end
   else new(l);
   l^.length:=0;
   l^.next:=nil
end;

(* clear procedures should be called each time the records are no longer
needed.  failure to do this may
result in a stack overflow. *)

procedure clearline(var l: lineptr);
(* hand the first line of the list l back to the free line list;
   l moves on to the line that followed it.  nil is left alone. *)
var
   released: lineptr; (* the line being handed back *)
begin
   if l <> nil then begin
      released := l;
      l := released^.next;
      released^.next := freeline;
      freeline := released
   end
end;

procedure writeline(var afile: text; l: lineptr; carriagereturn: boolean);
(* write the characters of line l to afile, and finish the output line
   when carriagereturn is true. *)
var
   index: integer; (* place in the letters of l *)
begin
   index := 1;
   while index <= l^.length do begin
      write(afile, l^.letters[index]);
      index := index + 1
   end;
   if carriagereturn then writeln(afile)
end;

procedure showfreedna;
(* list on output every record of the free dna list: its position in
   the list and its length.  (gpc does not allow a pointer to be written
   to a text file, so the pointer itself is not shown.) *)
var
   counter: integer; (* position in the free list *)
   l: dnaptr; (* the record being shown *)
begin
   counter := 0;
   l := freedna;
   while l <> nil do begin
      counter := counter + 1;
      writeln(output, counter:1, ', length = ', l^.length:1);
      l := l^.next
   end
end;

procedure cleardna(var l: dnaptr);
(* hand the first dna structure of the list l back to the free dna list;
   l moves on to the one that followed it.  nil is left alone. *)
var
   released: dnaptr; (* the record being handed back *)
begin
   if l <> nil then begin
      released := l;
      l := released^.next;
      released^.next := freedna;
      freedna := released
   end
end;

procedure clearheader(var h: header);
(* return the lines of header h to free storage: one line of the full
   name and every line of the note *)
begin
   clearline(h.fulnam);
   while h.note <> nil do clearline(h.note)
end;

procedure clearpiece(var p: pieceptr);
(* return all dna of the piece, then the lines of its header, to free
   storage *)
var
   released: dnaptr; (* the dna record being handed back *)
begin
   while p^.dna <> nil do begin
      released := p^.dna;
      p^.dna := released^.next;
      released^.next := freedna;
      freedna := released
   end;
   clearheader(p^.key.hea)
end;

function chartobase(ch:char):base;
(* the base named by one of the characters a, c, g, t *)
begin
   case ch of
      'a': chartobase := a;
      'c': chartobase := c;
      'g': chartobase := g;
      't': chartobase := t
   end
end;

function basetochar(ba:base):char;
(* the character that names the base ba *)
begin
   if ba = a then basetochar := 'a'
   else if ba = c then basetochar := 'c'
   else if ba = g then basetochar := 'g'
   else basetochar := 't'
end;

function complement(ba:base):base;
(* the base that pairs with ba *)
begin
   if ba = a then complement := t
   else if ba = c then complement := g
   else if ba = g then complement := c
   else complement := a
end;

function chomplement(b: char): char;
(* the character naming the complement of the base named by b *)
var
   original: base; (* the base named by b *)
   partner: base; (* its complement *)
begin
   original := chartobase(b);
   partner := complement(original);
   chomplement := basetochar(partner)
end;

function pietoint(p: integer; pie: pieceptr): integer;
(* p is a coordinate on the piece.  the result is the matching position
   in the internal coordinate system, which counts from 1 at piebeg in
   the direction of the piece.  a coordinate on the far side of piebeg is
   carried around through the ends of the coordinate system (coobeg,
   cooend). *)
(* Note: the dirhomologous and dircomplement are treated as plus and minus
directions, which MIGHT NOT BE RIGHT! *)
var
   offset: integer; (* the internal coordinate being worked out *)
begin
   if pie^.key.piedir in [plus, dirhomologous] then begin
      if p >= pie^.key.piebeg
      then offset := p - pie^.key.piebeg + 1
      else offset := (p - pie^.key.coobeg)
                     + (pie^.key.cooend - pie^.key.piebeg) + 2
   end
   else begin
      if p <= pie^.key.piebeg
      then offset := pie^.key.piebeg - p + 1
      else offset := (pie^.key.cooend - p)
                     + (pie^.key.piebeg - pie^.key.coobeg) + 2
   end;
   pietoint := offset
end;

function inttopie(i: integer; pie: pieceptr):integer;
(* i is in the range 1 to some maximum.  it is an internal coordinate
system for the program.  we want to do a coordinate transformation to
obtain a value in the range of the piece called pie:
i=1 corresponds to piebeg and
i=its maximum corresponds to pieend *)
(* Note: the dirhomologous and dircomplement are treated as plus and minus
directions, which MIGHT NOT BE RIGHT!
*)
var
   p: integer; (* an intermediate value *)
begin
   with pie^.key do begin
      case piedir of
         dirhomologous,
         plus: begin
                  p:=piebeg+(i-1);
                  if p>cooend
                  then if coocon=circular then p:=p-(cooend-coobeg+1)
               end;
         dircomplement,
         minus: begin
                   p:=piebeg-(i-1);
(* NOTE(review): the source text appears to be damaged here.  everything
   between the two tokens "if p" and the character constant that follows
   them is missing: the remainder of inttopie and the opening of the
   routine that the closing comment below calls skipstar.  no definition
   of getto, which later routines call, is visible in this file either.
   restore the missing text from the delmod.p module library before
   compiling - TODO confirm against the library. *)
                   if p '*' then begin
         writeln(output,' procedure skipstar: bad book');
         writeln(output,' "*" expected as first character on the line, but "', thefile^,'" was found');
         halt
      end;
      get(thefile); (* skip the star *)

      if thefile^ <> ' ' then begin
         writeln(output,' procedure skipstar: bad book');
         writeln(output,' "* " expected on a line but "*', thefile^,'" was found');
         halt
      end;
      get(thefile) (* skip the blank *)
   end
end; (* skipstar *)
(* end module book.skipstar version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module book.brreanum *)
procedure brreanum(var thefile: text; var theline: integer; var reanum: real);
(* read a real number from the file *)
(* skipstar is called first (presumably to step over the star and blank
   that begin the line - its opening is not visible here), then the
   number is read with readln, which also finishes the line, and theline
   is advanced by one. *)
begin
   skipstar(thefile);
   readln(thefile,reanum);
   theline := succ(theline)
end;
(* end module book.brreanum version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module book.brnumber *)
procedure brnumber(var thefile: text; var theline: integer; var num: integer);
(* read a number from the file *)
(* same steps as brreanum, for an integer: skipstar, readln of the number,
   then theline is advanced by one. *)
begin
   skipstar(thefile);
   readln(thefile,num);
   theline := succ(theline)
end;
(* end module book.brnumber version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module book.brname *)
procedure brname(var thefile: text; var theline: integer; var nam: name);
(* read a name from the file *)
var
   i: integer; (* an index to the name *)
   c: char; (* a character read *)
begin (* brname *)
   skipstar(thefile);
   with nam do begin
      length:=0;
      (* collect characters until the line ends, the name is full, or a
         blank is read *)
      repeat
         length:=succ(length);
         read(thefile,c);
         letters[length] := c
      until (eoln(thefile)) or (length>=namelength) or (letters[length]=' ');
      (* a terminating blank is not counted as part of the name *)
      if letters[length]=' ' then length:=length-1;
(* NOTE(review): the source text appears to be damaged again here.
   everything between "if length" and the text that follows it is
   missing: the end of brname and the opening of the routine whose tail
   follows (it is closed by the "end module book.brline" marker further
   on).  restore from the delmod.p module library - TODO confirm. *)
      if length ',linelength:1,' characters');
      writeln(output,'* Only ',linelength:1,' characters read from book');
      writeln(output,'***********************************************');
   end;
(* NOTE(review): these first statements are the tail of a routine whose
   opening lies before this point; the module marker after them names it
   book.brline.  they record the number of characters stored, end the
   list at this line, move the file to the next line and count it. *)
   l^.length:=i;
   l^.next:=nil;
   readln(thefile);
   theline := succ(theline)
end;
(* end module book.brline version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module book.brdirect *)
procedure brdirect(var thefile: text; var theline: integer;
                   var direct: direction);
(* read a direction *)
(* after skipstar, one character is read and the line is finished.
   '+' gives plus; any other character gives minus, so dircomplement and
   dirhomologous are never produced here. *)
var ch: char;
begin
   skipstar(thefile);
   readln(thefile,ch);
   theline := succ(theline);
   if ch='+' then direct:=plus
   else direct:=minus
end;
(* end module book.brdirect version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module book.brconfig *)
procedure brconfig(var thefile: text; var theline: integer;
                   var config: configuration);
(* read a configuration *)
(* after skipstar, one character is read and the line is finished.
   'l' gives linear; any other character gives circular. *)
var ch: char;
begin
   skipstar(thefile);
   readln(thefile,ch);
   theline := succ(theline);
   if ch='l' then config:=linear
   else config:=circular
end;
(* end module book.brconfig version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module book.brnotenumber *)
procedure brnotenumber(var thefile: text; var theline: integer;
                       var note: lineptr);
(* book note reading to obtain the number of the object.
the procedure returns the value of the number as a global.
(this is not such a good practice, but we are stuck with it for now.)

   note is always returned nil: the text of the note is skipped, not
   stored.  the globals numbered and number are set here.  the number is
   taken only from the first line of the note, and only when a '#'
   directly follows the leading star and blank.  theline is advanced for
   every line passed.
*)
begin (* brnotenumber *)
   note:=nil;
   numbered := false;
   number := 0; (* force number to zero if there is no number at all *)
   (* the next character is n or * depending on whether there are notes *)
   if thefile^ = 'n' then begin
      (* move past the line that opens the note *)
      readln(thefile);
      theline := succ(theline);
      if thefile^ <> 'n' then begin
         (* the note has at least one line: look for a number on it *)
         skipstar(thefile);
         if not eoln(thefile) then begin
            if thefile^ = '#' then begin
               numbered := true;
               get(thefile); (* move past the number symbol *)
               read(thefile,number);
            end
         end;

         (* pass over the rest of the note, up to the line that closes it *)
         repeat
            readln(thefile);
            theline := succ(theline)
         until thefile^ = 'n';
         (* and move past the closing line itself *)
         readln(thefile);
         theline := succ(theline)
      end
      else begin
         (* empty note: the closing line follows the opening line *)
         readln(thefile);
         theline := succ(theline)
      end
   end
end; (* brnotenumber *)
(* end module book.brnotenumber version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module book.brnote *)
procedure brnote(var thefile: text; var theline: integer; var note: lineptr);
(* read note key *)
(* note is returned nil when there is no note or the note is empty;
   otherwise it is the first of a linked list holding one line record per
   line of the note, each filled by brline.  theline is advanced for every
   line passed. *)
var
   newnote: lineptr; (* the new note *)
   previousnote: lineptr; (* the last line of the notes *)
begin (* brnote *)
   note:=nil;
   if thefile^ = 'n' then begin (* enter note *)
      readln(thefile);
      theline := succ(theline);
      if thefile^ <> 'n' then begin (* abort null note (n/n) *)
         getline(note);
         newnote:=note;
         while thefile^ <> 'n' do begin (* wait until end of note *)
            brline(thefile,theline,newnote);
            previousnote:=newnote;
            (* get next note *)
            (* a line is obtained ahead of need, so the list always ends
               with one unused line when the loop stops *)
            getline(newnote^.next);
            newnote:=newnote^.next;
         end;
         (* last note was not used, so: *)
         clearline(newnote);
         previousnote^.next:=nil;
         (* move past the line that closes the note *)
         readln(thefile);
         theline := succ(theline);
      end
      else begin
         (* empty note: move past its closing line *)
         readln(thefile);
         theline := succ(theline);
      end;
   end
end; (* brnote *)
(* end module book.brnote version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module book.brheader *)
procedure brheader(var thefile: text; var theline: integer; var hea: header);
(* read the header of a key.
*) begin with hea do begin readln(thefile); (* move past the object name - new definition 1999 Mar 13 *) theline := succ(theline); {bbb} (* read key name *) brname(thefile,theline,keynam); (* read full name *) getline(fulnam); brline(thefile,theline,fulnam); (* read note key *) if readnumber then brnotenumber(thefile,theline,note) else brnote(thefile,theline,note) end end; (* end module book.brheader version = 7.73; {of delmod.p 2008 Jul 29} *) (* begin module book.copyheader *) procedure copyheader(fromhea: header; var tohea: header); (* copy the header fromhea into tohea. Note that the linked objects are NOT copied, but merely pointed to. *) begin tohea.keynam.letters := fromhea.keynam.letters; tohea.keynam.length := fromhea.keynam.length; tohea.note := fromhea.note; tohea.fulnam := fromhea.fulnam; end; (* end module book.copyheader version = 7.73; {of delmod.p 2008 Jul 29} *) (* begin module book.brpiekey *) procedure brpiekey(var thefile: text; var theline: integer; var pie: piekey); (* read piece key, track the line number *) begin with pie do begin brheader(thefile,theline,hea); brreanum(thefile,theline,mapbeg); brconfig(thefile,theline,coocon); brdirect(thefile,theline,coodir); brnumber(thefile,theline,coobeg); brnumber(thefile,theline,cooend); brconfig(thefile,theline,piecon); brdirect(thefile,theline,piedir); brnumber(thefile,theline,piebeg); brnumber(thefile,theline,pieend); end end; (* end module book.brpiekey version = 7.73; {of delmod.p 2008 Jul 29} *) (* begin module book.brdna *) procedure brdna(var thefile: text; var theline: integer; var dna: dnaptr); (* read in dna from thefile, track the line *) (* note: if the dna were circularized, by linking the last dnastring to the first, then the cleardna routine could not clear properly, and would loop forever... there is no reason to do that, since a simple mod function will allow one to access the circle. 
*) var ch: char; workdna: dnaptr; begin getdna(dna); workdna:=dna; ch:=getto(thefile,theline,['d']); readln(thefile); theline := succ(theline); read(thefile,ch); (* skipstar *) while (ch = '*') do begin read(thefile,ch); (* skip blank *) repeat read(thefile,ch); if ch in ['a','c','g','t'] then begin if workdna^.length=dnamax then begin getdna(workdna^.next); workdna:=workdna^.next end; workdna^.length:=succ(workdna^.length); workdna^.part[workdna^.length]:=chartobase(ch) end until eoln(thefile); readln(thefile); (* go to next line *) theline := succ(theline); read(thefile,ch); (* ch is either '*' or 'd' *) end; readln(thefile); (* read past the d *) theline := succ(theline); end; (* end module book.brdna version = 7.73; {of delmod.p 2008 Jul 29} *) (* begin module book.brpiece *) procedure brpiece(var thefile: text; var theline: integer; var pie: pieceptr); (* read in a piece, change theline to reflect the lines traversed *) begin { readln(thefile); (* move past the word 'piece' - new definition 1999 Mar 13 *) theline := succ(theline); (* BUG: was below! 
*) bbb} brpiekey(thefile,theline,pie^.key); if numbered or (not skipunnum) then brdna(thefile,theline,pie^.dna); readln(thefile); (* move past the word 'piece' - new definition 1999 Mar 13 *) theline := succ(theline); end; (* end module book.brpiece version = 7.73; {of delmod.p 2008 Jul 29} *) (* begin module book.brinit *) procedure brinit(var book: text; var theline: integer); (* check that the book is ok to read, and set up the global variables for br routines *) begin (* brinit *) (* halt if the book is bad (first word is 'halt') or the first character is not * *) reset(book); if not eof(book) then begin (* check for the date line *) if book^ <> '*' then begin if book^ <> 'h' then writeln(output, ' this is not the first line of a book:') else writeln(output, ' bad book:'); write(output, ' '); while not (eoln(book) or eof(book)) do begin write(output, book^); get(book) end; writeln(output); halt end end else begin writeln(output, ' book is empty'); halt end; (* initialize free storage *) freeline:=nil; freedna:=nil; readnumber:=true; (* usually we read in numbers for items *) number:=0; (* arbitrary value *) numbered:=false; (* the piece has no number (none yet read in) *) skipunnum:=false; theline := 1; end; (* brinit *) (* end module book.brinit version = 7.73; {of delmod.p 2008 Jul 29} *) (* ************************************************************************ *) (* end module package.brpiece version = 7.73; {of delmod.p 2008 Jul 29} *) (* begin module book.getpiece *) procedure getpiece(var thefile: text; var theline: integer; var pie: pieceptr); (* move to and read in the next piece in the book *) var ch: char; begin ch:=getto(thefile,theline,['p']); (* get to the next p(iece) in the book *) if ch<>' ' then begin brpiece(thefile,theline,pie); { 1999 june 2: removed this: ch:=getto(thefile,theline,['p']); (* read to end of p *) } { bbb - now done in brpiece readln(thefile); (* read past piece *) theline := succ(theline); } end else clearpiece(pie); end; 
(* end module book.getpiece version = 7.73; {of delmod.p 2008 Jul 29} *)

(* ************************************************************************ *)
(* end module package.getpiece version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module findblank *)
procedure findblank(var afile: text);
(* read a file to find the next blank character.
   characters are read from afile until a blank has been read.  an end of
   line is read as a blank, so the search never runs beyond the current
   line.  if the file ends first the procedure simply returns; it no
   longer tries to read past the end of the file. *)
var
   ch: char; (* the character just read *)
begin
   ch := 'x'; (* anything but a blank, so that one character is always
                 read when the file has more to give *)
   while (ch <> ' ') and (not eof(afile)) do read(afile,ch)
end;
(* end module findblank version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module findnonblank *)
procedure findnonblank(var afile: text; var ch: char);
(* find the next non blank character in a file, return it in ch.
   ends of lines are passed over.  if the file ends before a non blank
   character is found, ch is returned as a blank.  when the character
   returned was the last one on its line, the file is left at the start
   of the following line. *)
begin
   ch:=' ';
   while (not eof(afile)) and (ch = ' ') do begin
      read(afile,ch);
      (* the read may have taken the last character of the file.  eoln
         must not be asked of a file that is at its end, so eof is
         tested first.  the tests are nested so that this holds even if
         the compiler evaluates boolean expressions completely. *)
      if not eof(afile) then
         if eoln(afile) then readln(afile)
   end
end;
(* end module findnonblank version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module align.align *)
procedure align(var inst, book: text;
                var theline: integer;
                var pie: pieceptr;
                var length, alignedbase: integer);
(* documentation on align is in module info.align and
delman.use.aligned.books.

1996 Sep 12:  The routine now uses the trigger functions found in
prgmod.  The bug in the oldalign routine (that it misses the end of
comments that end in a series of asterisks) has been fixed.  It now
checks that the piece corresponds to the book. *)
const
   maximumrange = 10000; (* if the alignment point is more than this
      distance from the piece ends, the program halts in an attempt to
      catch the alignment bug...  1991 Jan 11 It appears that the rewrite
      of the code has removed the bug, but the check will be kept. *)
   semicolon = ';'; (* end of delila instruction *)
var
   ch: char; (* a character in inst *)
   p: integer; (* index to a piece name *)
   p1: integer; (* another index to a piece name *)
   done: boolean; (* done finding an aligning get *)
   thebase: integer; (* the base read in *)

   indefault: boolean; (* true when within a default statement.
      These can contain the word 'piece', which must be ignored.
*)
(* NOTE(review): the declarations below continue a declaration list whose
   beginning lies above this excerpt; judging by the end-of-module marker
   further down they are locals of procedure align - confirm. *)
gettrigger: trigger; (* trigger to find 'get' *)
defaulttrigger: trigger; (* trigger to find 'default' *)
nametrigger: trigger; (* trigger to find 'name' *)
piecetrigger: trigger; (* trigger to find 'piece' *)
settrigger: trigger; (* trigger to find 'set' *)
begincomment: trigger; (* trigger to find '(-*' (ignore the dash!) *)
endcomment: trigger; (* trigger to find '*-)' (ignore the dash!) *)
begincurly: trigger; (* trigger to find comments: '{' *)
endcurly: trigger; (* trigger to find comments: '}' *)
quote1trigger: trigger; (* trigger to find single quote ' *)
quote2trigger: trigger; (* trigger to find double quote " *)
dotteddone: boolean; (* a dot '.' has been found in the name - ignore the
                        rest of the name - for comparisons with mutations. *)

{
procedure rd(var f: text; var ch: char);
(* read ch from f allowing inspection of the result *)
begin
read(f,ch);
write(output,ch);
write(list,ch);
write(output,'<',ch,'>');
end;

procedure rdln(var f: text);
(* readln f allowing inspection of the result *)
begin
readln(f);
writeln(output);
writeln(list);
end;
}

procedure skipcomment(var f: text);
(* skip an entire comment.
   Reads f one character at a time, feeding each character to the
   endcomment trigger, and returns once that trigger reports found.
   Line ends are stepped over with readln.  If f runs out first the
   program prints a message and halts.
   Uses ch and endcomment from the enclosing procedure. *)
var
   comment: boolean; (* true means we are inside a comment *)
begin
   (* skip to end of comment *)
   resettrigger(endcomment);
   comment := true;
   while comment do begin
      if eof(f) then begin
         writeln(output,'A comment does not end!');
         halt
      end;
      if eoln(f) then readln(f) { rdln(f) }
      else begin
         {write(output,'<'); rd(f,ch); write(output,'>');}
         read(f,ch);
         testfortrigger(ch, endcomment);
         if endcomment.found then comment := false;
      end
   end
end;

procedure skipcurly(var f: text);
(* skip an entire comment made by {}*)
(* Same loop as skipcomment, but driven by the endcurly trigger.
   Halts with the same message when f ends inside the comment.
   Uses ch and endcurly from the enclosing procedure. *)
var
   comment: boolean; (* true means we are inside a comment *)
begin
   (* skip to end of comment *)
   resettrigger(endcurly);
   comment := true;
   while comment do begin
      if eof(f) then begin
         writeln(output,'A comment does not end!');
         halt
      end;
      if eoln(f) then readln(f) { rdln(f) }
      else begin
         {write(output,'<'); rd(f,ch); write(output,'>');}
         read(f,ch);
         testfortrigger(ch, endcurly);
         if endcurly.found then comment := false;
      end
   end
end;

procedure skipquote(quote: trigger);
(* skip an entire quote of either the ' or " persuasion *)
(* The closing character is taken from the first letter of the text
   the trigger seeks.  Note that the procedure has no file parameter:
   it reads from inst directly and leaves the last character in the
   enclosing ch.  Halts with a message when inst ends before the
   closing character is seen.
   NOTE(review): findnonblank is defined outside this excerpt; it is
   used here as the way to advance to the next character of interest. *)
var
   kind: char; (* the kind of quote, ' or " *)
begin
   kind := quote.seek.letters[1];
   {writeln(output,'skipquote ',kind);}
   repeat
      findnonblank(inst,ch); (* get to the quote *)
   until (ch = kind) or eof(inst);
   if ch <> kind then begin
      writeln(output,'end of quote starting with ',kind,' not found');
      halt;
   end;
end;

begin
   (* load every trigger with the text it has to recognise *)
   filltrigger(defaulttrigger,'default');
   filltrigger(gettrigger,'get ');
   filltrigger(nametrigger,'name ');
   filltrigger(piecetrigger,'piece ');
   filltrigger(settrigger,'set ');
   filltrigger(begincomment,'(* ');
   filltrigger(endcomment,'*) ');
   filltrigger(begincurly,'{ ');
   filltrigger(endcurly,'} ');
   filltrigger(quote1trigger,''' ');
   filltrigger(quote2trigger,'" ');

   (* the two closing triggers are not reset here: skipcomment and
      skipcurly reset them on entry *)
   resettrigger(defaulttrigger);
   resettrigger(gettrigger);
   resettrigger(nametrigger);
   resettrigger(piecetrigger);
   resettrigger(settrigger);
   resettrigger(begincomment);
   resettrigger(begincurly);
   resettrigger(quote1trigger);
   resettrigger(quote2trigger);

   indefault := false;

   if not eof(book) then begin (* if there is still more to the book ... *)
      getpiece(book,theline,pie); (* read in the piece *)
      if not eof(book) then begin (* if we found a piece ... *)
         length:=pietoint(pie^.key.pieend,pie); (* calculate piece length *)

         (* now find in inst the next occurance of 'get' *)
         done := false;
         while not done do begin
            if eof(inst) then begin (* no instructions? *)
               alignedbase := 1; (* simply align by the first base *)
               done := true
            end
            else begin
               if eoln(inst) then readln(inst) {then rdln(inst)}
               else begin
                  {rd(inst,ch);}
                  read(inst,ch);
                  (* every character read is offered to both comment
                     openers before anything else is looked at *)
                  testfortrigger(ch, begincomment);
                  testfortrigger(ch, begincurly);
                  if begincomment.found or begincurly.found then begin
                     (* the character that completed the match tells which
                        opener fired: the begincomment text ends in a star *)
                     if ch = '*' then begin
                        skipcomment(inst);
                        resettrigger(begincomment);
                     end
                     else begin
                        resettrigger(begincurly);
                        skipcurly(inst);
                     end
                  end
                  else begin (* we are not inside a comment *)
                     testfortrigger(ch, gettrigger);
                     if gettrigger.found then begin
                        findnonblank(inst,ch); (* get to "from" *)
                        findblank(inst); (* get past "from" *)
                        read(inst,thebase); (* read in the alignedbase *)
                        {writeln(output);writeln(output,'thebase = ',thebase:1);}
                        (* thebase is the coordinate as written in inst;
                           the message at the end of this procedure calls
                           the converted value internal coordinates *)
                        alignedbase:=pietoint(thebase,pie);
                        {writeln(output,'alignedbase=',alignedbase:1);}
                        done := true
                     end;
                     (* NOTE(review): the tests below also run after a get
                        was handled, with ch as left by findnonblank *)

                     testfortrigger(ch, quote1trigger);
                     if quote1trigger.found then begin
                        skipquote(quote1trigger);
                     end;

                     testfortrigger(ch, quote2trigger);
                     if quote2trigger.found then begin
                        skipquote(quote2trigger);
                     end;

                     (* indefault is switched on by the words default and
                        set, and switched off again at the next semicolon;
                        while it is on, piece names are not checked *)
                     testfortrigger(ch, defaulttrigger);
                     if defaulttrigger.found then begin
                        indefault := true;
                        resettrigger(defaulttrigger)
                     end;
                     if ch = semicolon then indefault := false;

                     testfortrigger(ch, settrigger);
                     if settrigger.found then begin
                        indefault := true;
                        resettrigger(settrigger)
                     end;
                     if ch = semicolon then indefault := false;

                     (* check that piece names are correct *)
                     testfortrigger(ch, piecetrigger);
                     if not indefault then
                     if piecetrigger.found then begin
                        skipblanks(inst); (* get to name *)
                        (* inside this with statement, letters and length
                           are the fields of the key name of the piece *)
                        with pie^.key.hea.keynam do begin
                           { for p := 1 to length do begin }
                           (* 2007 Jun 22: replace loop with while so that we
                              can drop out when dotted names are detected. *)
                           p := 1;
                           dotteddone := false;
                           while not dotteddone do begin
                              if eoln(inst) then dotteddone := true
                              else begin
                                 read(inst,ch);
                                 (* ignore names after a dot *)
                                 { if ch = '.' then writeln(output,'inst dotteddone'); }
                                 if ch = '.' then dotteddone := true;
                                 if letters[p] = '.' then dotteddone := true;
                                 { if ch = '.' then writeln(output,'book dotteddone'); writeln(output,'BUBBa ch = ',ch,' ',p:1); }
                                 {zzz}
                                 (* a semicolon in inst is tolerated at any
                                    position of the comparison *)
                                 if (letters[p] <> ch) and (not dotteddone) and (ch <> ';') then begin
                                    writeln(output, 'The piece name in the book: ');
                                    writeln(output,letters:length);
                                    writeln(output,'does not match', ' the inst file piece name:');
                                    (* write the letters that matched: *)
                                    for p1 := 1 to p-1 do write(output,letters[p1]);
                                    (* write the offending letter: *)
                                    write(output, ch);
                                    (* get the rest of the name and show it: *)
                                    (* done is borrowed as a scratch flag here;
                                       the program halts right afterwards *)
                                    done := eoln(inst);
                                    while not done do begin
                                       done := eoln(inst);
                                       if not done then begin
                                          read(inst,ch);
                                          if (ch = ' ') or (ch = ';') then done := true;
                                          if not done then write(output,ch);
                                       end;
                                    end;
                                    writeln(output);
                                    (* mark the first letter that does not match: *)
                                    for p1 := 1 to p-1 do write(output,' ');
                                    write(output,'^');
                                    writeln(output);
                                    halt
                                 end;
                                 p := p + 1;
                                 (* stop once the whole book name was compared *)
                                 if p > length then dotteddone := true;
                              end;
                           end
                        end;
                     end;
                  end
               end
            end
         end;

         (* sanity check: the aligned base may lie outside the piece, but
            not further away than maximumrange *)
         if (alignedbase <= -maximumrange) or (alignedbase > length + maximumrange) then begin
            writeln(output,' In procedure align:');
            writeln(output,' read in base was ',thebase:1);
            writeln(output,' in internal coordinates: ',alignedbase:1);
            writeln(output,' maximum range was ',maximumrange:1);
            writeln(output,' piece length was ',length:1);
            with pie^.key.hea.keynam do writeln(output,' piece name: ',letters:length);
            writeln(output,' piece number: ',number:1);
            writeln(output,' aligned base is too far away... see the code');
            halt
         end
      end
   end
end;
(* end module align.align version = 7.73; {of delmod.p 2008 Jul 29} *)

(* ************************************************************************ *)
(* end module package.align version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module align.maxminalignment *)
procedure maxminalignment(var inst, book: text; var theline: integer; var fromparam, toparam: integer; alignmenttype: char);
(* prescan the book to find the range over which the pieces of the book are spread, relative to the aligned base.
the procedure uses the same variables that align does (so it can call
   align itself), and it returns the range in fromparam and toparam.

   alignmenttype: 'f' means alignment by First internal coordinate base,
                  'b' means alignment by Book,
                  'i' means alignment by Instructions.

   Any other alignmenttype stops the program with a message: none of the
   case arms below would match it, so no piece would ever be read.

   Both files are reset on entry and again on exit, so the caller can read
   them from the start afterwards.  When the book holds no pieces the
   bounds keep their initial values (fromparam = +maxint,
   toparam = -maxint) and the range warning is not evaluated. *)
const
   maximumrange = 500; (* the maximum size aligned piece; this will
                          presumably catch the alignment bug *)
var
   distance: integer; (* a distance to the aligned base *)
   pie: pieceptr; (* scratch piece, allocated and disposed here *)
   length, alignedbase: integer; (* of the piece most recently read *)
begin
   (* reject unknown alignment types before touching the files *)
   if not (alignmenttype in ['f', 'b', 'i']) then begin
      writeln(output,' maxminalignment: unknown alignment type: ',alignmenttype);
      halt
   end;

   new(pie);

   (* set an initial range for the two bounds *)
   fromparam:=+maxint;
   toparam:=-maxint;

   reset(book);
   reset(inst);

   while not eof(book) do begin
      (* read the next piece; only the instruction driven alignment
         also needs the inst file *)
      case alignmenttype of
         'i': align(inst,book,theline,pie,length,alignedbase);
         'b','f': begin
                     getpiece(book,theline,pie); (* read in the piece *)
                     length := piecelength(pie);
                  end;
      end;

      if not eof(book) then begin
         case alignmenttype of
            'f': begin (* force alignment on first base *)
                    alignedbase := 0;
                    fromparam := 1;
                    distance:=length-alignedbase;
                    if toparam < distance then toparam:=distance;
                 end;
            'i': begin (* use the alignedbase from the book *)
                    distance:=1-alignedbase;
                    if fromparam > distance then fromparam:=distance;
                    distance:=length-alignedbase;
                    if toparam < distance then toparam:=distance;
                 end;
            'b': begin (* use the internal book *)
                    alignedbase := pietoint(0, pie);
                    distance:=1-alignedbase;
                    if fromparam > distance then fromparam:=distance;
                    distance:=length-alignedbase;
                    if toparam < distance then toparam:=distance;
                 end;
         end;
         clearpiece(pie)
      end
   end;

   (* Test the span only when the bounds are ordered.  With no pieces
      read they are still +maxint and -maxint, and subtracting them would
      overflow; alignedbase would also be unset in the warning. *)
   if fromparam <= toparam then
      if toparam - fromparam > maximumrange then begin
         writeln(output,' WARNING: in procedure maxminalignment:');
         writeln(output,' alignedbase = ',alignedbase:1);
         writeln(output,' fromparameter = ',fromparam:1);
         writeln(output,' toparameter = ',toparam:1);
         writeln(output,' this exceeds the maximum range allowed (', maximumrange:1,')');
         writeln(output,' see notes in the procedure. ');
         (* 2008 Jul 29: remove the halt to allow bk2seq to function. *)
         (* notes: if you desired this range, increase 'maximumrange'.
            otherwise, this may indicate a bug - either:
            1) locate the bug (and tell tom schneider, please...)
            2) reduce the size of the fragments, from one or the other end
               until the bombing is stopped. *)
      end;

   (* make the book readable again *)
   reset(book);
   reset(inst);
   dispose(pie)
end;
(* end module align.maxminalignment version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module align.withinalignment *)
function withinalignment(alignedposition, alignedbase, length: integer) :boolean;
(* this function tells one if an aligned position, relative to an
   aligned base in a piece of some length is within the piece.
   The position on the piece is alignedposition + alignedbase; the result
   is true when that position lies in 1..length. *)
var
   p: integer;(* the position on the piece *)
begin
   p := alignedposition + alignedbase;
   withinalignment := (p>0) and (p<=length)
end;
(* end module align.withinalignment version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module book.getbase *)
function getbase(position: integer; pie: pieceptr):base;
(* Get a base from the position (internal coordinates) of the piece. Protection is made against positions outside the piece. In the case of circles it would be convenient to wrap around when requests are off the end. So the routine will do a modular wrap for positions outside the range 1 to the length. This is a new feature as of 2000 March 22.
*)
(* Error handling: the program prints a message and halts when the piece
   has no bases, when it has no dna segment at all, when the segment
   chain ends before the requested position is reached, or when the
   computed index into a segment is out of range. *)
var
   workdna: dnaptr; (* pointer to the dna part of pie *)
   p: integer; (* count of bases up to the end of workdna *)
   spot: integer; (* index of the wanted base within workdna^.part *)
   thelength: integer; (* the length of the piece *)
begin
   { writeln(output,'NEW getbase: position=',position:1,'^^^^^^^^^^^^^^^^^^^^'); }

   thelength := piecelength(pie);

   (* an empty piece cannot be wrapped around: adding or subtracting a
      length of zero would never bring position into range *)
   if thelength < 1 then begin
      writeln(output,'error in getbase: the piece has no bases');
      halt
   end;

   (* handle cases of position out of range by circular wrapping *)
   while position < 1 do position := position + thelength;
   while position > thelength do position := position - thelength;

   (* check the first segment before it is dereferenced *)
   workdna:=pie^.dna;
   if workdna = nil then begin
      writeln(output,'error in getbase: request off end of piece');
      halt
   end;

   (* walk the chain of segments until the one holding position *)
   p:=workdna^.length;
   while position > p do begin
      { writeln(output,' workdna^.length=',workdna^.length:1); }
      workdna := workdna^.next;
      if workdna = nil then begin
         writeln(output,'error in function getbase!');
         halt
      end;
      p := p + workdna^.length;
   end;
   { writeln(output,'p=',p:1); }

   (* p counts the bases through the end of this segment, so the wanted
      base sits p-position places before the end of the segment *)
   spot := workdna^.length - (p-position);
   { writeln(output,'spot=',spot:1); showdnasegment(output,workdna, spot); }
   if (spot <= 0) then begin
      writeln(output,'error in getbase, spot (= ',spot:1, ') must be positive');
      halt
   end;
   if (spot > workdna^.length) then begin
      writeln(output,'error in getbase, spot (=',spot:1, ') must be less than length (=',workdna^.length:1,')');
      halt
   end;
   { writeln(output,'base = ', workdna^.part[spot]); }
   getbase:=workdna^.part[spot]
end;
(* end module book.getbase version = 7.73; {of delmod.p 2008 Jul 29} *)

(* begin module package.numbar *)
(* ************************************************************************ *)

(* begin module numberdigit *)
function numberdigit(number, logplace:integer): char;
(* return the digit at the place value ('logplace') position of number.
example: numberdigit(13625, 3) = 3
            numberdigit(13625, 4) = 1

   The place one above the leading digit yields the sign ('+' or '-'),
   and every place above that yields a blank.  For number = 0 the result
   is '0' at logplace 0 and a blank everywhere else. *)
var
   place: integer; (* ten raised to the power logplace *)
   count: integer; (* used to make place *)
   absolute: integer; (* the absolute value of number *)
   acharacter: char; (* the character to be returned *)

   procedure digit;
   (* extract a digit at the place position *)
   var
      d: integer; (* the digit extracted *)
   begin (* digit *)
      (* same digit as reducing modulo ten times place and then dividing
         by place, but without forming ten times place *)
      d := (absolute div place) mod 10;
      acharacter := chr(ord('0') + d)
   end; (* digit *)

   procedure sign;
   (* put a negative sign out or a positive sign *)
   begin (* sign *)
      if number <0 then acharacter:='-'
      else acharacter:='+'
   end; (* sign *)

begin (* numberdigit *)
   place:=1;
   for count:=1 to logplace do place:=10*place;

   if number=0 then begin
      if place=1 then acharacter:='0'
      else acharacter:=' '
   end
   else begin
      absolute:=abs(number);
      if absolute < (place div 10) then acharacter:=' ' (* above the sign *)
      else if absolute >= place then digit
      else sign (* exactly one place above the leading digit *)
   end;

   numberdigit:=acharacter
end; (* numberdigit *)
(* end module numberdigit version = 'prgmod 3.96 85 mar 18 tds'; *)

(* begin module numbersize *)
function numbersize(n: integer):integer;
(* calculate amount of space to be reserved for the integer n:
   one place per decimal digit of abs(n) plus one place for the sign.
   Zero is the exception and needs a single place.

   The digits are counted with integer division.  The earlier form,
   trunc(ln(abs(n))/ln10 + epsilon) + 2 with ln10 = 2.30259 and
   epsilon = 0.00001, came out one too small at powers of ten from
   100000 upwards, because the rounded-up ln10 constant lowers the
   quotient by more than epsilon there. *)
var
   remaining: integer; (* what is left of abs(n) while counting *)
   digits: integer; (* decimal digits counted so far *)
begin (* numbersize *)
   if n = 0 then numbersize:=1
   else begin
      remaining := abs(n);
      digits := 1;
      while remaining >= 10 do begin
         remaining := remaining div 10;
         digits := digits + 1
      end;
      numbersize := digits + 1 (* the extra place is for the sign *)
   end
end; (* numbersize *)
(* end module numbersize version = 'prgmod 3.96 85 mar 18 tds'; *)

(* begin module numberbar *)
procedure numberbar(var afile: text; spaces, firstnumber, lastnumber: integer; var linesused: integer);
(* write a bar of numbers to a file, with several spaces before.
   the number of lines used is returned.

   The numbers firstnumber..lastnumber are written vertically, one column
   per number: each output line holds one place value of every number,
   highest place first, preceded by 'spaces' blanks.  linesused is the
   numbersize of whichever end of the range has the larger absolute
   value. *)
var
   logplace: integer; (* the log of the digit being looked at *)
   spacecount: integer; (* count of spaces *)
   number: integer; (* the current number being written *)
begin
   if abs(firstnumber) > abs(lastnumber)
   then linesused:= numbersize(firstnumber)
   else linesused:= numbersize(lastnumber);

   for logplace:=linesused-1 downto 0 do begin
      for spacecount:=1 to spaces do write(afile,' ');
      for number:=firstnumber to lastnumber do
         write(afile,numberdigit(number,logplace));
      writeln(afile)
   end
end;
(* end module numberbar version = 'prgmod 3.96 85 mar 18 tds'; *)

(* ************************************************************************ *)
(* end module package.numbar version = 'prgmod 3.96 85 mar 18 tds'; *)

(* begin module pnumberbar *)
procedure pnumberbar(var afile: text; spaces, firstnumber, lastnumber: integer; var linesused: integer);
(* write a bar of numbers to a file, with several spaces before. the number of lines used is returned.
In this variant of numberbar, lines are to be written in postscript *)
(* Each output line has the form: an opening parenthesis, 'spaces'
   blanks, one character per number in firstnumber..lastnumber, then a
   closing parenthesis followed by ' sn'. *)
var
   row: integer; (* place value handled on the current line *)
   pad: integer; (* counts the leading blanks *)
   value: integer; (* the number whose character is being written *)
begin
   (* the end of the range with the larger magnitude sets the height *)
   if abs(lastnumber) < abs(firstnumber)
   then linesused := numbersize(firstnumber)
   else linesused := numbersize(lastnumber);

   row := linesused - 1;
   while row >= 0 do begin
      write(afile,'(');
      for pad := 1 to spaces do write(afile,' ');
      for value := firstnumber to lastnumber do
         write(afile,numberdigit(value,row));
      writeln(afile,') sn');
      row := row - 1
   end
end;
(* end module pnumberbar *)

procedure getname(var namebook: text; nametype: char; var typefound: char; var aname: name; var orgopen: boolean; var theline : integer);
(* advance namebook to the next name of interest: either an organism
   or an object of the kind given by nametype.

   typefound reports what was reached: 'o', nametype, or a blank when
   getto came back with a blank.  aname receives the name read by
   brname.  orgopen is toggled at every organism marker, and an organism
   name is only taken when the toggle leaves it true.

   this routine is modelled on getocp and is meant to be superseded one
   day by a procedure that reads instructions instead of a book. *)
var
   hit: char; (* the character handed back by getto *)

   procedure takename;
   (* read the name and record which kind of object it belongs to *)
   begin
      brname(namebook, theline, aname);
      typefound := hit
   end;

begin
   typefound := '.';
   repeat
      hit := getto(namebook, theline, ['o', nametype]);
      if hit = ' ' then
         typefound := hit
      else
         case hit of
            'o': begin
                    orgopen := not orgopen;
                    if orgopen then takename
                 end;
            'g', 't', 'p': begin
                    if hit = nametype then takename;
                    (* move on past the remainder of this object *)
                    hit := getto(namebook, theline, [hit])
                 end
         end
   until typefound in [' ', 'o', nametype]
end;

procedure printname(var namebook: text; nametype: char; namewidth: integer; var tofile, ctofile: text; var orgopen: boolean; var linenumber, clinenumber: integer; var theline : integer);
(* print names of items in the book from namebook to tofile. organism is always printed, and one other type specified by nametype (one of 'g', 't', 'p') is printed namewidth wide. orgopen is used to keep track of the organism.
*)
(* Everything is written twice, once to tofile and once to ctofile.
   An organism takes two lines in each file (a blank line and the
   organism line), and linenumber and clinenumber are advanced by two.
   A name of the requested type is written after one blank, without
   ending the line, and finishes the procedure.  The procedure also
   returns when getname reports a blank type.

   Names are stored in namelength characters; when namewidth is larger
   the remaining columns are filled with blanks instead of indexing
   beyond the end of the name. *)
var
   aname: name; (* the name found *)
   typefound: char; (* the type of the name *)

   procedure putname(var afile: text);
   (* write aname into afile, exactly namewidth columns wide *)
   var
      index: integer; (* column being written *)
   begin
      for index:=1 to namewidth do
         if index <= namelength then write(afile, aname.letters[index])
         else write(afile, ' ')
   end;

begin
   repeat
      getname(namebook, nametype, typefound, aname, orgopen, theline);

      if typefound <> ' ' then begin
         case typefound of
            'o': begin
                    writeln(tofile);
                    writeln(tofile, ' organism ', aname.letters);
                    writeln(ctofile);
                    writeln(ctofile, ' organism ', aname.letters);
                    linenumber:=linenumber+2; (* two lines written *)
                    clinenumber:=clinenumber+2 (* two lines written *)
                 end;
            'g', 't', 'p': begin
                    write(tofile, ' ');
                    write(ctofile, ' ');
                    putname(tofile);
                    putname(ctofile)
                 end
         end
      end
   until typefound in [' ', nametype]
end;

(* begin module makelogo.protectpostscript *)
procedure protectpostscript(var afile: text; c: char);
(* Special characters must be protected against! Put out a protective backslash for character c which would otherwise destroy the PostScript interpreter. The parenthesis is used in PostScript to indicate the bounds of a string, while the percent is the comment character. The backslash also needs protection, since it is the escape to indicate that the next character is part of the string.
*)
(* Only the backslash is written here; the character c itself is left
   for the caller to write. *)
begin
   if c in ['(',')','%','\'] then write(afile,'\');
end;
(* end module makelogo.protectpostscript *)

(* begin module copynoreturn *)
procedure copynoreturn(var fin, fout: text);
(* copy a line from file fin to file fout but don't put a carriage return.
   The rest of the current line of fin is copied through the file
   buffers, then fin is moved to its next line; fout stays on the same
   line. *)
begin (* copynoreturn *)
   while not eoln(fin) do begin
      fout^ := fin^;
      put(fout);
      get(fin)
   end;
   readln(fin);
   (* writeln(fout); *)
end; (* copynoreturn *)
(* end module copynoreturn *)

procedure themain(var inst, book, bk2seqp, alprop, protseq: text);
(* Convert the book into protseq or fasta form.

   Steps: choose the output format from bk2seqp, write the title, prescan
   the book with maxminalignment to find the common coordinate range,
   write the alignment point for alpro into alprop, then write every
   piece as a name line followed by its sequence.  Positions of the
   range that a piece does not cover are written as '-'.  Sequence lines
   hold at most symbolsperline symbols.

   A protseq sequence ends with a period.  A fasta sequence has no end
   mark, and no empty line is written after it, also not when the last
   sequence line is completely filled.

   Besides its own locals the procedure works on variables declared
   outside it: apiece, length, alignedbase, fromparam, toparam, fromdo,
   todo, userrange, column, b, numbered and number. *)
var
   alignpoint: integer; (* point of the sequence that should be the
                           0 alignment coordinate *)
   index: integer; (* a loop control variable *)
   outtype: char; (* p: protseq format generated,
                     f: fasta format generated *)
   theline: integer; (* the line in the book *)
   alignmenttype: char; (* type of alignment:
                           'f' means alignment by First internal coordinate base,
                           'b' means alignment by Book,
                           'i' means alignment by Instructions. *)
begin
   writeln(output,'bk2seq ',version:4:2);
   rewrite(alprop);
   rewrite(protseq);
   reset(bk2seqp);

   (* NOTE(review): the program description says fasta is produced when
      bk2seqp begins with the letter 'f'; this test produces fasta for
      any non-empty file.  Left as it is - confirm which is intended. *)
   if eof(bk2seqp) then outtype := 'p'
   else outtype := 'f';

   new(apiece);
   reset(inst);
   brinit(book, theline);
   alignmenttype := 'i';

   (* title: program name and version, followed by a line copied from
      the book *)
   if outtype = 'p' then begin
      writeln(protseq,'* bk2seq ',version:4:2);
      write(protseq, '* ');
   end
   else begin
      write(protseq,'> bk2seq ',version:4:2);
   end;
   copyaline(book, protseq);
   reset(book);

   maxminalignment(inst, book, theline, fromparam, toparam, alignmenttype);
   fromdo := fromparam;
   todo := toparam;
   userrange := false;

   if outtype = 'p' then begin
      writeln(protseq, '* The book is from: ', fromparam:1, ' to ', toparam:1);
      writeln(protseq, '* Alist list is from: ', fromdo:1, ' to ', todo:1);
   end;

   alignpoint := 1 - fromparam;
   if alignpoint < 1 then begin
      writeln(output,'WARNING');
      writeln(output,'Something may be wrong with the alignment of the book.');
      writeln(output,'The alignment is outside the pieces.');
   end;
   writeln(alprop,alignpoint:1,' alignment point for the alpro program');

   while not eof(book) do begin
      align(inst, book, theline, apiece, length, alignedbase);
      if not eof(book) then begin
         (* name line: marker, piece name up to its first blank, the
            aligned base, and the piece number when numbered is set *)
         if outtype = 'p' then begin
            write(protseq,'* ')
         end
         else write(protseq,'> ');
         { for index := 1 to namelength do write(protseq,apiece^.key.hea.keynam.letters[index]); }
         index := 1;
         while (apiece^.key.hea.keynam.letters[index] <> ' ') and (index < namelength) do begin
            write(protseq,apiece^.key.hea.keynam.letters[index]);
            index := index + 1;
         end;
         write(protseq,' ',inttopie(alignedbase,apiece):5);
         if numbered then write(protseq, ' ', number:5, ' ');
         writeln(protseq);

         (* sequence lines; column counts the symbols on the current
            line and is back at 0 right after a line was completed *)
         column := 0;
         for index := fromdo to todo do begin
            if withinalignment(index, alignedbase, length) then begin
               b := basetochar(getbase(index+alignedbase, apiece));
               write(protseq,b);
            end
            else begin
               write(protseq, '-');
            end;
            column := column + 1;
            if (column mod symbolsperline) = 0 then begin
               column := 0;
               writeln(protseq);
            end;
         end;

         if outtype = 'p' then begin
            write(protseq,'.');
            writeln(protseq)
         end
         (* fasta format does not have an end of sequence mark!
            only finish a partly filled line, so that no empty line
            ends up between two fasta entries *)
         else if column > 0 then writeln(protseq);

         clearpiece(apiece);
      end
   end;
end;

begin
   themain(inst, book, bk2seqp, alprop, protseq);
1: end. (* bk2seq *)