program rep(hlist, xbook, ybook, fout, pout, repp, output); (* rep: repeated sequences in two books Britta Singer and Lane Wyatt copyright (c) 1985 module libraries required: delman, delmods, auxmods, prgmods *) label 1; (* end of program *) const (* begin module version *) version = 1.75; (* of rep.p 1995 July 26 last major change: 1985 apr 26 date of origin: september 1983 *) (* end module version *) (* begin module describe.rep *) (* name rep: records repeats between sequences in two books synopsis rep(hlist: in, xbook: in, ybook: in, fout: out, pout: out, repp: in, output: out) files hlist: a list of helices for xbook and ybook generated by the program helix. xbook: a book from the delila system. ybook: a book from the delila system. fout: a file containing the following information about each repeated sequence that satisfies the criteria of repp: * the 5 prime ends of the two occurrences of the repeat. * rlength, length of the repeated sequence. * distance: if direct repeats, the number of bases from five prime end to five prime end of each repeat; if inverted repeats, the number of bases from three prime end to five prime end (i.e., pseudo-loop distance). in every case, the smallest possible distance is given. pout: a file containing information about palindromes (only filled when inverted repeats are found in related sequences, see below). repp: input parameter file, must contain 3 characters, one per line. this may be followed by 4 integers, one per line. * mode of repeat: d = direct repeat (xbook and ybook have opposite directions) i = inverted repeat (xbook and ybook are in the same direction) * the types of xbook and ybook used in helix program: u = unrelated (any two sequences - no distances are calculated) r = related (sequences derived from the same piece of dna. the coordinate numbering of both books must be the same in order to calculate distances.) * the energies of hlist reflect the composition of the repeat e = "energies" are to be reported n = no "energies" are to be reported * minimum number of bases in a repeat to be recorded. * maximum number of bases in a repeat to be recorded. * minimum distance between repeated sequences to be recorded. * maximum distance between repeated sequences to be recorded. output: messages to the user. description rep uses information generated by the helix program to record the occurrences of repeated sequences of dna. helices are interpreted as repeats, direct or inverted depending upon the input sequences. repeats that meet the criteria of minimum length and minimum and/or maximum distance between half repeats are reported in fout. palindromes are reported in pout. see also helix.p, matrix.p, keymat.p author Britta Singer and Lane Wyatt bugs 1. when xbook and ybook have sequence in common, hlist reports each "helix" twice. rep is able to eliminate duplicates only when xbook and ybook overlap completely. thus in cases of partial overlap, some repeats may be duplicated. 2. rep uses external coordinates to calculate distances and will bomb with complicated coordinates. *) (* end module describe.rep *) (* begin module rep.const *) headerlines = 7; (* the number of lines of the header in hlist *) defdmin = 0; (* default value for minimum distance between the five prime ends of the two repeated sequences *) defdmax = maxint; (* default value for maximum distance between the five prime ends of the two repeated sequences *) defminlength = 3; (* default value for minimum length of repeated sequences *) defmaxlength = maxint; (* default value for maximum length of repeated sequences *) debugging = true; (* allows one to trace bugs in program *) prime = '"'; (* allows one to write five prime in symbols *) (* end module rep.const version = 1.63; (* of rep 1984 june 5 *) (* begin module book.const *) (* constants needed for book manipulations *) dnamax = 3000; (* length of dna arrays *) namelength = 20; (* maximum key name length *) linelength = 80; (* maximum line readable in book *) (* end module book.const version = 'delmod 6.51 85 apr 17 tds/gds' *) type (* begin module rep.type *) prec = record (* parameters from repp *) mode: char; (* type of repeat *) typbk: char; (* relation of books in hlist *) energy: char; (* indicates if energies (so-called) of repeats (helices) are to be recorded *) minlength: integer; (* minimum number of bases in repeat *) maxlength: integer; (* maximum number of bases in repeat *) dmax: integer; (* maximum distance between repeats *) dmin: integer; (* minimum distance between repeats *) end; (* prec *) reprec = record (* contains processed information about repeat *) x5p : integer; (* location of the first base in the first occurance of the repeated sequence *) y5p : integer; (* location of the first base in the second occurance of repeated sequence *) rlength : integer; (* number of bases in repeated sequence *) distance : integer; (* distance in bases between five prime ends of direct repeats or between the closest three prime and five prime ends of inverted repeats *) deltagee : real (* "energies" of helices from hlist *) end; (* reprec *) (* end module rep.type version = 1.63; (* of rep 1984 june 5 *) (* begin module book.type *) (* types needed for book manipulations *) chset = set of 'a'..'z'; (* types defined in book definition *) alpha = packed array[1..namelength] of char; (* this is not alfa *) (* name is a left justified string with blanks following the characters *) name = record letters: alpha; length: 0..namelength (* zero means an unspecified structure *) end; lineptr = ^line; line = record (* a line of characters *) letters: packed array [1..linelength] of char; length: 0..linelength; next: lineptr end; direction = (plus, minus, dircomplement, dirhomologous); configuration = (linear, circular); state = (on, off); header = record (* header of key *) keynam: name; (* key name of structure *) fulnam: lineptr; (* full name of structure *) note: lineptr (* note key *) end; (* base types *) base = (a,c,g,t); dnaptr = ^dnastring; dnarange = 0..dnamax; seq = packed array[1..dnamax] of base; dnastring = record part: seq; length: dnarange; next: dnaptr end; orgkey = record (* organism key *) hea: header; mapunit: lineptr (* genetic map units *) end; chrkey = record (* chromosome key *) hea: header; mapbeg: real; (* number of genetic map beginning *) mapend: real (* number of genetic map ending *) end; pieceptr = ^piece; piekey = record (* piece key *) hea: header; mapbeg: real; (* genetic map beginning *) coocon: configuration; (* configruation (circular/linear) *) coodir: direction; (* direction (+/-) relative to genetic map *) coobeg: integer; (* beginning nucleotide *) cooend: integer; (* ending nucleotide *) piecon: configuration; (* configruation (circular/linear) *) piedir: direction; (* direction (+/-) relative to coordinates *) piebeg: integer; (* beginning nucleotide *) pieend: integer; (* ending nucleotide *) end; piece = record key: piekey; dna: dnaptr end; reference = record pienam : name; (* name of piece referred to *) mapbeg : real; (* genetic map beginning *) refdir : direction; (* direction relative to coordinates *) refbeg : integer; (* beginning nucleotide *) refend : integer; (* ending nucleotide *) end; genkey = record (* gene key *) hea : header; ref : reference; end; trakey = record (* transcript key *) hea : header; ref : reference; end; markerptr = ^marker; markey = record (* marker key *) hea : header; ref : reference; sta : state; phenotype : lineptr; next : markerptr; end; marker = record key : markey; dna : dnaptr; end; (* end module book.type version = 'delmod 6.51 85 apr 17 tds/gds' *) var (* begin module rep.var *) (* files for rep program *) xbook, (* one book *) ybook, (* the other book *) hlist, (* a list of helices from program helix *) fout, (* a list of repeated sequences *) pout, (* a list of palindromes *) repp (* parameters for the program *) : text; (* end module rep.var version = 1.63; (* of rep 1984 june 5 *) (* begin module book.var *) (* ************************************************************************ *) (* global variables needed for book manipulations *) (* free storage: *) freeline: lineptr; (* unused lines *) freedna: dnaptr; (* unused dnas *) readnumber: boolean; (* whether to read a number from the notes, or to read in the notes *) number: integer; (* the number of the item just read *) numbered: boolean; (* true when the item just read is numbered *) skipunnum: boolean; (* a control variable to allow skipping of un-numbered items in the book *) (* ************************************************************************ *) (* end module book.var version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module package.primitive *) (* ************************************************************************ *) (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module copyaline *) procedure copyaline(var fin, fout: text); (* copy a line from file fin to file fout *) begin (* copyaline *) while not eoln(fin) do begin fout^ := fin^; put(fout); get(fin) end; readln(fin); writeln(fout); end; (* copyaline *) (* end module copyaline version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module copylines *) function copylines(var fin, fout: text; n: integer): integer; (* copy n lines of file fin to file fout. the actual number of lines copied is returned. *) var index: integer; (* the current line number *) begin (* copylines *) index := 0; while (not eof(fin)) and (index < n) do begin copyaline(fin, fout); index := succ(index) end; copylines := index end; (* copylines *) (* end module copylines version = 'delmod 6.51 85 apr 17 tds/gds' *) (* ************************************************************************ *) (* end module package.primitive version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module package.getpiece *) (* ************************************************************************ *) (* begin module package.brpiece *) (* ************************************************************************ *) (* begin module book.basis *) (* procedures needed for book manipulations *) (* get procedures should be used for all linked lists of records *) procedure getline(var l: lineptr); (* obtain a line from the free line list or by making a new one *) begin if freeline<>nil then begin l:=freeline; freeline:=freeline^.next end else new(l); l^.length:=0; l^.next:=nil end; procedure getdna(var l: dnaptr); begin if freedna<>nil then begin l:=freedna; freedna:=freedna^.next end else new(l); l^.length:=0; l^.next:=nil end; (* clear procedures should be called each time the records are no longer needed failure to do this may result in a stack overflow. *) procedure clearline(var l: lineptr); (* return a line to the free line list *) var lptr: lineptr; begin if l<>nil then begin lptr:=l; l:=l^.next; lptr^.next:=freeline; freeline:=lptr end end; procedure cleardna(var l: dnaptr); var lptr: dnaptr; begin if l<>nil then begin lptr:=l; l:=l^.next; lptr^.next:=freedna; freedna:=lptr end end; procedure clearheader(var h: header); (* clear the header h (remove lines to free storage) *) begin with h do begin clearline(fulnam); while note<>nil do clearline(note) end end; procedure clearpiece(var p: pieceptr); (* clear the dna of the piece *) begin while p^.dna<>nil do cleardna(p^.dna); clearheader(p^.key.hea) end; function chartobase(ch:char):base; (* convert a character into a base *) begin case ch of 'a': chartobase:=a; 'c': chartobase:=c; 'g': chartobase:=g; 't': chartobase:=t end end; function basetochar(ba:base):char; (* convert a base into a character *) begin case ba of a: basetochar:='a'; c: basetochar:='c'; g: basetochar:='g'; t: basetochar:='t'; end end; function complement(ba:base):base; (* take the complement of ba *) begin case ba of a: complement:=t; c: complement:=g; g: complement:=c; t: complement:=a; end end; function pietoint(p: integer; pie: pieceptr): integer; (* p is a coordinate on the piece. we want to transform p into a number from 1 to n: an internal coordinate system for easy manipulation of piece coordinates *) var i: integer; (* an intermediate value *) begin with pie^.key do begin case piedir of plus: if p>=piebeg then i:=p-piebeg+1 else i:=(p-coobeg)+(cooend-piebeg)+2; minus: if p<=piebeg then i:=piebeg-p+1 else i:=(cooend-p)+(piebeg-coobeg)+2 end; pietoint:=i end end; function inttopie(i: integer; pie: pieceptr):integer; (* i is in the range 1 to some maximum. it is an internal coordinate system for the program. we want to do a coordinate transformation to obtain a value in the range of the piece called pie: i=1 corresponds to piebeg and i=its maximum corresponds to pieend *) var p: integer; (* an intermediate value *) begin with pie^.key do begin case piedir of plus: begin p:=piebeg+(i-1); if p>cooend then if coocon=circular then p:=p-(cooend-coobeg+1) end; minus: begin p:=piebeg-(i-1); if p '*' then begin writeln(output,' procedure skipstar: bad book'); writeln(output,' "*" expected as first character on the line, but "', thefile^,'" was found'); halt end; get(thefile); (* skip the star *) if thefile^ <> ' ' then begin writeln(output,' procedure skipstar: bad book'); writeln(output,' "* " expected on a line but "*', thefile^,'" was found'); halt end; get(thefile) (* skip the blank *) end; (* skipstar *) (* end module book.skipstar version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module book.brreanum *) procedure brreanum(var thefile: text; var reanum: real); (* read a real number from the file *) begin skipstar(thefile); readln(thefile,reanum); end; (* end module book.brreanum version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module book.brnumber *) procedure brnumber(var thefile: text; var num: integer); (* read a number from the file *) begin skipstar(thefile); readln(thefile,num) end; (* end module book.brnumber version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module book.brname *) procedure brname(var thefile: text; var nam: name); (* read a name from the file *) var i: integer; (* an index to the name *) c: char; (* a character read *) begin (* brname *) skipstar(thefile); with nam do begin length:=0; repeat length:=succ(length); read(thefile,c); letters[length] := c until (eoln(thefile)) or (length>=namelength) or (letters[length]=' '); if letters[length]=' ' then length:=length-1; if length 'n' then begin skipstar(thefile); if not eoln(thefile) then begin if thefile^ = '#' then begin numbered := true; get(thefile); (* move past the number symbol *) read(thefile,number); end end; repeat readln(thefile) until thefile^ = 'n'; readln(thefile) end else readln(thefile) end end; (* brnotenumber *) (* end module book.brnotenumber version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module book.brnote *) procedure brnote(var thefile: text; var note: lineptr); (* read note key *) var newnote: lineptr; (* the new note *) previousnote: lineptr; (* the last line of the notes *) begin (* brnote *) note:=nil; if thefile^ = 'n' then begin (* enter note *) readln(thefile); if thefile^ <> 'n' then begin (* abort null note (n/n) *) getline(note); newnote:=note; while thefile^ <> 'n' do begin (* wait until end of note *) brline(thefile,newnote); previousnote:=newnote; (* get next note *) getline(newnote^.next); newnote:=newnote^.next; end; (* last note was not used, so: *) clearline(newnote); previousnote^.next:=nil; readln(thefile) end else readln(thefile) end end; (* brnote *) (* end module book.brnote version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module book.brheader *) procedure brheader(var thefile: text; var hea: header); (* read the header of a key. *) begin with hea do begin (* read key name *) brname(thefile,keynam); (* read full name *) getline(fulnam); brline(thefile,fulnam); (* read note key *) if readnumber then brnotenumber(thefile,note) else brnote(thefile,note) end end; (* end module book.brheader version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module book.brpiekey *) procedure brpiekey(var thefile: text; var pie: piekey); (* read piece key *) begin with pie do begin brheader(thefile,hea); brreanum(thefile,mapbeg); brconfig(thefile,coocon); brdirect(thefile,coodir); brnumber(thefile,coobeg); brnumber(thefile,cooend); brconfig(thefile,piecon); brdirect(thefile,piedir); brnumber(thefile,piebeg); brnumber(thefile,pieend); end end; (* end module book.brpiekey version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module book.brdna *) procedure brdna(var thefile: text; var dna: dnaptr); (* read in dna from thefile *) (* note: if the dna were circularized, by linking the last dnastring to the first, then the cleardna routine could not clear properly, and would loop forever... there is no reason to do that, since a simple mod function will allow one to access the circle. *) var ch: char; workdna: dnaptr; begin getdna(dna); workdna:=dna; ch:=getto(thefile,['d']); read(thefile,ch); (* skipstar *) while (ch = '*') do begin read(thefile,ch); (* skip blank *) repeat read(thefile,ch); if ch in ['a','c','g','t'] then begin if workdna^.length=dnamax then begin getdna(workdna^.next); workdna:=workdna^.next end; workdna^.length:=succ(workdna^.length); workdna^.part[workdna^.length]:=chartobase(ch) end until eoln(thefile); readln(thefile); (* go to next line *) read(thefile,ch); (* ch is either '*' or 'd' *) end; readln(thefile) end; (* end module book.brdna version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module book.brpiece *) procedure brpiece(var thefile: text; var pie: pieceptr); (* read in a piece *) begin brpiekey(thefile,pie^.key); if numbered or (not skipunnum) then brdna(thefile,pie^.dna) end; (* end module book.brpiece version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module book.brinit *) procedure brinit(var book: text); (* check that the book is ok to read, and set up the global variables for br routines *) begin (* brinit *) (* halt if the book is bad (first word is 'halt') or the first character is not * *) reset(book); if not eof(book) then begin (* check for the date line *) if book^ <> '*' then begin if book^ <> 'h' then writeln(output, ' this is not the first line of a book:') else writeln(output, ' bad book:'); write(output, ' '); while not (eoln(book) or eof(book)) do begin write(output, book^); get(book) end; writeln(output); halt end end else begin writeln(output, ' book is empty'); halt end; (* initialize free storage *) freeline:=nil; freedna:=nil; readnumber:=true; (* usually we read in numbers for items *) number:=0; (* arbitrary value *) numbered:=false; (* the piece has no number (none yet read in) *) skipunnum:=false; end; (* brinit *) (* end module book.brinit version = 'delmod 6.51 85 apr 17 tds/gds' *) (* ************************************************************************ *) (* end module package.brpiece version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module book.getpiece *) procedure getpiece(var thefile: text; var pie: pieceptr); (* move to and read in the next piece in the book *) var ch: char; begin ch:=getto(thefile,['p']); (* get to the next p(iece) in the book *) if ch<>' ' then begin brpiece(thefile,pie); ch:=getto(thefile,['p']); (* read past closing p *) end end; (* end module book.getpiece version = 'delmod 6.51 85 apr 17 tds/gds' *) (* ************************************************************************ *) (* end module package.getpiece version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module findcolon *) procedure findcolon(var thefile: text); (* move the file to the characters just past ': ' (colon, space) *) var found: boolean; procedure die; begin writeln(output,' no helix list data'); halt end; begin found:=false; while not found do begin if eof(thefile) then die else if thefile^ = ':' then begin get(thefile); if eof(thefile) then die else if thefile^ = ' ' then begin get(thefile); found:=true end end else get(thefile) end end; (* end module findcolon version = 'auxmod 1.37 85 apr 4 gds/tds'; *) (* begin module gethelix *) procedure gethelix(var hlist: text; var x,y: integer; var length: integer; getenergy: boolean; var energy: real; var done: boolean); (* get the (x,y,length) info on some helix listed in hlist. if the x,y pair changes, then done is true and no values are returned. checknames may be used at this point to start the next pair. if getenergy is true, then the routine expects that there will be an energy on the helix line *) begin (* gethelix *) if eof(hlist) then done:=true else begin get(hlist); (* skip the blank *) if hlist^ <> ' ' then done:=true else begin done:=false; findcolon(hlist); read(hlist,x); findcolon(hlist); read(hlist,y); findcolon(hlist); read(hlist,length); if getenergy then read(hlist, energy) else energy := 0; readln(hlist) end end end; (* gethelix *) (* end module gethelix version = 'auxmod 1.37 85 apr 4 gds/tds'; *) (* begin module rep.initbook *) procedure initbook(var xbook, ybook: text; (* two delila books *) var xseq, yseq: pieceptr); (* pointers for pieces in the books *) (* initialize the books and the pointers for pieces *) begin (* initbook *) brinit(xbook); brinit(ybook); new(xseq); new(yseq); end; (* initbook *) (* end module rep.initbook version = 1.63; (* of rep 1984 june 5 *) (* begin module complines *) function complines(var a,b: text): boolean; (* compare one line from each of files a and b starting at the current file positions. both lines are compared up to and including the end-of-line mark (i.e., they are both readln-ed). return true if the lines are identical and false if they are not. ignore trailing blanks. this is done by letting chara or charb assume the value of a blank when eoln is true. when both lines are eoln, the next line is inspected. if one line is at end-of-file and the other is not, 'false' is returned. if both lines are at eof, 'true' is returned. *) var chara,charb: char; (* characters in files a and b *) done: boolean; (* stop looking for identity *) begin if eof(a) and eof(b) then complines := true else if eof(a) or eof(b) then complines := false else begin (* neither file is eof. *) done := false; while not done do begin if eoln(a) then chara := ' ' else read(a,chara); if eoln(b) then charb := ' ' else read(b,charb); if eoln(a) and eoln(b) then begin done := true; complines := true end else if chara <> charb then begin done := true; complines := false end; end; readln(a); readln(b); end end; (* complines *) (* end module complines version = 'prgmod 3.96 85 mar 18 tds'; *) (* begin module hlistvsbooks *) procedure hlistvsbooks (var hlist : text; (* list of helices from helix *) var xbook,ybook : text (* the delila books *) ); (* checks the title line in xbook with the corresponding line in hlist, ditto ybook. if they are not identical, error messages are sent to output and the program halts. written by britta singer *) var xasx, (* true if xbook input file corresponds to xbook in hlist *) yasy, (* true if ybook input file corresponds to ybook in hlist *) xasy, (* true if xbook input file corresponds to ybook in hlist *) yasx: (* true if ybook input file corresponds to xbook in hlist *) boolean; procedure gettitleline(var xbook,ybook,hlist : text); (* gets to the lines in the xbook, ybook, and hlist input files that contain the titles and thus are to be compared *) begin (* gettitleline *) reset(xbook); reset(ybook); reset(hlist); readln(hlist) end; (* gettitleline *) procedure findtitle(var hlist : text); (* places the cursor at the beginning of the title in hlist *) var n : integer; (* counter for skipping characters *) begin (* findtitle *) for n := 1 to 4 do get(hlist) end; (* findtitle *) begin (* hlistvsbooks *) gettitleline(xbook,ybook,hlist); findtitle(hlist); xasx := complines(xbook,hlist); findtitle(hlist); yasy := complines(ybook,hlist); gettitleline(xbook,ybook,hlist); findtitle(hlist); yasx := complines(ybook,hlist); findtitle(hlist); xasy := complines(xbook,hlist); if not xasx then writeln(output, ' xbook input file does not correspond to xbook in hlist.'); if not yasy then writeln(output, ' ybook input file does not correspond to ybook in hlist.'); if xasy and (not xasx) then writeln(output, ' xbook input file corresponds to ybook in hlist.'); if yasx and (not yasy) then writeln(output, ' ybook input file corresponds to xbook in hlist.'); if xasy and yasx and (not xasx) and (not yasy) then writeln(output, ' xbook and ybook input files are reversed. try again.'); if not (xasx and yasy) then halt end; (* hlistvsbooks *) (* end module hlistvsbooks version = 'auxmod 1.37 85 apr 4 gds/tds'; *) (* begin module rep.readparameters *) procedure readparameters (var repp : text; (* parameter file *) var para: prec); (* parameters from repp *) (* read in parameters, set default values *) begin reset(repp); if eof(repp) (* is repp file empty *) then begin writeln(output,' the repp input file is empty.'); halt end; readln(repp, para.mode); (* type of repeat *) if (para.mode <> 'd') and (para.mode <> 'i') then begin (* direct or inverted *) writeln (output, ' the mode parameter in repp should be d(irect) or i(nverted)'); halt; end; (* if *) readln(repp, para.typbk); (* relation of xbook to ybook in hlist *) if (para.typbk <> 'r') and (para.typbk <> 'u') then begin (* related or unrelated *) writeln(output, ' type of book parameter in repp should be r(elated)', ' or u(nrelated)'); halt end; (* if *) readln(repp,para.energy); (* will energies be recorded in fout? *) if (para.energy <> 'e') and (para.energy <> 'n') then begin (* energies reported *) writeln(output,' the third line of repp should have'); writeln(output,' e(nergies reported) or n(o energies reported)'); halt end; (* if *) (* read minimum helix length, maximum length, minimum distance, and maximum distance if specified by repp. otherwise, assign default values. *) if eof(repp) then para.minlength := defminlength else readln(repp,para.minlength); if eof(repp) then para.maxlength := defmaxlength else readln(repp,para.maxlength); if eof(repp) then para.dmin := defdmin else readln(repp,para.dmin); if eof(repp) then para.dmax := defdmax else readln(repp,para.dmax) end; (* readparameters *) (* end module rep.readparameters version = 1.63; (* of rep 1984 june 5 *) (* begin module rep.checkhlistparams *) procedure checkhlistparams (var hlist : text; (* list of helices from helix *) var para: prec; (* parameters from repp *) var getenergy: boolean); (* will energies be recorded? *) (* skips text in header of helix program until it gets to the minimum length. reads in minlength, compares it to the desired minimum length from repp, keeps the larger. it evaluates whether gu pairs are allowed and if energies are called for by repp. it returns a value for getenergy for use in gethelix. *) var i: integer; (* loop control variable *) helminlength: integer; (* minimum length of repeat from helix program *) blank: char; (* for skipping the blank at the beginning of the line *) gupairs: char; (* 'g' if gu pairs are allowed in hlist *) energycalculated: char; (* were energies calculated in hlist? *) begin (* checkhlistparams *) reset(hlist); for i:= 1 to 3 do readln(hlist); read(hlist,helminlength); if helminlength > para.minlength then begin writeln(output,' the minimum length in repp is less than in hlist'); writeln(output,' program will continue with minlength equal to the'); writeln(output,' minimum length from the helix program.'); para.minlength := helminlength end; readln(hlist); read(hlist,blank); read(hlist,gupairs); if gupairs = 'g' then if para.mode = 'i' then begin writeln(output,' gu pairs are allowed in hlist.'); writeln(output,' is that what you intended?'); writeln(output,' the program will run even if that is not', ' what you wanted. ') end else begin writeln(output,' your hlist has gu pairs. analysis of direct'); writeln(output,' repeats cannot use an hlist that allows gupairs.'); (* this is because the helices do not necessarily come in pairs when gu pairs are allowed, because of the asymmetry of the gu pair. that is, g in one strand can form a gu pair, but c in the complement cannot. *) halt end; readln(hlist); read(hlist,blank); read(hlist,energycalculated); if (energycalculated = 'e') and (para.energy = 'e') then getenergy := true else getenergy := false; readln(hlist); end; (* end module rep.checkhlistparams version = 1.63; (* of rep 1984 june 5 *) (* begin module rep.foutheading *) procedure foutheading(var hlist: text; (* a list of helices from helix *) var fout: text; (* output file*) var getenergy: boolean; (* true if 'energies' will be reported *) var para: prec); (* parameters from repp *) (* heading for fout file that contains repeat information *) var line: integer; (* loop control variable *) begin rewrite(fout); reset(hlist); writeln(fout,' rep ',version:4:2); for line:=1 to headerlines do begin write(fout,' '); copyaline(hlist,fout) end; case para.mode of 'i': writeln(fout,' i : mode, inverted repeats found'); 'd': writeln(fout,' d : mode, direct repeats found'); end; case para.typbk of 'u': writeln(fout,' u : xbook and ybook are unrelated'); 'r': writeln(fout,' r : xbook and ybook are related'); end; case para.energy of 'e': if getenergy then writeln(fout,' e : "energies" reported in fout') else writeln(fout,' "energies" requested but not available', ' from hlist'); 'n': writeln(fout,' n : no "energies" reported in fout'); end; writeln(fout,' ', para.minlength:3,' = minimum repeat length'); writeln(fout,' ', para.maxlength:3,' = maximum repeat length'); writeln(fout,' ', para.dmin:3,' = minimum distance between repeats'); writeln(fout,' ', para.dmax:3,' = maximum distance between repeats'); writeln(fout); end; (* foutheading *) (* end module rep.foutheading version = 1.63; (* of rep 1984 june 5 *) (* begin module rep.poutheading *) procedure poutheading(var hlist: text; (* a list of helices from helix *) var pout: text; (* output file*) var getenergy: boolean; (* true if 'energies' will be reported *) var para: prec); (* parameters from repp *) (* heading for pout file that contains palindrome information *) var line: integer; (* loop control variable *) begin rewrite(pout); reset(hlist); writeln(pout,' rep ',version:4:2); for line:=1 to headerlines do begin write(pout,' '); copyaline(hlist,pout) end; case para.energy of 'e': if getenergy then writeln(pout,' e: "energies" reported in pout') else writeln(pout,' "energies" requested but not available', ' from hlist'); 'n': writeln(pout,' n: no "energies" reported in pout'); end; writeln(pout); writeln(pout,' ', para.minlength:3,' = minimum palindrome length'); writeln(pout,' ', para.maxlength:3,' = maximum palindrome length'); writeln(pout); writeln(pout,' note that the half-repeat length of a palindrome'); writeln(pout,' is half that of an inverted repeat. thus if the'); writeln(pout,' minimum repeat length is 4, agct will be reported'); writeln(pout,' in pout but agnnnct will not be reported in fout'); writeln(pout); end; (* poutheading *) (* end module rep.poutheading version = 1.63; (* of rep 1984 june 5 *) (* begin module rep.checkseqdata *) procedure checkseqdata(para: prec; (* parameters from repp *) xseq,yseq: pieceptr; (* for getting information *) (* about coordinates in xbook and ybook *) var fout: text); (* output file *) (* checks to make sure that the book coordinates are consistent with *) (* the parameters designated by the user in repp *) begin (* checkseqdata *) if (para.typbk = 'r') and ((xseq^.key.mapbeg <> yseq^.key.mapbeg) or (xseq^.key.coocon <> yseq^.key.coocon) or (xseq^.key.coodir <> yseq^.key.coodir) or (xseq^.key.coobeg <> yseq^.key.coobeg) or (xseq^.key.cooend <> yseq^.key.cooend)) then begin writeln(output,' book coordinates indicate that xbook and ybook'); writeln(output,' do not meet the criteria of relatedness,'); writeln(output,' although the parameter file calls'); writeln(output,' for them to be treated as related.'); writeln(output,' please see the explanation of'); writeln(output,' *related* in delman.describe.rep.'); halt end; if (para.typbk = 'r') and (para.mode = 'i') and (xseq^.key.piedir <> yseq^.key.piedir) then begin writeln(output,' analysis of inverted repeats with'); writeln(output,' rep requires that the books have the '); writeln(output,' same orientation'); halt end; if (para.typbk = 'r') and (para.mode = 'd') and (xseq^.key.piedir = yseq^.key.piedir) then begin writeln(output,' analysis of direct repeats with rep'); writeln(output,' requires that the books have opposite'); writeln(output,' directions.'); halt end; (* note below that when the books are not related, the user is *) (* allowed latitude in assignment of parameters. that is because *) (* when the books are unrelated, the parameter 'i' vs 'd' is essentially *) (* meaningless. the warning is to alert the user to the possibility *) (* that the file fout may contain something other than what was *) (* intended. *) if (para.typbk = 'u') and (para.mode = 'd') and (xseq^.key.piedir = yseq^.key.piedir) then begin writeln(output,' generally speaking, analysis of direct repeats'); writeln(output,' requires that xbook and ybook have'); writeln(output,' different directions. here xbook and'); writeln(output,' ybook have the same direction. are you'); writeln(output,' sure that no mistake has been made?'); writeln(output,' the program continues, but caution is urged'); writeln(fout,' * * * warning * * * '); writeln(fout); writeln(fout,' generally speaking, analysis of direct repeats'); writeln(fout,' requires that xbook and ybook have'); writeln(fout,' different directions. here xbook and'); writeln(fout,' ybook have the same direction. are you'); writeln(fout,' sure that no mistake has been made?'); writeln(fout,' the program continues, but caution is urged'); writeln(fout); writeln(fout,' note also: this warning adds an additional'); writeln(fout,' 19 lines of text to the body of the file fout'); writeln(fout,' from the program rep. should you wish to'); writeln(fout,' use a fout file that contains this warning'); writeln(fout,' as input for a program designed to use as input'); writeln(fout,' the standard fout file from program rep,'); writeln(fout,' it may be necessary first to delete 19 lines'); writeln(fout,' beginning with * * * warning * * *'); writeln(fout); writeln(fout) end; if (para.typbk = 'u') and (para.mode = 'i') and (xseq^.key.piedir <> yseq^.key.piedir) then begin writeln(output,' generally speaking, analysis of inverted '); writeln(output,' repeats requires that xbook and ybook have the'); writeln(output,' same direction. here xbook and ybook have '); writeln(output,' different directions. are you sure that no '); writeln(output,' mistake has been made? the program continues,'); writeln(output,' but caution is urged.'); writeln(fout,' * * * warning * * * '); writeln(fout); writeln(fout,' generally speaking, analysis of inverted '); writeln(fout,' repeats requires that xbook and ybook have the '); writeln(fout,' same direction. here xbook and ybook have '); writeln(fout,' different directions. are you sure that no '); writeln(fout,' mistake has been made? the program continues,'); writeln(fout,' but caution is urged.'); writeln(fout); writeln(fout,' note also: this warning adds an additional'); writeln(fout,' 19 lines of text to the body of the file fout'); writeln(fout,' from the program rep. should you wish to'); writeln(fout,' use a fout file that contains this warning'); writeln(fout,' as input for a program designed to use as input'); writeln(fout,' the standard fout file from program rep,'); writeln(fout,' it may be necessary first to delete 19 lines'); writeln(fout,' beginning with * * * warning * * *'); writeln(fout); writeln(fout) end end; (* checkseqdata *) (* end module rep.checkseqdata version = 1.63; (* of rep 1984 june 5 *) (* begin module rep.anarep *) procedure anarep(para: prec;(* input parameters *) xseq,yseq: pieceptr; (* info about coordinates, etc. *) length: integer; (* length of the repeat *) energy: real; (* "energy" from hlist *) coextensive: boolean; (*do the sequences overlap completely?*) var xout,yout: integer; (* 5 prime external coordinates *) var rout: reprec; (* record of repeat information *) var palindrome: boolean; (* is the "repeat" a palindrome? *) var finished: boolean); (* is the repeat to be discarded? *) (* this procedure corrects xout or yout, if necessary. it calculates the distance between half repeats and recognizes palindromes. it throws out duplicates only when the two sequences examined overlap completely. it returns finished as true if the repeat does not meet the criteria of repp. *) var dist: integer; (* distance between repeats *) cirdist: integer; (* if the sequence is circular, this is the distance going around the circle the other way. *) begin (* anarep *) palindrome := false; finished := false; (* the next statement adjusts xout or yout: for direct repeats, one of the books must be in the minus direction. thus what is generally thought of as the 3 prime end is reported in hlist as the five prime end. the next statement makes xout and yout be at the same end of the half repeats when the sequences are viewed in the same direction. *) if para.mode = 'd' then begin if xseq^.key.piedir = minus then xout := (xout - length + 1) else if yseq^.key.piedir = minus then yout := (yout - length + 1); (* if the configuration is circular, the above statement could put xout or yout off the map, hence the following: *) if xout < xseq^.key.coobeg then xout := xout + xseq^.key.cooend - xseq^.key.coobeg + 1 else if yout < yseq^.key.coobeg then yout := yout + yseq^.key.cooend - yseq^.key.coobeg + 1 end; if (para.typbk = 'r') and (xout = yout) then if para.mode = 'i' then palindrome := true (* the "repeat" is a palindrome. *) else finished := true; (* if para.mode = "d" and xout = yout, then the "repeat" spans the entire sequence and should be ignored. *) if para.typbk = 'u' then dist := 0 else if (not finished) and (not palindrome) then begin if para.mode = 'd' then begin (* calculate distance of direct repeats *) dist := yout - xout; (*calculates dist assuming linearity*) if xseq^.key.coocon = circular then begin (* determines the shorter distance around the circle *) cirdist := ((xseq^.key.cooend - yout) + (xout - xseq^.key.coobeg) + 1); if cirdist < dist then dist := cirdist end end else (* para.mode = i *) begin (* calculate distance from 5 prime end to 3 prime end of inverted repeats - pseudo-loop distances *) dist := yout - xout - length + 1; if xseq^.key.coocon = circular then begin (* as above *) cirdist := ((xseq^.key.cooend) - (xseq^.key.coobeg) - (dist) - (2*length) + 3); if cirdist < dist then dist := cirdist end end end; (* before putting the stuff we want into rout, we get rid of the stuff we do not want. *) if coextensive and (dist < 0) and (not palindrome) then finished := true; if (length < para.minlength) or (length > para.maxlength) then finished := true; if (para.typbk = 'r') and (not palindrome) then if (dist < para.dmin) or (dist > para.dmax) then finished := true; if (not finished ) then with rout do begin (* at last we put data into rout *) x5p:= xout; y5p:= yout; rlength:= length; distance:= dist; deltagee:= energy end; end; (* anarep *) (* end module rep.anarep version = 1.63; (* of rep 1984 june 5 *) (* begin module rep.writetofile *) procedure writetofile(var para: prec; (* input parameters *) var rout: reprec; (* record of repeat info *) var bout: text; (* output file *) getenergy: boolean; (* true if "energy" is reported *) palindrome: boolean); (* is it a palindrome ? *) (* writes repeat information to bout, which can be either fout or pout, depending on how writetofile is called *) begin (* writetofile *) write(bout,' ',rout.x5p:7); if not palindrome then write(bout,' ',rout.y5p:8); write(bout,' ',rout.rlength:8); if (para.typbk = 'r') and (not palindrome) then write(bout,' ', rout.distance:9,' '); if getenergy then write(bout,' ',rout.deltagee:9:2,' kcal'); writeln(bout) end; (* writetofile *) (* end module rep.writetofile version = 1.63; (* of rep 1984 june 5 *) (* begin module rep.procrep *) procedure procrep(var hlist: text; (* list of helices from helix *) var para: prec; (* input parameters *) var getenergy: boolean; (* true if "energies` are reported *) var xseq,yseq: pieceptr; (* the sequences in the books *) var fout: text; (* output file for repeats *) var pout: text); (* output file for palindromes *) (* procedure uses gethelix to get the information from hlist. then it analyzes the data with anarep which also tosses out data if necessary (see anarep). next it uses writetofile to put palindromes into pout and repeats into fout. *) function overlap(var xseq,yseq: pieceptr; (*info about where the pieces start and end *) var para: prec) (* has parameters from repp *) : boolean; (* returns true if the two sequences overlap completely and false otherwise *) begin (* overlap *) overlap := false; (* except as below *) if para.typbk = 'r' then case para.mode of 'd': if (xseq^.key.piebeg = yseq^.key.pieend) and (xseq^.key.pieend = yseq^.key.piebeg) then overlap := true; 'i': if (xseq^.key.piebeg = yseq^.key.piebeg) and (xseq^.key.pieend = yseq^.key.pieend) then overlap := true; end end; (* overlap *) var energy:real; (* the "energy" of the repeat *) done:boolean; (* done with book comparisons *) xout,yout:integer; (* 5 prime ends of repeat from procedure gethelix *) length:integer; (* length of repeat *) rout:reprec; (* info about repeat in a record *) palindrome: boolean; (* true if the "repeat" is a palindrome *) finished: boolean; (* true if the repeat is not to be reported *) coextensive: boolean; (* true if the sequences overlap completely *) begin coextensive := overlap(xseq,yseq,para); if coextensive or (para.typbk='u') then writeln(fout) else writeln(fout,' repeats (or parts of repeats) may be', ' duplicated in regions of sequence overlap'); write(fout,' '); if eof(hlist) then begin writeln(output,'premature end of hlist'); halt end else copyaline(hlist,fout); write(fout,'5':6,prime,'x:','5':6,prime,'y:','length:':9); if para.typbk = 'r' then write(fout,'distance:':10); if getenergy then write(fout,'energy:':13); writeln(fout); if (para.mode = 'i') and (para.typbk = 'r') then begin (* put stuff into pout *) write(pout,' '); write(pout,'5':2,prime,' end:'); write(pout,'length:':9); if getenergy then write(pout,'energy:':13); writeln(pout) end else rewrite(pout); (* empties pout if no palindromes *) repeat gethelix(hlist,xout,yout,length,getenergy,energy,done); if (not done) then begin anarep(para,xseq,yseq,length,energy,coextensive,xout,yout, rout,palindrome,finished); if not palindrome and not finished then writetofile(para,rout,fout,getenergy,palindrome) else if not finished (* palindrome is true *) then writetofile(para,rout,pout,getenergy,palindrome) end; until done end; (* procrep *) (* end module rep.procrep version = 1.63; (* of rep 1984 june 5 *) (* begin module rep.themain *) procedure themain(var hlist, (* listing from program "helix" *) xbook,ybook, (* books from delila *) fout, (* output of repeated sequences *) repp: text); (* input parameters *) (* this procedure first checks that the parameters requested in repp are consistent with the input files, hlist, xbook, and ybook. the program will halt with error messages to output if the inconsistency is unacceptable. other inconsistencies, e.g. gupairs in hlist if inverted repeats are requested, may actually be what the user intended. on the other hand, they may not. here again, error messages appear in output and sometimes in fout. finally, the procedure sets up fout (output file for repeats) and pout (output file for palindromes) and processes the repeats. it compares the first piece in xbook to each piece in ybook, then the second piece in xbook to each piece in ybook, and so on. repeats and palindromes that meet the criteria of repp are put into the appropriate files. if the two sequences inspected overlap completely, duplicate entries in hlist are recognized and not reported. *) var xseq,yseq: pieceptr; (* sequences in the book *) para: prec; (* a record of the parameter variables *) getenergy: boolean; (* are energies reported? *) begin (* themain *) writeln(output,' rep ',version:4:2); initbook(xbook,ybook,xseq,yseq); hlistvsbooks(hlist,xbook,ybook); readparameters(repp,para); checkhlistparams(hlist,para,getenergy); foutheading(hlist,fout,getenergy,para); poutheading(hlist,pout,getenergy,para); while not (eof(xbook)) do begin getpiece(xbook,xseq); if not (eof(xbook)) then begin reset(ybook); while not (eof(ybook)) do begin getpiece(ybook,yseq); if not (eof(ybook)) then begin checkseqdata(para,xseq,yseq,fout); procrep(hlist,para,getenergy,xseq,yseq,fout,pout) end; clearpiece(yseq) end end; clearpiece(xseq) end; end; (* themain *) (* end module rep.themain version = 1.63; (* of rep 1984 june 5 *) begin (* rep *) themain(hlist,xbook,ybook,fout,repp); 1 : end. (* rep *)