program mergemarks(book, marks1, marks2, mergemarksp, marksout, output); (* mergemarks: merge lister marks files Thomas D. Schneider, Ph.D. National Institutes of Health National Cancer Institute Gene Regulation and Chromosome Biology Laboratory Molecular Information Theory Group Frederick, Maryland 21702-1201 schneidt@mail.nih.gov toms@alum.mit.edu (permanent) http://alum.mit.edu/www/toms (permanent) *) label 1; (* end of program *) const (* begin module version *) version = 1.32; (* of mergemarks.p 2012 Apr 13 2012 Apr 13, 1.32: merge bug solved 2012 Mar 26, 1.31: merge bug? 2012 Mar 26, 1.30: merge bug? - coordinate fixed 2011 Oct 27, 1.29: upgrade modules 2011 Oct 27, 1.28: cleanup 2011 Oct 27, 1.27: failed to use marks2 data when marks1 data did not exist 2011 Oct 27, 1.26: cleanup 2011 Oct 27, 1.25: update address, fix crash when one input file has no marks 2001 Jan 11, 1.24: improve bug reporting in parsestring 2000 Oct 10, 1.22: non-integer mark positions not recommended 2000 Aug 22, 1.21: add notes synchronization 2000 Aug 21, 1.20: gpc compatible 2000 Aug 21, 1.19: marks2 overwrites marks1 when they are identical 1999 June 6, 1.12: read the book to know which mark to put next. 1999 June 6, 1.06: make the program skip ahead when 'p' is encountered origin 1997 Feb 9 *) updateversion = 1.00; (* defines lowest acceptable current parameter file *) (* end module version *) (* begin module describe.mergemarks *) (* name mergemarks: merge lister marks files synopsis mergemarks(book, marks1, marks2, mergemarksp: in, marksout: out, output: out) files book: a book from the delila system marks1: a marks file for the lister program, corresponding to the book marks2: a marks file for the lister program, corresponding to the book marksout: marks1 merged to marks2 in order. mergemarksp: parameters to control the program. The file must contain the following parameters, one per line: 0. The version number of the program. This allows the user to be warned if an old parameter file is used. There are no parameters (yet?). output: messages to the user description The lister program requires a marks file that defines marks in the order of the sequence. This is necessary to allow marks on multiple pieces, but it makes it difficult to merge marks together. The mergemarks program will merge two marks files together. When two marks have the same position exactly, the marks in marks1 will be written first and those in marks2 will overwrite them. If the marks overlap on the lister map, this means that marks2 will be written on top of marks1. Synchronization of Marks. The program uses the character 'p' at the beginning of a line to indicate that the next piece is to be read from the book and that following marks are for that next piece. If you are generating marks by hand, be sure to put 'p' between marks for each piece. Programs like live and delila do this automatically so that the marks are synchronized. examples mergemarksp: 1.00 version of shell that this parameter file is designed for. (Currently this only keeps track of the version number.) documentation see also {Example parameter file:} mergemarksp {The program that uses the marks:} lister.p {Programs that make marks that can be merged:} live.p, delila.p {The program that makes features for walkers:} scan.p author Thomas Dana Schneider bugs technical notes 1999 June 6. Since lister now can skip ahead to the next piece when a 'p' is encountered at the beginning of a marks file line, this program can use that to help merge two marks files together. The book is used as a guide for the order of the marks relative to each other. Without the book it is not possible to tell which mark should come first because the coordinate system is not defined in the marks file. WARNING: the code for decreasing coordinate systems does not always work right for marks that are not in integer positions. Fortunately one can always place marks at integer positions and then move them to the right spot. So the current recommendation (2000 Oct 10) is not to use non-integer coordinates to place the marks. *) (* end module describe.mergemarks *) (* begin module mergemarks.const *) bignumber = 1e15; (* a large real number that is so big that no chromosome will ever be that long *) (* end module mergemarks.const *) (* begin module interact.const *) (* begin module string.const *) maxstring = 2000; (* the maximum string *) (* end module string.const version = 4.86; (@ of prgmod.p 2004 Sep 8 *) (* end module interact.const version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.const *) (* constants needed for book manipulations *) dnamax = 1024; (* length of dna arrays *) namelength = 100; (* maximum key name length *) linelength = 200; (* maximum line readable in book *) (* end module book.const version = 7.72; {of delmod.p 2007 Jul 23} *) type (* begin module interact.type *) (* begin module string.type *) stringptr = ^string; (* pointer to a string *) string = record (* a string of characters *) letters: array[1..maxstring] of char; (* the letters in the string *) length: integer; (* the number of characters in the string *) current: integer; (* the letter we are working on *) next: stringptr; (* the next string in a series *) end; (* end module string.type version = 4.86; (@ of prgmod.p 2004 Sep 8 *) (* end module interact.type version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.type *) (* types needed for book manipulations *) chset = set of 'a'..'z'; (* types defined in book definition *) alpha = packed array[1..namelength] of char; (* this is not alfa *) (* name is a left justified string with blanks following the characters *) name = record letters: alpha; length: 0..namelength (* zero means an unspecified structure *) end; lineptr = ^line; line = record (* a line of characters *) letters: packed array [1..linelength] of char; length: 0..linelength; next: lineptr end; direction = (plus, minus, dircomplement, dirhomologous); configuration = (linear, circular); state = (on, off); header = record (* header of key *) keynam: name; (* key name of structure *) fulnam: lineptr; (* full name of structure *) note: lineptr (* note key *) end; (* begin module base.type *) (* define the four nucleotide bases *) base = (a,c,g,t); (* end module base.type version = 7.72; {of delmod.p 2007 Jul 23} *) (* sequence types *) dnaptr = ^dnastring; dnarange = 0..dnamax; seq = packed array[1..dnamax] of base; dnastring = record part: seq; length: dnarange; next: dnaptr end; orgkey = record (* organism key *) hea: header; mapunit: lineptr (* genetic map units *) end; chrkey = record (* chromosome key *) hea: header; mapbeg: real; (* number of genetic map beginning *) mapend: real (* number of genetic map ending *) end; pieceptr = ^piece; piekey = record (* piece key *) hea: header; mapbeg: real; (* genetic map beginning *) coocon: configuration; (* configruation (circular/linear) *) coodir: direction; (* direction (+/-) relative to genetic map *) coobeg: integer; (* beginning nucleotide *) cooend: integer; (* ending nucleotide *) piecon: configuration; (* configruation (circular/linear) *) piedir: direction; (* direction (+/-) relative to coordinates *) piebeg: integer; (* beginning nucleotide *) pieend: integer; (* ending nucleotide *) end; piece = record key: piekey; dna: dnaptr end; reference = record pienam : name; (* name of piece referred to *) mapbeg : real; (* genetic map beginning *) refdir : direction; (* direction relative to coordinates *) refbeg : integer; (* beginning nucleotide *) refend : integer; (* ending nucleotide *) end; genkey = record (* gene key *) hea : header; ref : reference; end; trakey = record (* transcript key *) hea : header; ref : reference; end; markerptr = ^marker; markey = record (* marker key *) hea : header; ref : reference; sta : state; phenotype : lineptr; next : markerptr; end; marker = record key : markey; dna : dnaptr; end; (* end module book.type version = 7.72; {of delmod.p 2007 Jul 23} *) var book, (* file used by this program *) marks1, marks2, (* files used by this program *) mergemarksp, (* file used by this program *) marksout: text; (* file used by this program *) (* begin module book.var *) (* ************************************************************************ *) (* global variables needed for book manipulations *) (* free storage: *) freeline: lineptr; (* unused lines *) freedna: dnaptr; (* unused dnas *) readnumber: boolean; (* whether to read a number from the notes, or to read in the notes *) number: integer; (* the number of the item just read *) numbered: boolean; (* true when the item just read is numbered *) skipunnum: boolean; (* a control variable to allow skipping of un-numbered items in the book *) (* ************************************************************************ *) (* end module book.var version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module package.getocp *) (* ************************************************************************ *) (* begin module package.brpiece *) (* ************************************************************************ *) (* begin module book.basis *) (* procedures needed for book manipulations *) (* get procedures should be used for all linked lists of records *) procedure getline(var l: lineptr); (* obtain a line from the free line list or by making a new one *) begin if freeline<>nil then begin l:=freeline; freeline:=freeline^.next end else new(l); l^.length:=0; l^.next:=nil end; procedure getdna(var l: dnaptr); begin if freedna<>nil then begin l:=freedna; freedna:=freedna^.next end else new(l); l^.length:=0; l^.next:=nil end; (* clear procedures should be called each time the records are no longer needed failure to do this may result in a stack overflow. *) procedure clearline(var l: lineptr); (* return a line to the free line list *) var lptr: lineptr; begin if l<>nil then begin lptr:=l; l:=l^.next; lptr^.next:=freeline; freeline:=lptr end end; procedure writeline(var afile: text; l: lineptr; carriagereturn: boolean); (* write a line to a file, with carriage return if carriagereturn is true. *) var index: integer; (* index to characters in l *) begin with l^ do begin for index := 1 to length do write(afile, letters[index]); end; if carriagereturn then writeln(afile); end; procedure showfreedna; (* show the freedna list *) var counter: integer; (* count of freedna list *) l: dnaptr; (* pointer into freedna list *) begin l := freedna; counter := 0; while l <> nil do begin counter := succ(counter); write(output,counter:1); write(output, ', length = ',l^.length:1); { This is illegal according to gpc because one cannot write a pointer to a text file. It can be unearthed for debugging. write(output, ', pointer id: ',l:1); } writeln(output); l := l^.next end; end; procedure cleardna(var l: dnaptr); (* clear the dna strutures to the free list *) var lptr: dnaptr; begin if l<>nil then begin lptr:=l; l:=l^.next; lptr^.next:=freedna; freedna:=lptr end end; procedure clearheader(var h: header); (* clear the header h (remove lines to free storage) *) begin with h do begin clearline(fulnam); while note<>nil do clearline(note) end end; procedure clearpiece(var p: pieceptr); (* clear the dna of the piece *) begin while p^.dna<>nil do cleardna(p^.dna); clearheader(p^.key.hea) end; function chartobase(ch:char):base; (* convert a character into a base *) begin case ch of 'a': chartobase:=a; 'c': chartobase:=c; 'g': chartobase:=g; 't': chartobase:=t end end; function basetochar(ba:base):char; (* convert a base into a character *) begin case ba of a: basetochar:='a'; c: basetochar:='c'; g: basetochar:='g'; t: basetochar:='t'; end end; function complement(ba:base):base; (* take the complement of ba *) begin case ba of a: complement:=t; c: complement:=g; g: complement:=c; t: complement:=a; end end; function chomplement(b: char): char; (* create the character complement of base b. I must be getting hungry! *) begin chomplement := basetochar(complement(chartobase(b))); end; function pietoint(p: integer; pie: pieceptr): integer; (* p is a coordinate on the piece. we want to transform p into a number from 1 to n: an internal coordinate system for easy manipulation of piece coordinates *) (* Note: the dirhomologous and dircomplement are treated as plus and minus directions, which MIGHT NOT BE RIGHT! *) var i: integer; (* an intermediate value *) begin with pie^.key do begin case piedir of dirhomologous, plus: if p>=piebeg then i:=p-piebeg+1 else i:=(p-coobeg)+(cooend-piebeg)+2; dircomplement, minus: if p<=piebeg then i:=piebeg-p+1 else i:=(cooend-p)+(piebeg-coobeg)+2 end; pietoint:=i {BUBBA ;if i > 1000 then begin writeln(output); writeln(output, 'FAIL: p = ',p:1, '-> i = ',i:1); case piedir of dirhomologous, plus: begin writeln(output,'dirhom, plus'); end; dircomplement, minus: begin writeln(output,'dircom, minus'); end; end; writeln(output, 'piebeg: ',piebeg:1); writeln(output, 'piebeg-p+1: ',piebeg-p+1:1); writeln(output); halt; end; } end end; function inttopie(i: integer; pie: pieceptr):integer; (* i is in the range 1 to some maximum. it is an internal coordinate system for the program. we want to do a coordinate transformation to obtain a value in the range of the piece called pie: i=1 corresponds to piebeg and i=its maximum corresponds to pieend *) (* Note: the dirhomologous and dircomplement are treated as plus and minus directions, which MIGHT NOT BE RIGHT! *) var p: integer; (* an intermediate value *) begin with pie^.key do begin case piedir of dirhomologous, plus: begin p:=piebeg+(i-1); if p>cooend then if coocon=circular then p:=p-(cooend-coobeg+1) end; dircomplement, minus: begin p:=piebeg-(i-1); if p '*' then begin writeln(output,' procedure skipstar: bad book'); writeln(output,' "*" expected as first character on the line, but "', thefile^,'" was found'); halt end; get(thefile); (* skip the star *) if thefile^ <> ' ' then begin writeln(output,' procedure skipstar: bad book'); writeln(output,' "* " expected on a line but "*', thefile^,'" was found'); halt end; get(thefile) (* skip the blank *) end end; (* skipstar *) (* end module book.skipstar version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brreanum *) procedure brreanum(var thefile: text; var theline: integer; var reanum: real); (* read a real number from the file *) begin skipstar(thefile); readln(thefile,reanum); theline := succ(theline) end; (* end module book.brreanum version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brnumber *) procedure brnumber(var thefile: text; var theline: integer; var num: integer); (* read a number from the file *) begin skipstar(thefile); readln(thefile,num); theline := succ(theline) end; (* end module book.brnumber version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brname *) procedure brname(var thefile: text; var theline: integer; var nam: name); (* read a name from the file *) var i: integer; (* an index to the name *) c: char; (* a character read *) begin (* brname *) skipstar(thefile); with nam do begin length:=0; repeat length:=succ(length); read(thefile,c); letters[length] := c until (eoln(thefile)) or (length>=namelength) or (letters[length]=' '); if letters[length]=' ' then length:=length-1; if length ',linelength:1,' characters'); writeln(output,'* Only ',linelength:1,' characters read from book'); writeln(output,'***********************************************'); end; l^.length:=i; l^.next:=nil; readln(thefile); theline := succ(theline) end; (* end module book.brline version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brdirect *) procedure brdirect(var thefile: text; var theline: integer; var direct: direction); (* read a direction *) var ch: char; begin skipstar(thefile); readln(thefile,ch); theline := succ(theline); if ch='+' then direct:=plus else direct:=minus end; (* end module book.brdirect version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brconfig *) procedure brconfig(var thefile: text; var theline: integer; var config: configuration); (* read a configuration *) var ch: char; begin skipstar(thefile); readln(thefile,ch); theline := succ(theline); if ch='l' then config:=linear else config:=circular end; (* end module book.brconfig version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brnotenumber *) procedure brnotenumber(var thefile: text; var theline: integer; var note: lineptr); (* book note reading to obtain the number of the object. the procedure returns the value of the number as a global. (this is not such a good practice, but we are stuck with it for now.) *) begin (* brnotenumber *) note:=nil; numbered := false; number := 0; (* force number to zero if there is no number at all *) (* the next character is n or * depending on whether there are notes *) if thefile^ = 'n' then begin readln(thefile); theline := succ(theline); if thefile^ <> 'n' then begin skipstar(thefile); if not eoln(thefile) then begin if thefile^ = '#' then begin numbered := true; get(thefile); (* move past the number symbol *) read(thefile,number); end end; repeat readln(thefile); theline := succ(theline) until thefile^ = 'n'; readln(thefile); theline := succ(theline) end else begin readln(thefile); theline := succ(theline) end end end; (* brnotenumber *) (* end module book.brnotenumber version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brnote *) procedure brnote(var thefile: text; var theline: integer; var note: lineptr); (* read note key *) var newnote: lineptr; (* the new note *) previousnote: lineptr; (* the last line of the notes *) begin (* brnote *) note:=nil; if thefile^ = 'n' then begin (* enter note *) readln(thefile); theline := succ(theline); if thefile^ <> 'n' then begin (* abort null note (n/n) *) getline(note); newnote:=note; while thefile^ <> 'n' do begin (* wait until end of note *) brline(thefile,theline,newnote); previousnote:=newnote; (* get next note *) getline(newnote^.next); newnote:=newnote^.next; end; (* last note was not used, so: *) clearline(newnote); previousnote^.next:=nil; readln(thefile); theline := succ(theline); end else begin readln(thefile); theline := succ(theline); end; end end; (* brnote *) (* end module book.brnote version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brheader *) procedure brheader(var thefile: text; var theline: integer; var hea: header); (* read the header of a key. *) begin with hea do begin readln(thefile); (* move past the object name - new definition 1999 Mar 13 *) theline := succ(theline); {bbb} (* read key name *) brname(thefile,theline,keynam); (* read full name *) getline(fulnam); brline(thefile,theline,fulnam); (* read note key *) if readnumber then brnotenumber(thefile,theline,note) else brnote(thefile,theline,note) end end; (* end module book.brheader version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.copyheader *) procedure copyheader(fromhea: header; var tohea: header); (* copy the header fromhea into tohea. Note that the linked objects are NOT copied, but merely pointed to. *) begin tohea.keynam.letters := fromhea.keynam.letters; tohea.keynam.length := fromhea.keynam.length; tohea.note := fromhea.note; tohea.fulnam := fromhea.fulnam; end; (* end module book.copyheader version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brpiekey *) procedure brpiekey(var thefile: text; var theline: integer; var pie: piekey); (* read piece key, track the line number *) begin with pie do begin brheader(thefile,theline,hea); brreanum(thefile,theline,mapbeg); brconfig(thefile,theline,coocon); brdirect(thefile,theline,coodir); brnumber(thefile,theline,coobeg); brnumber(thefile,theline,cooend); brconfig(thefile,theline,piecon); brdirect(thefile,theline,piedir); brnumber(thefile,theline,piebeg); brnumber(thefile,theline,pieend); end end; (* end module book.brpiekey version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brdna *) procedure brdna(var thefile: text; var theline: integer; var dna: dnaptr); (* read in dna from thefile, track the line *) (* note: if the dna were circularized, by linking the last dnastring to the first, then the cleardna routine could not clear properly, and would loop forever... there is no reason to do that, since a simple mod function will allow one to access the circle. *) var ch: char; workdna: dnaptr; begin getdna(dna); workdna:=dna; ch:=getto(thefile,theline,['d']); readln(thefile); theline := succ(theline); read(thefile,ch); (* skipstar *) while (ch = '*') do begin read(thefile,ch); (* skip blank *) repeat read(thefile,ch); if ch in ['a','c','g','t'] then begin if workdna^.length=dnamax then begin getdna(workdna^.next); workdna:=workdna^.next end; workdna^.length:=succ(workdna^.length); workdna^.part[workdna^.length]:=chartobase(ch) end until eoln(thefile); readln(thefile); (* go to next line *) theline := succ(theline); read(thefile,ch); (* ch is either '*' or 'd' *) end; readln(thefile); (* read past the d *) theline := succ(theline); end; (* end module book.brdna version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brpiece *) procedure brpiece(var thefile: text; var theline: integer; var pie: pieceptr); (* read in a piece, change theline to reflect the lines traversed *) begin { readln(thefile); (* move past the word 'piece' - new definition 1999 Mar 13 *) theline := succ(theline); (* BUG: was below! *) bbb} brpiekey(thefile,theline,pie^.key); if numbered or (not skipunnum) then brdna(thefile,theline,pie^.dna); readln(thefile); (* move past the word 'piece' - new definition 1999 Mar 13 *) theline := succ(theline); end; (* end module book.brpiece version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brinit *) procedure brinit(var book: text; var theline: integer); (* check that the book is ok to read, and set up the global variables for br routines *) begin (* brinit *) (* halt if the book is bad (first word is 'halt') or the first character is not * *) reset(book); if not eof(book) then begin (* check for the date line *) if book^ <> '*' then begin if book^ <> 'h' then writeln(output, ' this is not the first line of a book:') else writeln(output, ' bad book:'); write(output, ' '); while not (eoln(book) or eof(book)) do begin write(output, book^); get(book) end; writeln(output); halt end end else begin writeln(output, ' book is empty'); halt end; (* initialize free storage *) freeline:=nil; freedna:=nil; readnumber:=true; (* usually we read in numbers for items *) number:=0; (* arbitrary value *) numbered:=false; (* the piece has no number (none yet read in) *) skipunnum:=false; theline := 1; end; (* brinit *) (* end module book.brinit version = 7.72; {of delmod.p 2007 Jul 23} *) (* ************************************************************************ *) (* end module package.brpiece version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brorgkey *) procedure brorgkey(var thefile: text; var theline: integer; var org: orgkey); (* read organism key *) begin with org do begin {bbb} brheader(thefile,theline,hea); getline(mapunit); brline(thefile,theline,mapunit); end end; (* end module book.brorgkey version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.brchrkey *) procedure brchrkey(var thefile: text; var theline: integer; var chr: chrkey); (* read chromosome key *) begin with chr do begin {bbb} brheader(thefile,theline,hea); brreanum(thefile,theline,mapbeg); brreanum(thefile,theline,mapend); end end; (* end module book.brchrkey version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module book.getocp *) procedure getocp(var thefile: text; var theline: integer; var org: orgkey; var orgchange, orgopen: boolean; var chr: chrkey; var chrchange, chropen: boolean; var pie: pieceptr; var piechange, pieopen: boolean); (* Get the next piece and its organism and chromosome keys. The three change variables indicate whether or not a new organism, chromosome or piece name was found. If a piece is not found, then pieopen will be false. orgopen, chropen and pieopen are used by getocp to tell when it has entered an organism, chromosome or piece. All booleans should be set to false initially. There should be one triplet for each book read. It is important to initialize ALL variables, including pie: orgchange := false; orgopen := false; chrchange := false; chropen := false; piechange := false; pieopen := false; pie := nil; theline := 0; 1999 June 2 The book reading routines now treat data objects more precisely. Rather than test for eof, the endo of book occurs when pieopen is returned as false. A book reading loop now looks like this: repeat getocp(book, theline, org, orgchange, orgopen, chr, chrchange, chropen, pie, piechange, pieopen); writeln(output,'pieopen: ',pieopen); if pieopen then begin writeln(output,'piece at line: ',theline:1); end; until not pieopen; *) var ch: char; newchr: chrkey; neworg: orgkey; newpie: pieceptr; begin ch:='a'; while not (ch in [' ','p']) do begin ch:=getto(thefile,theline,['o','c','p']); if ch <> ' ' then begin case ch of 'o': if orgopen then begin readln(thefile); (* move past the word 'organism' - new definition 1999 Mar 13 *) orgopen:=false (* close organism *) end else begin brorgkey(thefile,theline,neworg); if (neworg.hea.keynam.letters <> org.hea.keynam.letters) and (neworg.hea.keynam.length <> org.hea.keynam.length) then begin { writeln(output,'--------orgchanged!'); write (output,'--------old org:"', org.hea.keynam.letters); writeln(output, '" ', org.hea.keynam.length:1); write (output,'--------new org:"',neworg.hea.keynam.letters); writeln(output, '" ',neworg.hea.keynam.length:1); } (*ccc*) orgchange:=true; copyheader(neworg.hea,org.hea); (* move the mapunit over to the org! *) org.mapunit := neworg.mapunit; clearline(neworg.mapunit); end else orgchange:=false; orgopen:=true; end; 'c': if chropen then begin readln(thefile); (* move past the word 'chromosome' - new definition 1999 Mar 13 *) chropen:=false (* close chromosome *) end else begin brchrkey(thefile,theline,newchr); if (newchr.hea.keynam.letters <> chr.hea.keynam.letters) and (newchr.hea.keynam.length <> chr.hea.keynam.length) then begin { writeln(output,'--------chrchanged!'); write (output,'--------old chr:"', chr.hea.keynam.letters); writeln(output, '" ', chr.hea.keynam.length:1); write (output,'--------new chr:"',newchr.hea.keynam.letters); writeln(output, '" ',newchr.hea.keynam.length:1); } chrchange:=true; copyheader(newchr.hea,chr.hea); (* move the map range over to the chr! *) chr.mapbeg := newchr.mapbeg; chr.mapend := newchr.mapend; end else chrchange:=false; chropen:=true; end; 'p': if pieopen then begin pieopen:=false; (* close last piece *) ch:='a' (* prevent falling out of the loop *) end else begin new(newpie); brpiece(thefile,theline,newpie); if pie = nil then piechange := true else begin if (newpie^.key.hea.keynam.letters <> pie^.key.hea.keynam.letters) and (newpie^.key.hea.keynam.length <> pie^.key.hea.keynam.length) then begin piechange:=true; end else piechange:=false; end; pieopen:=true; (* we always have to switch over to the new piece, because although the name may be the same, the DNA sequence could be different. That is, the book may contain two pieces with the same name, and we want to be sure to search the new one, not the old one. *) if pie <> nil then begin clearpiece(pie); (* save the links *) dispose(pie); (* close up shop *) end; pie := newpie; end end end else begin pieopen := false end end end; (* origin: search version = 6.39 *) (* end module book.getocp version = 7.72; {of delmod.p 2007 Jul 23} *) (* ************************************************************************ *) (* end module package.getocp version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module copyaline *) procedure copyaline(var fin, fout: text); (* copy a line from file fin to file fout *) begin (* copyaline *) while not eoln(fin) do begin fout^ := fin^; put(fout); get(fin) end; readln(fin); writeln(fout); end; (* copyaline *) (* end module copyaline version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module interact.clearstring *) (* begin module clearstring *) procedure clearstring(var ribbon: string); (* empty the string *) var index: integer; (* to the ribbon *) begin (* clearstring *) with ribbon do begin for index := 1 to maxstring do letters[index] := ' '; length := 0; current := 0; end end; (* clearstring *) procedure initializestring(var ribbon: string); (* start the string with a nil pointer. This routine should be called before doing linked list work. This allows the standard string routines to clear the string without killing the pointer. *) begin (* initializestring *) clearstring(ribbon); ribbon.next := nil; end; (* initializestring *) (* end module clearstring version = 4.86; (@ of prgmod.p 2004 Sep 8 *) (* end module interact.clearstring version = 7.72; {of delmod.p 2007 Jul 23} *) (* begin module interact.writestring *) procedure writestring(var tofile: text; var s: string); (* write the string s to file tofile, no writeln *) var i: integer; (* index to s *) begin (* writestring *) with s do for i := 1 to length do write(tofile, letters[i]) end; (* writestring *) (* end module interact.writestring version = 4.21; (@ of prgmod.p 1997 October 22 *) (* begin module interact.getstring *) procedure getstring(var afile: text; var buffer: string; var gotten: boolean); (* get a string from a file not using string calls. this lets one obtain lines from a file without interactive prompts *) var index: integer; (* of buffer *) begin (* getstring *) clearstring(buffer); if eof(afile) then gotten := false else begin index := 0; while (not eoln(afile)) and (index < maxstring) do begin index := succ(index); read(afile, buffer.letters[index]) end; if not eoln(afile) then begin writeln(output, ' getstring: a line exceeds maximum string size (', maxstring:1,')'); halt end; buffer.length := index; buffer.current := 1; readln(afile); gotten := true end end; (* getstring *) (* end module interact.getstring version = 4.21; (@ of prgmod.p 1997 October 22 *) (* begin module interact.figurestring *) procedure figurestring( var line: string; (* a string of characters to figure out *) var first: integer; (* first found non-blank character in the line *) var last: integer; (* last character before a blank after first *) var whzat: char; (* what the token is *) var c: char; (* the first character of the token *) var i: integer; (* integer value of token if it is integer; or 0 *) var r: real); (* the real value if it is real; or 0.0 *) (* figurestring figures out the tokens in a string. it recognizes words, integers, reals and poorly formed numbers. you can easily use it to parse lines. our goal is to figure out what thing is on a string. start looking at the current place on the line. first and last are the first 'token' in line after start. the current place is updated to the letter after last. the thing found is described by the value of whzat: 'c': character (when the token does not begin with a digit, '+', or '-') 'i': integer 'r': real ' ': blank line 'g': garbage, cannot figure it out and the value of the thing found is the appropriate variable *) var numbers: set of '0'..'9'; sign: integer; (* sign of a number *) numberstart: integer; (* the point a number starts, beyond its sign, if any *) point: integer; (* location of decimal point *) power: integer; (* of 10 representing a place value in the number *) l: integer; (* an index for dissecting numbers *) function figureinteger(first,last:integer):integer; (* figure the integer in the token *) var i: integer; (* index *) sum, increment: integer; begin (* figureinteger *) power:=1; (* start at ones place *) sum:=0; (* start sum at zero *) for i:=last downto first do begin case line.letters[i] of '0': increment:=0; '1': increment:=1; '2': increment:=2; '3': increment:=3; '4': increment:=4; '5': increment:=5; '6': increment:=6; '7': increment:=7; '8': increment:=8; '9': increment:=9 end; sum:=sum+power*increment; power:=power*10 end; figureinteger:=sum end; (* figureinteger *) begin (* figurestring *) numbers:=['0','1','2','3','4','5','6','7','8','9']; (* c:=' '; i:=0; r:=0.0; do not affect these variables unless necessary *) point:=0; whzat := '.'; (* assume that we have someting to work on *) (* now to see if that is true: *) with line do if (length = 0) or (current < 1) or (current > length) then whzat := ' ' else begin (* figure out where the first token is in the line *) first:=line.current; while (line.letters[first]=' ') and (first < line.length) do first:=succ(first); if (first = line.length) and (line.letters[first] = ' ') then whzat := ' '; end; if whzat <> ' ' then begin last:=first; while (line.letters[last]<>' ') and (last < line.length) do last:=succ(last); if line.letters[last] = ' ' then last := pred(last); (* the token is between inclusive first and last *) c:=line.letters[first]; if (c in numbers) or (c in ['+','-']) then begin if c in ['+','-'] then begin case c of '+': sign:=+1; '-': sign:=-1; end; numberstart:=succ(first) end else begin sign:=+1; numberstart:=first end; whzat:='i'; for l:=numberstart to last do begin if not(line.letters[l] in numbers) then if line.letters[l]='.' (* we found a period *) then if whzat='i' (* if so far it is numbers *) then begin whzat:='r'; (* it is actually real *) point:=l end else whzat:='g' (* it is a second '.', ie garbage *) else whzat:='g' (* it is garbage *) end; (* if it is only numbers, it is integer *) (* build number *) (* if it ends in a period, it is integer *) if (whzat = 'r') and (point = last) then whzat:='i'; if whzat = 'i' then begin if point = last (* had an ending decimal point *) then i:=sign * figureinteger(numberstart,pred(last)) else i:=sign * figureinteger(numberstart,last); r:=i end else if whzat = 'r' then begin i:=figureinteger(numberstart,point-1); r:=sign * (i+figureinteger(point+1,last)/power); i:=sign * i end end else begin whzat:='c'; end; (* move the start to just beyond the last character of the token *) line.current:=succ(last) end end; (* figurestring *) (* end module interact.figurestring version = 4.21; (@ of prgmod.p 1997 October 22 *) (* ************************************************************************** *) (* ************************************************************************** *) (* ************************************************************************** *) (* begin module parsestring *) procedure parsestring(var marks: text; var s: string; var m: real; var c1, c2: char; var done: boolean); (* Parse the string s. If the desired variables are found, done is true. If parsing failed, then fail is true *) var gotten: boolean; (* to a string *) (* variables for figurestring *) first: integer; (* first found non-blank character in the line *) last: integer; (* last character before a blank after first *) whzat: char; (* what the token is *) c: char; (* the first character of the token *) i: integer; (* integer value of token if it is integer; or 0 *) r: real; (* the real value if it is real; or 0.0 *) begin { write(marksout,'* parsestring: "'); writestring(marksout, s); writeln(marksout,'"'); writeln(marksout,'* first ',first:1); writeln(marksout,'* last ',last:1); } figurestring(s, first, last, whzat, c, i, r); { writeln(marksout,'* first ',first:1); writeln(marksout,'* last ',last:1); } c1 := c; (* basically c1 := s.letters[1]; but this gets figurestring to move to the next token *) if first <> last then c2 := s.letters[2] else c2 := ' '; { writeln(marksout,'* figurestring: c1 = ',c1 ); } if (c1 = '*') or (c1 = '%') or (c1 = 'o') then begin (* copy a comment line or o command *) writestring(marksout, s); writeln(marksout); end else if c1 = 'p' then begin (* copy the p synchronize as a comment line *) done := true; c2 := 'P'; m := bignumber; (* make it *really* low priority! *) { write(marksout,'* p found in parsestring: '); writestring(marksout, s); writeln(marksout); } end { else if c1 = 'o' then begin (* copy the o line *) writestring(marksout, s); writeln(marksout); write(marksout,'* o found in parsestring: '); writestring(marksout, s); writeln(marksout); writeln(marksout); end } else if c1 = 'u' then begin (* copy the user defined material *) writestring(marksout, s); writeln(marksout); gotten := true; while gotten and (s.letters[1] <> '!') do begin getstring(marks, s, gotten); if gotten then begin writestring(marksout, s); writeln(marksout); end end end else begin (* handle standard symbols *) { writeln(marksout,'* c2 = ',c2); } figurestring(s, first, last, whzat, c, i, r); { writeln(marksout,'* for m figurestring: r= ',r:5:2); } if not (whzat in ['i','r']) then begin writeln(output,'parsestring:', ' could not find a real number or integer.'); writeln(output,'The string is:'); write(output,'"'); writestring(output,s); writeln(output,'"'); for i := 1 to first do write(output,' '); writeln(output,'^ this is not the beginning of an integer'); { writeln(output,'first = ', first:1); writeln(output,'last = ', last:1); } writeln(output,'c1 = ', c1); writeln(output,'c2 = ', c2); done := true; end; m := r; done := true; end end; (* end module parsestring *) (* begin module markget *) procedure markget(var marks, marksout: text; var m: real; var c1, c2: char; var s: string; var gotten: boolean); (* step the marks file to the next mark, and capture it's coordinate and identification characters. This is done by first getting the entire line in string s and then parsing it. *) var done: boolean; (* done with this routine *) begin { writeln(marksout ,'* markget ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'); } done := false; gotten := false; while not done do begin { writeln(marksout,'* ```````````````````````````````'); } if eof(marks) then begin done := true; gotten := false end else begin if eoln(marks) then begin (* copy a blank line *) readln(marks); writeln(marksout); end else begin (* get the first character, c1 *) getstring(marks, s, gotten); { write(output,'string s = "'); writestring(output,s); writeln(output,'"'); } if gotten then parsestring(marks, s, m, c1, c2, done); end; end; end; end; (* end module markget *) (* begin module markput *) procedure markput(var marksout: text; var m: real; var c1, c2: char; var s: string); (* write out the mark, and then clear it *) begin { writeln(marksout,'* markput c1 = ',c1, ' c2 = ',c2, ' m = ',m:1:1); } if c1 = 'p' then begin { zq } writeln(marksout,'* p finished synchronization ',round(m):1); end else if c1 <> ' ' then begin writestring(marksout, s); writeln(marksout); end; m := bignumber; c1 := ' '; c2 := ' '; clearstring(s); end; (* end module markput *) (* begin module mergemarks.themain *) procedure themain(var book, marks1, marks2, mergemarksp, marksout: text); (* the main procedure of the program *) var done: boolean; (* done going through the files *) decreasing: boolean; (* the piece numbering is decreasing *) gotten: boolean; (* got the string s *) m1, m2: real; (* current coordinates *) m1c1, m1c2, m2c1, m2c2: char; (* current characters for the two marks files. m1 is for the marks 1 file; m2 is for marks2 file. c1 is the first character (identifier); c2 is the mark symbol type. *) s1, s2: string; (* entire lines for marks1 and marks2 *) parameterversion: real; (* parameter version number *) theline: integer; (* line number of the book *) waitfor1, waitfor2: boolean; (* wait for the marks file 1 or 2 to catch up to the other. At that point both have a 'p' and are therefore synchronized. *) (* getocp variables: *) org: orgkey; orgchange, orgopen: boolean; chr: chrkey; chrchange, chropen: boolean; pie: pieceptr; piechange, pieopen: boolean; position: integer; (* position in the book *) piecelengthpie: integer; (* piecelength(pie) computed once for efficiency *) pieceisdone: boolean; (* This piece of the book is done since we have found 'p' in both the marks1 and marks2 files. So SKIP to the end of the piece! If this is not done, the piece coordinates will not match the next marks and everything is a mess! *) procedure markcheck(position: integer; decreasing: boolean); (* check the marks and read or print depending on the internal position in a sequence of the book. Handle decreasing coordinates. *) var position1: integer; (* internal position in the book of a mark *) position2: integer; (* internal position in the book of a mark *) (* Note 2011 Oct 27: to handle the cases where there are NO marks in one file or the other, the position1 is set to a value -100 and position2 is set to -200. The point is that the position in internal coordinates is never negative, so these cases get handled properly *) d1, d2: real; (* difference between a mark position and an integer *) onefirst: boolean; (* the mark from file 1 gets to go before a file 2 mark *) twopiecelength: integer; (* twice the length of the piece pie *) procedure doifchain; (* do a chain of if tests *) var pietointroundm1pie: integer; (* precalculated internal position *) pietointroundm2pie: integer; (* precalculated internal position *) begin if (position1 = position2) and (position1 = position) then begin { writeln(output,'position = ',position :1 ); (* bUBBA *) writeln(output,'position1 = ',position1:1 ); (* bUBBA *) writeln(output,'position2 = ',position2:1 ); (* bUBBA *) } (* the two marks are fighting for position. Determine which one should win *) (* The function trunc removes digits after the decimal place, so goes towards zero. To avoid this, add twice the piecelength before taking the trunc. *) twopiecelength := 2 * piecelength(pie); case pie^.key.piedir of minus: begin d1 := trunc(m1+twopiecelength) - m1 - twopiecelength; d2 := trunc(m2+twopiecelength) - m2 - twopiecelength; onefirst := d1 < d2; end; plus: begin d1 := trunc(m1+twopiecelength) - m1 - twopiecelength; d2 := trunc(m2+twopiecelength) - m2 - twopiecelength; onefirst := not (d1 < d2); end; end; { write(output, ' d1 = ',d1:5:2); write(output, ' d2 = ',d2:5:2); write(output, ' m1 = ',m1:5:2); write(output, ' m2 = ',m2:5:2); } (* when they are EXACTLY the same make marks1 go first so that marks2 will overwrite them. *) if d1 = d2 then begin (* SAME *) onefirst := true; end; if onefirst then begin markput( marksout, m1, m1c1, m1c2, s1); markget(marks1, marksout, m1, m1c1, m1c2, s1, gotten); markput( marksout, m2, m2c1, m2c2, s2); markget(marks2, marksout, m2, m2c1, m2c2, s2, gotten); end else begin (* twowins *) markput( marksout, m2, m2c1, m2c2, s2); markget(marks2, marksout, m2, m2c1, m2c2, s2, gotten); markput( marksout, m1, m1c1, m1c2, s1); markget(marks1, marksout, m1, m1c1, m1c2, s1, gotten); end end else if position1 = position (* do just 1 *) then begin writeln(marksout,'* position1 = position (* do just 1 *)'); {zq} markput( marksout, m1, m1c1, m1c2, s1); markget(marks1, marksout, m1, m1c1, m1c2, s1, gotten); end else if position2 = position (* do just 2 *) then begin writeln(marksout,'* position2 = position (* do just 2 *)'); {zq} markput( marksout, m2, m2c1, m2c2, s2); markget(marks2, marksout, m2, m2c1, m2c2, s2, gotten); end; end; (* end doifchain *) begin {if m1c1 <> '*' then writeln(marksout,'* m1c1 = ',m1c1);} if m1c1 = 'p' then begin { writeln(marksout,'* found p in 1'); } waitfor2 := true; end; if m2c1 = 'p' then begin { writeln(marksout,'* found p in 2'); } waitfor1 := true; end; if waitfor1 and waitfor2 then begin (* both are waiting, so they are synchronized! *) writeln(marksout,'p - SYNCHRONOUS FIND'); pieceisdone := true; writeln(marksout,'* pieceisdone ----> waitfor1 and waitfor2'); (* move past the 'p' *) markget(marks1, marksout, m1, m1c1, m1c2, s1, gotten); { 2012 Apr 13: DO NOT MOVE - let other code figure it out!!! (* print the mark *) markput( marksout, m1, m1c1, m1c2, s1); (* prepare for the next mark *) markget(marks1, marksout, m1, m1c1, m1c2, s1, gotten); } markget(marks2, marksout, m2, m2c1, m2c2, s2, gotten); { 2012 Apr 13: DO NOT MOVE - let other code figure it out!!! markput( marksout, m2, m2c1, m2c2, s2); markget(marks2, marksout, m2, m2c1, m2c2, s2, gotten); } {m1 has been set to the big number, so this doesn't work if abs(m1-m2) > 2000 then begin writeln(output, 'ERROR: far apart marks: ',m1:1:1,' ',m2:1:1); halt; end; } waitfor1 := false; waitfor2 := false; end else if waitfor1 then begin (* 2 is waiting for 1 *) markput( marksout, m1, m1c1, m1c2, s1); markget(marks1, marksout, m1, m1c1, m1c2, s1, gotten); if m1c1 = 'p' then begin writeln(marksout,'p - in mark1 finally found'); pieceisdone := true; writeln(marksout,'* pieceisdone ----> waitfor1'); markget(marks2, marksout, m2, m2c1, m2c2, s2, gotten); markget(marks1, marksout, m1, m1c1, m1c2, s1, gotten); waitfor1 := false; end end else if waitfor2 then begin (* 1 is waiting for 2 *) markput( marksout, m2, m2c1, m2c2, s2); markget(marks2, marksout, m2, m2c1, m2c2, s2, gotten); if m2c1 = 'p' then begin writeln(marksout,'p - in mark2 finally found'); pieceisdone := true; writeln(marksout,'* pieceisdone ----> waitfor2'); markget(marks1, marksout, m1, m1c1, m1c2, s1, gotten); markget(marks2, marksout, m2, m2c1, m2c2, s2, gotten); waitfor2 := false; end end else if (not waitfor1) and not (waitfor2) then begin { write (marksout,'* CASE 4'); write (marksout,' m1 = ',m1:1:1); write (marksout,' m2 = ',m2:1:1); writeln(marksout); } { This stuff doesn't work in all cases: if decreasing then begin position1 := pietoint(round(m1-0.5),pie); position2 := pietoint(round(m2-0.5),pie); end else begin position1 := pietoint(round(m1+0.5),pie); position2 := pietoint(round(m2+0.5),pie); end; original: } if m1 <> bignumber then position1 := pietoint(round(m1),pie) else position1 := -100; {zqq} if m2 <> bignumber then position2 := pietoint(round(m2),pie) else position2 := -200; if position1 > 1000 then begin writeln(marksout,'* position = ',position :1 ); (* bUBBA *) writeln(marksout,'* position1 = ',position1:1 ); (* bUBBA *) writeln(marksout,'* position2 = ',position2:1 ); (* bUBBA *) writeln(marksout,'* number = ',number:1 ); (* bUBBA *) writeln(marksout,'* m1 = ',m1:1:1); (* bUBBA *) writeln(marksout,'* m2 = ',m2:1:1); (* bUBBA *) halt; end; { BUBBA } if (position1 <> -100) or (position2 <> -200) then doifchain; end else begin writeln(output,'HUNH?? This case is supposed to be impossible!'); halt end; { writeln(marksout,'* waitfor1: ',waitfor1:5,' m1 = ',m1:1:0); writeln(marksout,'* waitfor2: ',waitfor2:5,' m2 = ',m2:1:0); } end; procedure checkending(var marks, marksout: text; var m: real; var c1, c2: char; var s: string; var gotten: boolean; number: integer); (* check for extra marks left over after the book is done. *) var warned: boolean; (* have we warned the user yet? *) begin { this is overkill if (c1 <> ' ') and (c1 <> 'p') then begin (* P's are ok.*) writeln(output); writeln(output,'WARNING: Extra unused mark in marks', number:1); writestring(output, s); writeln(output); end; } warned := false; if (not eof(marks)) then begin while not eof(marks) do begin markget(marks, marksout, m, c1, c2, s, gotten); if gotten then begin if not warned then begin writeln(output); writeln(output,'WARNING: marks',number:1, ' is not at end-of-file', ' when the book is finished!'); writeln(output,'The extra lines are:'); warned := true; end; writestring(output, s); writeln(output); markput(marksout, m, c1, c2, s); end end; end; end; begin writeln(output,'mergemarks ',version:4:2); reset(mergemarksp); readln(mergemarksp, parameterversion); if parameterversion < updateversion then begin writeln(output, 'You have an old parameter file!'); halt end; brinit(book, theline); orgchange := false; orgopen := false; chrchange := false; chropen := false; piechange := false; pieopen := false; pie := nil; reset(marks1); reset(marks2); rewrite(marksout); writeln(marksout,'* mergemarks ',version:4:2); m1 := bignumber; m1c1 := ' '; m1c2 := ' '; m2 := bignumber; m2c1 := ' '; m2c2 := ' '; clearstring(s1); clearstring(s2); waitfor1 := false; waitfor2 := false; markget(marks1, marksout, m1, m1c1, m1c2, s1, gotten); if m1 = bignumber then begin {if eof(marks1) then writeln(output,'at marks1 eof');} writeln(output,'No marks to use in marks1'); end else begin writeln(output,'Found the first mark at ',m1:10:1, ' in marks1, type: ', m1c1, m1c2); end; markget(marks2, marksout, m2, m2c1, m2c2, s2, gotten); if m2 = bignumber then begin {if eof(marks2) then writeln(output,'at marks2 eof');} writeln(output,'No marks to use in marks2'); { 2012 Mar 26: removed because not symmetrical with marks1: writeln(output,'failed to get m2'); } end else begin writeln(output,'Found the first mark at ',m2:10:1, ' in marks2, type: ', m2c1, m2c2); end; {zq} done := false; while not done do begin (* loop through the book and assign marks *) getocp(book, theline, org, orgchange, orgopen, chr, chrchange, chropen, pie, piechange, pieopen); with pie^.key do decreasing := (piedir <> coodir); if pieopen then begin writeln(marksout,'* piece ',number:1, ' ------------------------------------'); position := 1; pieceisdone := false; piecelengthpie := piecelength(pie); while (position <= piecelengthpie) and (not pieceisdone) do begin markcheck(position, decreasing); position := succ(position); end; writeln(marksout); {if number = 3 then halt;} end else done := true end; (* check for extra marks left over after the book is done *) checkending(marks1, marksout, m1, m1c1, m1c2, s1, gotten, 1); checkending(marks2, marksout, m2, m2c1, m2c2, s2, gotten, 2); end; (* end module mergemarks.themain *) begin themain(book, marks1, marks2, mergemarksp, marksout); 1: end.