program sepa(presites, mixture, sites, nonsites, output); (* sepa: separates delila instruction sets Dr. Thomas D. Schneider National Institutes of Health National Cancer Institute Center for Cancer Research Nanobiology Program Molecular Information Theory Group Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu (use only if first address fails) http://www.ccrnp.ncifcrf.gov/~toms/ module libraries required: delman, delmods *) label 1; (* end of program *) const (* begin module version *) version = 2.18; (* of sepa.p 2005 Sep 13 2005 Sep 13; 2.18: spelling: asterisk 2000 Oct 9; 2.17: no blank before org/chr 1999 Oct 6: 2.16: bug fix about names 1998 Jul 27: program upgraded to accept delila instructions with 'same' in them. 1998 July 10: the progam was producing piece names without gets. These would cause alist do be unable to correlate pieces. The revision in printpie is to only print piece names if there are gets. previous change: 1994 sep 5 origin before 1982 july 7 *) (* end module version *) (* begin module describe.sepa *) (* name sepa: separates delila instruction sets synopsis sepa(presites: in, mixture: in, sites: out, nonsites: out, output: out) files presites: delila instructions for sites of interest. they are in any order and may contain several references to the same place. (let us call this a.) mixture: delila instructions for both sites and nonsites, as obtained from the search program. (let us call this b.) sites: the presites are reordered and redundant requests are removed. (these reordered instructions we will call a".) nonsites: the mixture is reordered, redundant requests and requests in the presites instructions are removed. (using the previous notation, this would be (b-a)".) output: messages to the user. description The separate program has two main purposes: 1) to eliminate redundancy in both the site and the nonsite sets. 2) to eliminate the sites from the nonsite set. The delila instructions must be in the form output by the search program (as in delmods book.iw modules). Once the separation is completed, you may obtain the aligned book by using delila. documentation delman.use.data.flow, nar 10(9): 2971 and 2997 1982 see also search.p, delila.p, alist.p author Thomas D. Schneider bugs Sepa can not tell that these instructions are identical: get from 56 -10 to 56 +10 direction -; get from 56 -10 to 56 +10; because the second one may not be direction -. This potential problem can be avoided by always giving the direction. Also, it is advisable to make aligned listings with alist to be sure that the new aligned book is correct. Names assigned to a piece and that change for a given piece may get overwritten. *) (* end module describe.sepa *) (* internal strategy of separate. the presite instruction set is read in. duplications of previous specifications are ignored. new specifications are stored in a specification tree: * the organism specifications are a linked list of names. * each organism specification points to a linked list of chromosome specification names. * each chromosome specification points to a linked list of piece specification names. * each piece specification points to a linked list of get requests. * each get request contains: a relative amount from, a position, a relative amount to and a direction. as the presite instructions are read, the site tree grows. gets are put in numerical order. unused instructions (not put in the tree) are printed directly to the site file. the site instructions are then printed out to the sites file. now the presite instructions are read, but the sites tree is refered to, to eliminate sites from the nonsite tree that is built. the nonsite instructions are then printed out to the nonsites file. abbreviations used: org = organism chr = chromosome pie = piece get = get request spec = specification rec = request ptr = pointer *) (* more constants *) (* begin module sepa.const *) verbose = false; (* give debugging information *) (* end module sepa.const *) (* begin module book.const *) (* constants needed for book manipulations *) dnamax = 3000; (* length of dna arrays *) namelength = 20; (* maximum key name length *) linelength = 80; (* maximum line readable in book *) (* end module book.const version = 'delmod 6.51 85 apr 17 tds/gds' *) type (* array to store names (is normally from book.type) *) alpha = packed array[1..namelength] of char; (* instruction tree types *) orgspecptr = ^orgspec; chrspecptr = ^chrspec; piespecptr = ^piespec; getreqptr = ^getreq; orgspec = record nam: alpha; chr: chrspecptr; next: orgspecptr end; chrspec = record nam: alpha; pie: piespecptr; next: chrspecptr end; piespec = record nam: alpha; (* name of the piece *) isinstname: boolean; (* there is a name instruction *) instname: alpha; (* name in the prior name instruction *) get: getreqptr; next: piespecptr end; getreq = record fr: integer; (* from relative to po *) po: integer; (* position *) tr: integer; (* to relative to po *) dp: boolean; (* direction is plus *) givedirection: boolean; (* to give the direction as in the instructions read, or not *) next: getreqptr end; var presites, mixture, sites, nonsites: text; (* the entire instruction sets stored in internal format: *) sitetree, nonsitetree: orgspecptr; none: orgspecptr; (* used to make a null sites *) totalgets: integer; (* total number of gets counted in the prints *) (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 4.39; (@ of prgmod.p 1999 November 28 *) (* ************************************************************************ *) (* start: procedures for initialization *) procedure startname(var nam: alpha); var index: integer; begin for index:=1 to namelength do nam[index]:=' ' end; procedure startorg(var org: orgspecptr); begin new(org); with org^ do begin startname(nam); chr:=nil; next:=nil end end; procedure startchr(var chr: chrspecptr); begin new(chr); with chr^ do begin startname(nam); pie:=nil; next:=nil end end; procedure startpie(var pie: piespecptr); begin new(pie); with pie^ do begin startname(nam); get:=nil; next:=nil end end; procedure startget(var get: getreqptr); begin new(get); with get^ do begin fr:=0; po:=0; tr:=0; dp:=true; (* default position direction *) givedirection:=false; next:=nil end end; procedure startset(var pre, post: text; var org: orgspecptr); begin reset(pre); rewrite(post); org:=nil end; (* ************************************************************************ *) (* the next set of procedures are used to read instructions *) (* begin module findblank *) procedure findblank(var afile: text); (* read a file to find the next blank character *) var ch: char; begin repeat read(afile,ch) until ch = ' ' end; (* end module findblank version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module findnonblank *) procedure findnonblank(var afile: text; var ch: char); (* find the next non blank character in a file, return it in ch. *) begin ch:=' '; while (not eof(afile)) and (ch = ' ') do begin read(afile,ch); if eoln(afile) then readln(afile) end end; (* end module findnonblank version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module skipblanks *) procedure skipblanks(var thefile: text); (* skip over blanks until a non-blank, or end of line, is found *) begin while (thefile^ = ' ') and not eoln(thefile) do get(thefile); end; procedure skipnonblanks(var thefile: text); (* skip over nonblanks until a blank, or end of line, is found *) begin while (thefile^ <> ' ') and not eoln(thefile) do get(thefile); end; procedure skipcolumn(var thefile: text); (* skip over a data column *) begin skipblanks(thefile); skipnonblanks(thefile) end; (* end module skipblanks version = 4.21; (@ of prgmod.p 1997 October 22 *) procedure readname(var afile: text; var nam: alpha); (* read name from a file *) var index: integer; ch: char; begin {write(output,'readname:"');} startname(nam); ch:=' '; index:=0; while ch <> ';' do begin read(afile,ch); {write(output,ch);} if ch<>';' then begin index:=succ(index); nam[index]:=ch end end {;writeln(output,'"'); } end; procedure readorg(var afile: text; var org: orgspecptr); begin if org = nil then startorg(org); findblank(afile); with org^ do begin readname(afile,nam); chr:=nil; next:=nil end end; procedure readchr(var afile: text; var chr: chrspecptr); begin if chr = nil then startchr(chr); findblank(afile); with chr^ do begin readname(afile,nam); pie:=nil; next:=nil end end; procedure readpie(var afile: text; var pie: piespecptr); begin if pie = nil then startpie(pie); findblank(afile); with pie^ do begin readname(afile,nam); get:=nil; next:=nil end end; procedure readget(var afile: text; var get: getreqptr); var ch: char; begin if get = nil then startget(get); findblank(afile); (* get past 'get' *) findnonblank(afile,ch); (* get to 'from' *) findblank(afile); (* get past 'from' *) with get^ do begin read(afile,po); read(afile,fr); findnonblank(afile,ch); (* get to 'to' *) findblank(afile); (* get past 'to' *) skipblanks(afile); (* get up to the "same" or a number *) if afile^ = 's' then begin skipnonblanks(afile); (* get past the "same" *) end else begin read(afile,po); (* oh well, duplicate read ... *) end; read(afile,tr); findnonblank(afile,ch); if ch = 'd' then begin (* pick up direction *) givedirection:=true; findblank(afile); (* get past 'direction' *) findnonblank(afile,ch); (* pick up '+' or '-' *) if ch = '-' then dp:=false else dp:=true end else givedirection:=false; while ch <> ';' do read(afile,ch) end end; (* ************************************************************************ *) (* the next procedures find an object in the tree and put objects into the tree *) procedure moveorg(var org, ref, cur: orgspecptr); (* move the current pointer cur in ref to the name org, or set cur to nil *) var index: orgspecptr; found: boolean; begin found:=false; index:=ref; while (not found) and (index <> nil) do begin found:=(org^.nam = index^.nam); if not found then index:=index^.next end; if found then cur:=index (* current sites *) else cur:=nil (* none found *) end; procedure movechr(var chr, ref, cur: chrspecptr); (* move the current pointer cur in ref to the name chr, or set cur to nil *) var index: chrspecptr; found: boolean; begin found:=false; index:=ref; while (not found) and (index <> nil) do begin found:=(chr^.nam = index^.nam); if not found then index:=index^.next end; if found then cur:=index (* current sites *) else cur:=nil (* none found *) end; procedure movepie(var pie, ref, cur: piespecptr); (* move the current pointer cur in ref to the name pie, or set cur to nil *) var index: piespecptr; found: boolean; begin found:=false; index:=ref; while (not found) and (index <> nil) do begin found:=(pie^.nam = index^.nam); if not found then index:=index^.next end; if found then cur:=index (* current sites *) else cur:=nil (* none found *) end; function findget(var get, ref: getreqptr): boolean; (* find get in the sites list. return true if it was found, and set cur (current sites) to point to the found get. *) var index: getreqptr; found: boolean; begin found:=false; index:=ref; while (not found) and (index <> nil) do begin found:= (get^.po = index^.po) and (* position and ... *) ( ( (get^.dp = index^.dp) and get^.givedirection and index^.givedirection ) or ( (not get^.givedirection) and (not index^.givedirection)) ); (* ... directions are the same and both directions are given or neither direction is given *) if not found then index:=index^.next end; findget:=found end; procedure putorg(var org, ref, cur: orgspecptr); (* put org into the built sites list (ref) if it is not already there. set cur to org or the found org *) var preindex, index: orgspecptr; found: boolean; begin if ref = nil then begin (* first insertion *) ref:=org; cur:=org; org:=nil end else begin preindex:=nil; index:=ref; found:=false; while (not found) and (index <> nil) do begin found:=(org^.nam = index^.nam); preindex:=index; index:=index^.next end; if found then cur:=preindex (* move current pointer *) else begin (* put on end *) preindex^.next:=org; cur:=org; org:=nil end end end; procedure putchr(var chr, ref, cur: chrspecptr); (* put chr into the built sites list (ref) if it is not already there. set cur to chr or the found chr *) var preindex, index: chrspecptr; found: boolean; begin {zzz} if ref = nil then begin (* first insertion *) ref:=chr; cur:=chr; chr:=nil end else begin preindex:=nil; index:=ref; found:=false; while (not found) and (index <> nil) do begin found:=(chr^.nam = index^.nam); preindex:=index; index:=index^.next end; if found then cur:=preindex (* move current pointer *) else begin (* put on end *) preindex^.next:=chr; cur:=chr; chr:=nil end end end; procedure putpie(var pie, ref, cur: piespecptr); (* put pie into the built sites list (ref) if it is not already there. set cur to pie or the found pie *) var preindex, index: piespecptr; found: boolean; begin if ref = nil then begin (* first insertion *) ref:=pie; cur:=pie; pie:=nil end else begin preindex:=nil; index:=ref; found:=false; while (not found) and (index <> nil) do begin found:=(pie^.nam = index^.nam); preindex:=index; index:=index^.next end; if found then cur:=preindex (* move current pointer *) else begin (* put on end *) preindex^.next:=pie; cur:=pie; pie:=nil end end end; procedure putget(var get, gbui: getreqptr); var preindex, index: getreqptr; equal, after: boolean; begin preindex:=nil; index:=gbui; equal:=false; after:=false; while (index<>nil) and (not equal) and (not after) do begin equal:=(index^.po = get^.po); after:=(index^.po > get^.po); if not after then begin preindex:=index; index:=index^.next end end; if not equal then begin if index=nil then begin (* not found or no list *) if preindex=nil then gbui:=get (* first insertion *) else preindex^.next:=get; (* tack on at end *) get:=nil end else if after then begin if preindex = nil then begin (* insert before first *) get^.next:=index; gbui:=get end else begin (* insert in middle *) preindex^.next:=get; get^.next:=index end; get:=nil end end end; (* ************************************************************************ *) (* the following procedures write out instructions *) procedure iwritename(var thefile: text; thename: alpha); var index: integer; begin index:=1; while (thename[index] <> ' ') and (index <= namelength) do begin write(thefile, thename[index]); index:=succ(index) end end; procedure iwriteorg(var afile: text; org: orgspecptr); (* write an organism specification. *) begin write(afile,'organism '); iwritename(afile,org^.nam); writeln(afile,';'); end; procedure iwritechr(var afile: text; chr: chrspecptr); (* write an chromosome specification. *) begin write(afile,'chromosome '); iwritename(afile,chr^.nam); writeln(afile,';'); end; procedure iwritepie(var afile: text; pie: piespecptr); (* write a piece specification *) begin write(afile,'piece '); iwritename(afile,pie^.nam); writeln(afile,';'); end; procedure iwriteget(var afile: text; g: getreqptr); (* print a get delila instruction from g^.fr to g^.tr around g^.po. *) procedure iwriteplace(relative: integer); begin (* iwriteplace *) write(afile,' ',g^.po:1); if relative >= 0 then write(afile,' +', relative:1) else if relative < 0 then write(afile,' ',relative :1) end; begin (* iwriteget *) write(afile,'get from'); iwriteplace(g^.fr); write(afile,' to'); iwriteplace(g^.tr); if g^.givedirection then begin write(afile,' direction '); if g^.dp then write(afile,'+') else write(afile,'-'); end; writeln(afile,';'); totalgets:=succ(totalgets); (* global variable *) end; (* this procedure is best not coded: procedure printget(var afile: text; g: getreqptr); begin iwriteget(afile, g) end; since it is so simple *) procedure printpie(var afile: text; p: piespecptr); (* print the piece information, only if there are gets *) var index: getreqptr; begin index:=p^.get; if index <> nil then begin iwritepie(afile,p); while index <> nil do begin if p^.isinstname then begin write(afile,'name '); iwritename(afile,p^.instname); write(afile,'; '); end; iwriteget(afile, index); (* printget *) index:=index^.next end end end; procedure printchr(var afile: text; c: chrspecptr); (* print this chromosome and its underlings *) var index: piespecptr; begin iwritechr(afile,c); index:=c^.pie; while index<>nil do begin printpie(afile,index); index:=index^.next end end; procedure printorg(var afile: text; o: orgspecptr); (* print this organism and its underlings *) var index: chrspecptr; begin iwriteorg(afile,o); index:=o^.chr; while index<>nil do begin printchr(afile,index); index:=index^.next end end; procedure printinst(var afile: text; root: orgspecptr); (* print the instructions of this tree to afile *) var index: orgspecptr; begin index:=root; while index<>nil do begin (* writeln(afile); do not add extra lines *) printorg(afile,index); index:=index^.next end end; procedure readinst(var pre, post: text; var ref, bui: orgspecptr); (* read in instructions from pre file, copying extra statements into post (so comments and titles and etc will be all in post.) build the instruction tree into obui. *) var (* order of these variables: currently specified sites for elimination currently manipulated objects currently built object *) oref, o, obui: orgspecptr; cref, c, cbui: chrspecptr; pref, p, pbui: piespecptr; g : getreqptr; ch: char; (* reading character *) isname: boolean; (* was there a name instruction? *) n: alpha; (* captures the current name *) { comment1: boolean; (* a comment of type 1, from left parenthesis-asterisk to asterisk-right parenthesis *) comment2: boolean; (* a comment of type 2, from left curlie bracket to right curlie bracket *) wascomment: boolean; (* one of the comment types has been seen *) } previous: char; (* the character before ch *) done: boolean; (* done skipping instructions or comments *) begin o:=nil; oref:=nil; obui:=nil; c:=nil; cref:=nil; cbui:=nil; p:=nil; pref:=nil; pbui:=nil; g:=nil; isname := false; while not eof(pre) do begin if eoln(pre) then begin readln(pre); (* writeln(post); do not write automatically! 2000 oct 9 *) if verbose then writeln(output) end else begin findnonblank(pre,ch); {writeln(output,ch);} if not eoln(pre) then begin if ch in ['n','o','c','p','g'] then case ch of 'o': begin if verbose then writeln(output,'[org found]'); readorg(pre,o); moveorg(o,ref,oref); putorg(o,bui,obui) end; 'c': begin if verbose then writeln(output,'[chr found]'); readchr(pre,c); {zzzz} {writeln(output,'done readchr');} if oref <> nil then movechr(c,oref^.chr,cref); {writeln(output,'done if');} if obui = nil then begin writeln(output,'chr instruction found', ' but there is no org instruction!'); halt; end; putchr(c,obui^.chr,cbui) {;writeln(output,'done putchr');} end; 'n': begin (* read the name for the next piece *) if verbose then writeln(output,'[name found]'); isname := true; {yyy} findblank(pre); findnonblank(pre,ch); readname(pre,n); end; 'p': begin if verbose then writeln(output,'[piece found]'); readpie(pre,p); (* if there was a name instruction, insert now *) if isname then begin {yyy} p^.instname := n; p^.isinstname := true; isname := false; (* turn off flag *) end else p^.isinstname := false; if cref <> nil then movepie(p,cref^.pie,pref); putpie(p,cbui^.pie,pbui) end; 'g': begin if verbose then writeln(output,'[get found]'); readget(pre,g); if pref = nil then putget(g,pbui^.get) else if not findget(g,pref^.get) then putget(g,pbui^.get) end end else begin (* copy any other instruction or comments to post *) if (ch = '{') then begin (* deal with curlie comments *) write(post,ch); done := false; while not done do begin if eoln(pre) then begin readln(pre); writeln(post); end else begin read(pre,ch); write(post,ch); if ch = '}' then done := true end end; (* cr after the comment if needed *) if eoln(pre) then begin readln(pre); writeln(post); end end else if (ch = '(') then begin (* deal with paren comments *) write(post,ch); done := false; while not done do begin if eoln(pre) then begin readln(pre); writeln(post); end else begin {zzzyyy} previous := ch; read(pre, ch); write(post,ch); if (previous = '*') and (ch = ')') then done := true end end; (* cr after the comment if needed *) if eoln(pre) then begin readln(pre); writeln(post); end end else begin (* copy a statement out to the ";" *) write(post,ch); done := false; while not done do begin if eoln(pre) then begin readln(pre); writeln(post); end else begin read(pre, ch); write(post,ch); if ch = ';' then done := true end end end; (* break lines 2000 oct 9 *) if ch = ';' then writeln(post); { This old method could not handle multiple instructions per line. while not eoln(pre) do begin read(pre,ch); write(post,ch) end; readln(pre); writeln(post) } end end end end end; procedure writeversion(var afile: text); (* write the version to a file *) begin writeln(afile,'(* sepa ', version:4:2, ' *)'); end; begin (* sepa *) writeln(output,'sepa ',version:4:2); totalgets:=0; none:=nil; startset(presites, sites, sitetree); readinst(presites, sites, none, sitetree); if verbose then writeln(output,'done first readinst'); writeln(sites,'(* sites *)'); writeversion(sites); printinst(sites, sitetree); writeln(output,' gets written in sites: ',totalgets:1); writeln(sites); writeln(sites,'(* gets written in sites: ',totalgets:1,' *)'); totalgets:=0; startset(mixture, nonsites, nonsitetree); readinst(mixture, nonsites, sitetree, nonsitetree); if verbose then writeln(output,'done second readinst'); writeln(nonsites,'(* nonsites *)'); writeversion(nonsites); printinst(nonsites, nonsitetree); writeln(nonsites); writeln(nonsites,'(* gets written in nonsites: ',totalgets:1,' *)'); writeln(output,' gets written in nonsites: ',totalgets:1); 1: end. (* sepa *)