program auxmod(hst,cmp,patt,output); (* auxiliary modules Gary Stormo and Thomas Schneider Dr. Thomas D. Schneider National Cancer Institute Laboratory of Experimental and Computational Biology Molecular Information Theory Group Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu (use only if first address fails) http://www.lecb.ncifcrf.gov/~toms/ *) label 1; (* end of program *) const (* begin module version *) version = 1.41; (* of delila.p 2004 Sep 8 2004 Sep 8, 1.41 upgrade for GPC 1994 Sep 5, 1.40 last major change 1986 dec 12: origin *) (* end module version *) (* begin module describe.auxmod *) (* name auxmod: modules for auxiliary programs synopsis auxmod(hst: in, cmp: in, patt: in, output: out) files hst: a histogram from hist for testing, or empty cmp: a composition from comp for testing, or empty patt: a pattern matrix from patlrn for testing, or empty output: the version of auxmod is printed. test results are printed. successful compilation and running of the program indicates that the modules are correct. description auxmod is a collection of modules used only rarely in various auxiliary programs. it includes modules for reading compositions (comp.), histograms (hist.), helix lists (findcolon and gethelix) and pattern matrices (matrix.). see also delmod.p, module.p, hist.p, comp.p, patlrn.p author gary d. stormo and thomas d. schneider bugs none known *) (* end module describe.auxmod *) (* begin module hist.const *) maxhistwidth = 120; (* end module hist.const *) (* begin module matrix.const *) maxmatrix = 120; (* the maximum size of a pattern matrix *) (* end module matrix.const *) type (* begin module base.type *) (* declare the type base to avoid need for modules from delmods *) base = (a, c, g, t); (* end module base.type *) (* begin module hist.type *) histptr = ^histarray; histarray = array[1..maxhistwidth] of integer; histogram = record zero : histptr; mono : array[base] of histptr; di : array[base,base] of histptr; tri : array[base,base,base] of histptr; end; (* end module hist.type *) (* begin module comp.type *) (* the composition is stored in a tree of these nodes *) compnodeptr = ^compnode; (* points to a node of the tree *) compnode = record (* a node of the composition tree *) count : integer; (* the number of oligos for this node *) son : array[base] of compnodeptr; (* the pointers to 'descendants' of this node of the tree *) end; (* spiders are used to make the composition tree *) spiderptr = ^spider; (* points to a 'spider' *) spider = record (* a spider climbs the composition tree, its path determined by the sequences, and increments 'count' at all the nodes it passes, thereby determining the composition *) depth : integer; (* the level of the node now at *) place : compnodeptr; (* a pointer to the current node *) next : spiderptr; (* the next spider in the collection *) end; (* the total number of composition entries at a given level is stored in the linked list of type comptotal *) compptr = ^comptotal; comptotal = record count: integer; (* the number at a given level *) next: compptr; (* pointer to the next level totals *) end; (* 'path' is used in printing the tree *) pathptr = ^path; (* pointer into a path on the tree *) path = record (* the path of bases to get to a particular node *) bas : base; next : pathptr; end; (* end module comp.type version = 'auxmod 1.25 82 dec 12 gds/tds'; *) (* begin module matrix.type *) matrix = array[base,1..maxmatrix] of integer; (* a pattern matrix *) (* end module matrix.type *) var hst, (* an input histogram *) cmp, (* an input composition *) patt: text; (* an input pattern matrix *) (* variables for histogram reading *) histmin, histmax, histwidth, histbegin, numseqs: integer; histo: histogram; (* variables for composition reading *) compmax, readmax: integer; root: compnodeptr; monocomptotal: compptr; (* variables for pattern matrix reading *) beginning, pattwidth: integer; wmatrix: matrix; (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 'prgmod 3.96 85 mar 18 tds'; *) (* begin module basetochar *) function basetochar(ba:base):char; (* convert a base into a character *) begin case ba of a: basetochar:='a'; c: basetochar:='c'; g: basetochar:='g'; t: basetochar:='t'; end end; (* end module basetochar *) (* begin module skipoligo *) procedure skipoligo(var thefile: text); (* this procedure is used to skip over an oligonucleotide string. it reads until it comes to a base, and then continues to read until it comes to a blank. no checking is done for non-base characters in between. *) var ch : char; begin repeat read(thefile,ch) until (ch in ['a','c','g','t']); repeat read(thefile,ch) until (ch = ' '); end; (* end module skipoligo *) (* begin module hist.readhist *) procedure readhist(var hist: text; var histmin, histmax, histwidth, histbegin, numseqs: integer; var histo: histogram); (* this procedure requires modules: hist.const, hist.type, skipoligo; this procedure takes a file 'hist', which is the output of the hist program, and returns the information in that file. the variables are: histmin: the shortest oligo counted in the histogram; histmax: the longest oligo counted in the histogram; histwidth: the width of the histogram; histbegin: the first position in the histogram; numseqs: the number of sequences counted in the histogram. the actual histogram data is returned in the variable 'histo'. *) var b1,b2,b3 : base; i : integer; begin (* get the histogram parameters *) reset(hist); readln(hist); readln(hist); readln(hist,histmin); readln(hist,histmax); readln(hist,numseqs); readln(hist,histwidth); readln(hist); readln(hist,histbegin); readln(hist); (* get the histogram data from histmin to histmax *) if (histmin = 0) then begin new(histo.zero); for i := 1 to histwidth do read(hist,histo.zero^[i]) end else histo.zero := nil; if (histmin <= 1) and (histmax >= 1) then for b1 := a to t do begin skipoligo(hist); new(histo.mono[b1]); for i := 1 to histwidth do read(hist,histo.mono[b1]^[i]) end else for b1 := a to t do histo.mono[b1] := nil; if ((histmin <= 2) and (histmax >= 2)) then for b1 := a to t do for b2 := a to t do begin skipoligo(hist); new(histo.di[b1,b2]); for i := 1 to histwidth do read(hist,histo.di[b1,b2]^[i]) end else for b1 := a to t do for b2 := a to t do histo.di[b1,b2] := nil; if (histmax >= 3) then for b1 := a to t do for b2 := a to t do for b3 := a to t do begin skipoligo(hist); new(histo.tri[b1,b2,b3]); for i := 1 to histwidth do read(hist,histo.tri[b1,b2,b3]^[i]) end else for b1 := a to t do for b2 := a to t do for b3 := a to t do histo.tri[b1,b2,b3] := nil; end; (* end module hist.readhist *) (* begin module comp.readcomp *) (* begin module skipoligo *) (* end module skipoligo *) procedure readcomp(var comp: text; var compmax: integer; readmax: integer; var root: compnodeptr; var monocomptotal: compptr); (* this procedure requires modules: comp.type, skipoligo, halt; this procedure reads from file 'comp' a composition and puts it into the tree pointed to by the 'root' pointer. 'compmax' is the depth of the composition tree which is stored. it is the minimum of 'readmax', the requested depth, and 'detcomp', the depth for which the input file composition was determined. 'monocomptotal' points to the beginning (for the monos) of a linked list which gives the totals for each level of the composition. *) type (* for efficient reading of the information into the tree, each node that has a successor is stored in a queue as well as put into the tree. the variables of the type list are the queue elements. *) listptr = ^list; list = record item: compnodeptr; (* points to the composition tree node *) next: listptr; (* points to the next list item in the queue *) end; var freeitem, (* the list of unused list pointers *) listitem, (* an item in the queue *) first, (* the first item in the queue *) last: listptr; (* the last item in the queue *) comptot, (* the comp total for a given level *) newcomptot: compptr; (* for adding to the string of comptot"s *) detcomp, (* the level to which the input composition was determined *) level, (* of the composition being read, i.e., monos, dis, ... *) number: integer; (* read from the 'comp' file *) ch: char; (* for reading from 'comp' *) ba: base; (* an index *) (* getitem and clearitem provide efficient use of linked list storage by keeping a list of unused pointers that can be allocated instead of always creating 'new' ones *) procedure getitem(var l: listptr); (* obtain a listitem from the free list or by making a new one *) begin if freeitem<>nil then begin l:=freeitem; freeitem:=freeitem^.next end else new(l); l^.next:=nil end; procedure clearitem(var l: listptr); (* return a listitem to the free list *) var lptr: listptr; begin if l<>nil then begin lptr:=l; l:=l^.next; lptr^.next:=freeitem; freeitem:=lptr end end; begin (* readcomp *) reset(comp); if eof(comp) then begin writeln(output,' error: no composition file provided'); halt; end; readln(comp); (* skip the program identification *) readln(comp); (* skip the book identification *) readln(comp,detcomp); (* obtain the determined composition *) readln(comp); (* skip the blank line *) (* determine the level of composition to be stored *) if (readmax < 1) then begin writeln(output); writeln(output,' warning: 0 or negative oligo length requested'); writeln(output,' composition used is depth ',detcomp:1); writeln(output); compmax := detcomp end else if (readmax > detcomp) then begin writeln(output); writeln(output,' warning: requested composition oligo', ' length (',readmax:1,')'); writeln(output,' is larger than the determined composition', ' oligo length (',detcomp:1,').'); writeln(output,' composition used is to depth ',detcomp:1); writeln(output); compmax := detcomp end else compmax := readmax; new(root); new(monocomptotal); new(comptot); new(first); new(last); first^.item := root; first^.next := nil; last := first; new(freeitem); freeitem^.next := nil; (* read in the total number of bases from the composition *) readln(comp); (* skip the * *) readln(comp); (* skip the information line *) readln(comp,number); root^.count := number; repeat read(comp,ch); (* skip the space *) read(comp,ch); (* the ch determines what to do next *) if (ch = '*') then begin (* determine the level about to be read *) readln(comp); readln(comp,level); if (level = 1) then monocomptotal := comptot else begin new(newcomptot); comptot^.next := newcomptot; comptot := newcomptot; end; comptot^.count := 0; comptot^.next := nil end else begin (* read the composition values *) for ba := a to t do begin skipoligo(comp); read(comp,number); if (number <> 0) then begin new(first^.item^.son[ba]); first^.item^.son[ba]^.count := number; comptot^.count := comptot^.count + number; getitem(listitem); last^.next := listitem; last := listitem; last^.next := nil; last^.item := first^.item^.son[ba] end else first^.item^.son[ba] := nil; end; clearitem(first); readln(comp); end; until level = compmax; (* for the last level of compositions to be read we don"t need to store the nodes in the queue because their successors will not be read in. *) repeat for ba := a to t do begin skipoligo(comp); read(comp,number); if (number <> 0) then begin new(first^.item^.son[ba]); first^.item^.son[ba]^.count := number; comptot^.count := comptot^.count + number; end else first^.item^.son[ba] := nil; end; clearitem(first); readln(comp); until (first = nil); end; (* readcomp *) function getcount(root: compnodeptr; start: pathptr): integer; (* this function follows the tree from the node 'root' (which may be the root of a subtree) along the path initiated with 'start', and returns the count of the resulting node. if the path through the tree ever hits a null node in the process the count is returned to be zero. *) var place: compnodeptr; (* a place in the composition tree *) point: pathptr; (* a point in the path through the tree *) begin point:= start; place := root; while ((place <> nil) and (point <> nil)) do begin place := place^.son[point^.bas]; point := point^.next; end; if (place = nil) then getcount := 0 else getcount := place^.count; end; (* end module comp.readcomp version = 'auxmod 1.25 82 dec 12 gds/tds'; *) (* ************************************************************************ *) (* begin module findcolon *) procedure findcolon(var thefile: text); (* move the file to the characters just past ': ' (colon, space) *) var found: boolean; procedure die; begin writeln(output,' no helix list data'); halt end; begin found:=false; while not found do begin if eof(thefile) then die else if thefile^ = ':' then begin get(thefile); if eof(thefile) then die else if thefile^ = ' ' then begin get(thefile); found:=true end end else get(thefile) end end; (* end module findcolon version = 3.13; (* of matrix 1985 march 13 *) (* begin module gethelix *) procedure gethelix(var hlist: text; var x,y: integer; var length: integer; getenergy: boolean; var energy: real; var done: boolean); (* get the (x,y,length) info on some helix listed in hlist. if the x,y pair changes, then done is true and no values are returned. checknames may be used at this point to start the next pair. if getenergy is true, then the routine expects that there will be an energy on the helix line *) begin (* gethelix *) if eof(hlist) then done:=true else begin get(hlist); (* skip the blank *) if hlist^ <> ' ' then done:=true else begin done:=false; findcolon(hlist); read(hlist,x); findcolon(hlist); read(hlist,y); findcolon(hlist); read(hlist,length); if getenergy then read(hlist, energy) else energy := 0; readln(hlist) end end end; (* gethelix *) (* end module gethelix version = 3.13; (* of matrix 1985 march 13 *) (* begin module complines *) function complines(var a,b: text): boolean; (* compare one line from each of files a and b starting at the current file positions. both lines are compared up to and including the end-of-line mark (i.e., they are both readln-ed). return true if the lines are identical and false if they are not. ignore trailing blanks. this is done by letting chara or charb assume the value of a blank when eoln is true. when both lines are eoln, the next line is inspected. if one line is at end-of-file and the other is not, 'false' is returned. if both lines are at eof, 'true' is returned. *) var chara,charb: char; (* characters in files a and b *) done: boolean; (* stop looking for identity *) begin if eof(a) and eof(b) then complines := true else if eof(a) or eof(b) then complines := false else begin (* neither file is eof. *) done := false; while not done do begin if eoln(a) then chara := ' ' else read(a,chara); if eoln(b) then charb := ' ' else read(b,charb); if eoln(a) and eoln(b) then begin done := true; complines := true end else if chara <> charb then begin done := true; complines := false end; end; readln(a); readln(b); end end; (* complines *) (* end module complines version = 'prgmod 3.96 85 mar 18 tds'; *) (* begin module hlistvsbooks *) procedure hlistvsbooks (var hlist : text; (* list of helices from helix *) var xbook,ybook : text (* the delila books *) ); (* checks the title line in xbook with the corresponding line in hlist, ditto ybook. if they are not identical, error messages are sent to output and the program halts. written by britta singer *) var xasx, (* true if xbook input file corresponds to xbook in hlist *) yasy, (* true if ybook input file corresponds to ybook in hlist *) xasy, (* true if xbook input file corresponds to ybook in hlist *) yasx: (* true if ybook input file corresponds to xbook in hlist *) boolean; procedure gettitleline(var xbook,ybook,hlist : text); (* gets to the lines in the xbook, ybook, and hlist input files that contain the titles and thus are to be compared *) begin (* gettitleline *) reset(xbook); reset(ybook); reset(hlist); readln(hlist) end; (* gettitleline *) procedure findtitle(var hlist : text); (* places the cursor at the beginning of the title in hlist *) var n : integer; (* counter for skipping characters *) begin (* findtitle *) for n := 1 to 4 do get(hlist) end; (* findtitle *) begin (* hlistvsbooks *) gettitleline(xbook,ybook,hlist); findtitle(hlist); xasx := complines(xbook,hlist); findtitle(hlist); yasy := complines(ybook,hlist); gettitleline(xbook,ybook,hlist); findtitle(hlist); yasx := complines(ybook,hlist); findtitle(hlist); xasy := complines(xbook,hlist); if not xasx then writeln(output, ' xbook input file does not correspond to xbook in hlist.'); if not yasy then writeln(output, ' ybook input file does not correspond to ybook in hlist.'); if xasy and (not xasx) then writeln(output, ' xbook input file corresponds to ybook in hlist.'); if yasx and (not yasy) then writeln(output, ' ybook input file corresponds to xbook in hlist.'); if xasy and yasx and (not xasx) and (not yasy) then writeln(output, ' xbook and ybook input files are reversed. try again.'); if not (xasx and yasy) then halt end; (* hlistvsbooks *) (* end module hlistvsbooks *) (* ************************************************************************ *) (* begin module matrix.readmatrix *) procedure findlearnend(var pattern: text); (* locate the end of the cyclic learning part of the pattern file *) const learnmax = 16; (* the size of learnend *) var (* this array contains the pattern that defines the end *) learnend: packed array[1..learnmax] of char; state: integer; (* how close we are to finding learnend *) begin (* findlearnend *) reset(pattern); learnend := 'end of learning.'; state := 1; while not eof(pattern) and (state < learnmax) do begin if learnend[state] = pattern^ then state := succ(state) else state :=1; if eoln(pattern) then readln(pattern) else get(pattern) end; if eof(pattern) then begin write (output,'pattern matrix does not contain "'); writeln(output,learnend,'" signal'); halt end end; (* findlearnend *) procedure getcolon(var f: text); (* move to the next colon in file f *) begin (* getcolon *) while f^<>':' do begin get(f); if eof(f) then begin writeln(output,'pattern is missing colons'); halt end end; get(f); (* move past the colon *) end; (* getcolon *) procedure readmatrix(var thefile: text; var wmatrix: matrix; var beginning, width: integer); (* read a pattern matrix wmatrix from the file thefile, returning the aligned position for the first position (beginning) and the width *) var i : integer; (* an index *) ba : base; (* an index *) begin (* readmatrix *) findlearnend(thefile); (* skip min and max values *) getcolon(thefile); getcolon(thefile); getcolon(thefile); (* skip to range *) read(thefile,beginning); (* read first position of the matrix *) getcolon(thefile); (* skip to width *) read(thefile,width); (* read width of the matrix *) if (width > maxmatrix) then begin writeln(output,'input matrix too large'); halt; end; for i := 1 to width do (* read in matrix *) begin getcolon(thefile); (* skip position label *) for ba := a to t do read(thefile,wmatrix[ba,i]); readln(thefile); end; (* set all the unused elements of wmatrix to 0 *) for i := (width + 1) to maxmatrix do for ba := a to t do wmatrix[ba,i] := 0; end; (* readmatrix *) (* end module matrix.readmatrix *) (* begin module matrix.readthresholds *) procedure readthresholds(var pattern: text; var funcmin, nfuncmax: integer); (* find the funcmin and nfuncmax of the pattern *) begin (* readthresholds *) reset(pattern); findlearnend(pattern); getcolon(pattern); readln(pattern,funcmin); getcolon(pattern); readln(pattern,nfuncmax) end; (* readthresholds *) (* end module matrix.readthresholds *) (* ************************************************************************ *) (* ************************************************************************ *) procedure demohistread; (* a demonstration of the hist reading procedures *) begin readhist(hst,histmin,histmax,histwidth,histbegin,numseqs,histo); writeln(output,' the histogram has the following features:'); writeln(output,histmin:10,' is the minimum oligo searched for'); writeln(output,histmax:10,' is the maximum oligo searched for'); writeln(output,histwidth:10,' is the width of the histogram'); writeln(output,histbegin:10,' is the first position of the histogram'); writeln(output,numseqs:10, ' is the number of sequences analyzed'); end; procedure democompread; (* a demonstration of the composition reading procedures *) var comptot: compptr; (* the linked list of composition totals *) begin readmax:= 100; (* to be sure we pick all the composition *) readcomp(cmp,compmax,readmax,root,monocomptotal); writeln(output,' the composition has the following features:'); writeln(output,compmax:10,' is the maximum length determined'); writeln(output,' the total number of oligos at each level,', ' beginning with the monos, is:'); new(comptot); comptot := monocomptotal; while (comptot <> nil) do begin writeln(output,comptot^.count:10); comptot := comptot^.next; end; end; procedure demomatrixread; (* a demonstration of the pattern matrix reading procedures *) var funcmin, nfuncmax: integer; (* for readthresholds *) begin writeln(output,' the pattern matrix has the following features:'); readmatrix(patt,wmatrix,beginning,pattwidth); writeln(output,pattwidth:10,' is the width of the matrix'); writeln(output,beginning:10,' is the first position of the matrix'); readthresholds(patt,funcmin,nfuncmax); writeln(output,funcmin:10,' is the functional minimum'); writeln(output,nfuncmax:10,' is the non-functional maximum'); end; begin (* auxmod *) writeln(output,version); writeln(output,'library of auxiliary modules'); reset(hst); writeln(output); if not eof(hst) then demohistread else writeln(output,'demonstration of histogram reading requires', ' non-empty hst file.'); reset(cmp); writeln(output); if not eof(cmp) then democompread else writeln(output,'demonstration of composition reading requires', ' non-empty cmp file.'); reset(patt); writeln(output); if not eof(patt) then demomatrixread else writeln(output,'demonstration of matrix reading requires', ' non-empty patt file. '); 1: end. (* auxmod *)