program coda(cmp,data,codap,output); (* coda: composition file to data for genhis by thomas dana schneider, copyright 1986 module libraries required: delman, delmods, auxmods *) label 1; (* end of program *) const (* begin module version *) version = 2.05; (* of coda, 1994 Sep 5 origin 1985 apr 20 *) (* end module version *) (* begin module describe.coda *) (* name coda: composition file to data for genhis synopsis coda(cmp: in, data: out, codap: in, output: out) files cmp: a composition, the output of program comp data: identification lines are followed by the number of occurences of each oligo and the sequence of the oligo, one pair per line. the form of the file can be changed using the parameters in codap. codap: parameter file. four parameters, one per line. 1. composition depth to be used in the data file (integer) 2. the least frequent oligo to record in data (integer). 3. the most frequent oligo to record in data (integer). 4. if the first character is 'b', the number of each oligo is given before the oligo, 'a' means after. 'n' means do not give the number. 's' means the data file will be used as input to the search program. no numbers are given and commands to search are made which will result in a list of the locations of the selected oligos. if parameters 2 to 4 are missing they default to 0 100000 b. output: messages to the user description coda converts a composition file from the comp program into a list of oligos. unlike the original composition file, this list may contain all oligos of the length desired (to save space, comp removes an n-long oligo when the two n-1 long oligos inside it do not exist). however, coda can be told to only include frequent or infrequent oligos using the parameter file. two ways to use the data are: 1. use the data file as input to genhis to determine the distribution of the composition. 2. use the 's' feature to generate instructions for the search program. search converts the list of oligos to locations in a sequence. unshi then is used to remove the extra blanks and genhis then gives a map of the locations of rare or common oligos. example file: datat7 see also comp.p, genhis.p, search.p, unshi.p author thomas dana schneider bugs none known *) (* end module describe.coda *) type (* begin module coda.type *) base = (a,c,g,t); (* the components of each oligo *) parameters = record (* variables to control the program *) depth: integer; (* the depth of the composition requested to be put in the data file *) lower: integer; (* lowest frequency oligo to include in data *) upper: integer; (* highest frequency oligo to include in data *) mode: char; (* a - numbers after the oligo b - numbers before the oligo n - no numbers s - data file is input file of search *) end; (* end module coda.type *) (* begin module comp.type *) (* the composition is stored in a tree of these nodes *) compnodeptr = ^compnode; (* points to a node of the tree *) compnode = record (* a node of the composition tree *) count : integer; (* the number of oligos for this node *) son : array[base] of compnodeptr; (* the pointers to 'descendants' of this node of the tree *) end; (* spiders are used to make the composition tree *) spiderptr = ^spider; (* points to a 'spider' *) spider = record (* a spider climbs the composition tree, its path determined by the sequences, and increments 'count' at all the nodes it passes, thereby determining the composition *) depth : integer; (* the level of the node now at *) place : compnodeptr; (* a pointer to the current node *) next : spiderptr; (* the next spider in the collection *) end; (* the total number of composition entries at a given level is stored in the linked list of type comptotal *) compptr = ^comptotal; comptotal = record count: integer; (* the number at a given level *) next: compptr; (* pointer to the next level totals *) end; (* 'path' is used in printing the tree *) pathptr = ^path; (* pointer into a path on the tree *) path = record (* the path of bases to get to a particular node *) bas : base; next : pathptr; end; (* end module comp.type version = 'auxmod 1.37 85 apr 4 gds/tds'; *) var (* begin module var *) cmp,data,codap: text; (* end module var *) (* begin module package.primitive *) (* ************************************************************************ *) (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module unlimitln *) procedure unlimitln(var afile: text); (* this procedure removes a stupid system dependent limit on the number of lines that one can write to a file. you may remove it from the code if your system does not want or need this. suggested method: place comments around the contents of the procedure. *) begin (* linelimit(afile, maxint); (@ set 'infinite' lines allowed for afile *) end; (* end module unlimitln version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module copyaline *) procedure copyaline(var fin, fout: text); (* copy a line from file fin to file fout *) begin (* copyaline *) while not eoln(fin) do begin fout^ := fin^; put(fout); get(fin) end; readln(fin); writeln(fout); end; (* copyaline *) (* end module copyaline version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module copylines *) function copylines(var fin, fout: text; n: integer): integer; (* copy n lines of file fin to file fout. the actual number of lines copied is returned. *) var index: integer; (* the current line number *) begin (* copylines *) index := 0; while (not eof(fin)) and (index < n) do begin copyaline(fin, fout); index := succ(index) end; copylines := index end; (* copylines *) (* end module copylines version = 'delmod 6.51 85 apr 17 tds/gds' *) (* ************************************************************************ *) (* end module package.primitive version = 'delmod 6.51 85 apr 17 tds/gds' *) (* begin module comp.readcomp *) (* begin module skipoligo *) procedure skipoligo(var thefile: text); (* this procedure is used to skip over an oligonucleotide string. it reads until it comes to a base, and then continues to read until it comes to a blank. no checking is done for non-base characters in between. *) var ch : char; begin repeat read(thefile,ch) until (ch in ['a','c','g','t']); repeat read(thefile,ch) until (ch = ' '); end; (* end module skipoligo version = 'auxmod 1.37 85 apr 4 gds/tds'; *) procedure readcomp(var comp: text; var compmax: integer; readmax: integer; var root: compnodeptr; var monocomptotal: compptr); (* this procedure requires modules: comp.type, skipoligo, halt; this procedure reads from file 'comp' a composition and puts it into the tree pointed to by the 'root' pointer. 'compmax' is the depth of the composition tree which is stored. it is the minimum of 'readmax', the requested depth, and 'detcomp', the depth for which the input file composition was determined. 'monocomptotal' points to the beginning (for the monos) of a linked list which gives the totals for each level of the composition. *) type (* for efficient reading of the information into the tree, each node that has a successor is stored in a queue as well as put into the tree. the variables of the type list are the queue elements. *) listptr = ^list; list = record item: compnodeptr; (* points to the composition tree node *) next: listptr; (* points to the next list item in the queue *) end; var freeitem, (* the list of unused list pointers *) listitem, (* an item in the queue *) first, (* the first item in the queue *) last: listptr; (* the last item in the queue *) comptot, (* the comp total for a given level *) newcomptot: compptr; (* for adding to the string of comptot"s *) detcomp, (* the level to which the input composition was determined *) level, (* of the composition being read, i.e., monos, dis, ... *) number: integer; (* read from the 'comp' file *) ch: char; (* for reading from 'comp' *) ba: base; (* an index *) (* getitem and clearitem provide efficient use of linked list storage by keeping a list of unused pointers that can be allocated instead of always creating 'new' ones *) procedure getitem(var l: listptr); (* obtain a listitem from the free list or by making a new one *) begin if freeitem<>nil then begin l:=freeitem; freeitem:=freeitem^.next end else new(l); l^.next:=nil end; procedure clearitem(var l: listptr); (* return a listitem to the free list *) var lptr: listptr; begin if l<>nil then begin lptr:=l; l:=l^.next; lptr^.next:=freeitem; freeitem:=lptr end end; begin (* readcomp *) reset(comp); if eof(comp) then begin writeln(output,' error: no composition file provided'); halt; end; readln(comp); (* skip the program identification *) readln(comp); (* skip the book identification *) readln(comp,detcomp); (* obtain the determined composition *) readln(comp); (* skip the blank line *) (* determine the level of composition to be stored *) if (readmax < 1) then begin writeln(output); writeln(output,' warning: 0 or negative oligo length requested'); writeln(output,' composition used is depth ',detcomp:1); writeln(output); compmax := detcomp end else if (readmax > detcomp) then begin writeln(output); writeln(output,' warning: requested composition oligo', ' length (',readmax:1,')'); writeln(output,' is larger than the determined composition', ' oligo length (',detcomp:1,').'); writeln(output,' composition used is to depth ',detcomp:1); writeln(output); compmax := detcomp end else compmax := readmax; new(root); new(monocomptotal); new(comptot); new(first); new(last); first^.item := root; first^.next := nil; last := first; new(freeitem); freeitem^.next := nil; (* read in the total number of bases from the composition *) readln(comp); (* skip the * *) readln(comp); (* skip the information line *) readln(comp,number); root^.count := number; repeat read(comp,ch); (* skip the space *) read(comp,ch); (* the ch determines what to do next *) if (ch = '*') then begin (* determine the level about to be read *) readln(comp); readln(comp,level); if (level = 1) then monocomptotal := comptot else begin new(newcomptot); comptot^.next := newcomptot; comptot := newcomptot; end; comptot^.count := 0; comptot^.next := nil end else begin (* read the composition values *) for ba := a to t do begin skipoligo(comp); read(comp,number); if (number <> 0) then begin new(first^.item^.son[ba]); first^.item^.son[ba]^.count := number; comptot^.count := comptot^.count + number; getitem(listitem); last^.next := listitem; last := listitem; last^.next := nil; last^.item := first^.item^.son[ba] end else first^.item^.son[ba] := nil; end; clearitem(first); readln(comp); end; until level = compmax; (* for the last level of compositions to be read we don"t need to store the nodes in the queue because their successors will not be read in. *) repeat for ba := a to t do begin skipoligo(comp); read(comp,number); if (number <> 0) then begin new(first^.item^.son[ba]); first^.item^.son[ba]^.count := number; comptot^.count := comptot^.count + number; end else first^.item^.son[ba] := nil; end; clearitem(first); readln(comp); until (first = nil); end; (* readcomp *) function getcount(root: compnodeptr; start: pathptr): integer; (* this function follows the tree from the node 'root' (which may be the root of a subtree) along the path initiated with 'start', and returns the count of the resulting node. if the path through the tree ever hits a null node in the process the count is returned to be zero. *) var place: compnodeptr; (* a place in the composition tree *) point: pathptr; (* a point in the path through the tree *) begin point:= start; place := root; while ((place <> nil) and (point <> nil)) do begin place := place^.son[point^.bas]; point := point^.next; end; if (place = nil) then getcount := 0 else getcount := place^.count; end; (* end module comp.readcomp version = 'auxmod 1.37 85 apr 4 gds/tds'; *) (* begin module coda.init *) procedure init(var cmp,data,codap: text); (* set up the files *) begin (* init *) writeln(output,' coda ',version:4:2); reset(cmp); rewrite(data); reset(codap); unlimitln(data); end; (* init *) (* end module coda.init *) (* begin module coda.copystart *) procedure copystart(var cmp,data: text); (* pick up the first lines of the cmp file to the data file *) procedure aline; (* copy a line *) begin if copylines(cmp,data,1)<>1 then begin writeln(output,' composition file too short'); halt end; end; begin writeln(data,'* coda ',version:4:2,' from: '); write(data,'*'); aline; (* composition id line *) if not eoln(cmp) then get(cmp); (* skip the space *) aline; (* book id line *) end; (* end module coda.copystart *) (* begin module coda.getparam *) procedure getparam(var codap: text; var param: parameters); (* get the depth of the composition from the codap file *) begin with param do begin if eof(codap) then begin writeln(output,' codap is empty'); halt end; readln(codap,depth); if depth < 0 then begin write(output,' composition depth cannot be negative'); halt end; if eof(codap) then begin (* set up default values *) lower := 0; upper := 100000; mode := 'b' end else begin (* pick up the rest of the parameters *) readln(codap,lower); if lower < 0 then begin writeln(output,' lower limit of number of oligos is 0.'); halt end; if eof(codap) then begin writeln(output,' missing upper parameter'); halt end; readln(codap, upper); if upper < lower then begin writeln(output,' upper boundary cannot be below lower.'); halt end; if eof(codap) then begin writeln(output,' missing mode parameter'); halt end; readln(codap,mode); if (mode <> 'a') and (mode <> 'b') and (mode <> 'n') and (mode <> 's') then begin writeln(output, ' mode must be one of abns.'); halt end end end end; (* end module coda.getparam *) (* begin module coda.puttodata *) procedure puttodata(var data: text; root: compnodeptr; param: parameters); (* put the composition information in the tree root to the data file. use only the level specified by depth, and include all zero values. the output must be in a form acceptable to the genhis program *) const spacer = 10; (* space before each datum *) var count: integer; (* the composition count of an oligo *) d: integer; (* an index to the depth *) oligosput: integer; (* number of oligos put to data file *) p, (* index to the composition path *) proot, (* the root of the composition path *) pwrite: (* index for writing the path *) pathptr; totalcount: integer; (* total of the counts of oligos put *) procedure down(p: pathptr); (* work our way down through the oligo and at the bottom print the count and the oligo out. p is the current pointer to the depth *) var b: base; (* index to the current depth *) dwrite: integer; (* an index for writing the oligo *) begin for b := a to t do begin p^.bas := b; if p^.next <> nil then down(p^.next) else with param do begin count := getcount(root, proot); if lower <= count then if count <= upper then begin (* dump the stuff out to the data file *) if mode='b' then write(data,count:spacer); write(data,' '); pwrite := proot; for dwrite := 1 to depth do begin case pwrite^.bas of a: write(data,'a'); c: write(data,'c'); g: write(data,'g'); t: write(data,'t'); end; pwrite := pwrite^.next end; if mode='a' then write(data,count:spacer); writeln(data); oligosput := oligosput + 1; totalcount := totalcount + count end end end end; (* down *) begin (* puttodata *) with param do writeln(data,'* ',depth:1,' long oligos', ' with occurences from ',lower:1,' to ',upper:1); if param.mode='s' then begin writeln(data,'* coda search mode'); writeln(data,'view nothing view position'); end; (* construct the path for reading at the depth *) new(proot); (* this is the zero depth of the path *) p := proot; for d:=1 to param.depth-1 do begin new(p^.next); p := p^.next end; p^.next := nil; (* start up the counting *) oligosput := 0; totalcount := 0; if param.depth > 0 then down(proot) (* begin at the start of the path *) else writeln(data,getcount(root,nil):spacer); if param.mode='s' then writeln(data,'quit'); writeln(output,' ',oligosput:1,' oligos put to data file'); writeln(output,' representing ',totalcount:1, ' positions in the original sequence(s)'); end; (* end module coda.puttodata *) (* begin module coda.themain *) procedure themain(var cmp,data,codap: text); (* the main procedure of the program *) var compmax: integer; (* deepest composition in cmp *) monoct: compptr; (* keeps readcomp happy here *) param: parameters; (* for controlling the program *) root: compnodeptr; (* the composition tree *) begin (* themain *) init(cmp,data,codap); copystart(cmp,data); getparam(codap,param); readcomp(cmp,compmax,param.depth,root,monoct); if compmax < param.depth then begin writeln(output,' requested depth does not exist.'); halt end; puttodata(data,root,param); end; (* themain *) (* end module coda.themain *) begin (* coda *) themain(cmp,data,codap); 1: end. (* coda *)