program count(book,list,output);
(* count the amount of sequence in a book

   Dr. Thomas D. Schneider
   National Institutes of Health
   National Cancer Institute
   Center for Cancer Research Nanobiology Program
   Molecular Information Theory Group
   Frederick, Maryland 21702-1201
   toms@ncifcrf.gov
   permanent email: toms@alum.mit.edu (use only if first address fails)
   http://www.ccrnp.ncifcrf.gov/~toms/

   modules from: delman, delmods
*)

label 1; (* end of program; target of the goto in procedure halt *)

const
(* begin module version *)
   version = 3.08; (* of count.p 2007 Mar 14
   2007 Mar 14, 3.08: output fullname if available
   1999 Jul 15, 3.08: output number of sequences
   1991 Aug 6, 3.07: previous changes
   1982 jun 13, 1.00: origin before this date *)
(* end module version *)

(* begin module describe.count *)
(*
name
   count: count the amount of sequence in a Delila system book

synopsis
   count(book: in, list: out, output: out);

files
   book: any book from the delila system
   list: list of the book contents.  Each line:
         key name of each piece (keynam)
         full name of each piece (fulnam, if it exists, otherwise a dash)
         the number of bases in each piece
      At the end preceded by '*':
         the number of sequences in the book
         the total number of bases in the book
   output: messages to the user:
         count [version number]
         sequences
         bases

description
   Count is a tiny tool, much like a tooth pick, that is handy to have
   around.  The count is based on the coordinate system of each piece,
   not on the actual number of bases.

documentation

see also
   {program that checks piece coordinate systems:} catal.p

author
   Thomas D. Schneider

bugs

technical notes
   If the number of bases does not match the coordinate system, then no
   warning is given to the user.  If you want to check this, use the
   catal program.
*)
(* end module describe.count *)

(* begin module book.const *)
   (* constants needed for book manipulations *)
   dnamax = 1024; (* length of dna arrays *)
   namelength = 100; (* maximum key name length *)
   linelength = 200; (* maximum line readable in book *)
(* end module book.const version = 7.70; {of delmod.p 2005 Sep 15} *)

type
(* begin module book.type *)
   (* types needed for book manipulations *)
   chset = set of 'a'..'z';

   (* types defined in book definition *)
   alpha = packed array[1..namelength] of char; (* this is not alfa *)

   (* name is a left justified string with blanks following the characters *)
   name = record
      letters: alpha;
      length: 0..namelength (* zero means an unspecified structure *)
   end;

   (* lines form a singly linked list through the next field *)
   lineptr = ^line;
   line = record (* a line of characters *)
      letters: packed array [1..linelength] of char;
      length: 0..linelength;
      next: lineptr
   end;

   direction = (plus, minus, dircomplement, dirhomologous);
   configuration = (linear, circular);
   state = (on, off);

   header = record (* header of key *)
      keynam: name; (* key name of structure *)
      fulnam: lineptr; (* full name of structure *)
      note: lineptr (* note key *)
   end;

(* begin module base.type *)
   (* define the four nucleotide bases *)
   base = (a,c,g,t);
(* end module base.type version = 7.70; {of delmod.p 2005 Sep 15} *)

   (* sequence types *)
   (* a sequence is a linked list of dnastring records, each holding up to
      dnamax bases in part with length giving the number in use *)
   dnaptr = ^dnastring;
   dnarange = 0..dnamax;
   seq = packed array[1..dnamax] of base;
   dnastring = record
      part: seq;
      length: dnarange;
      next: dnaptr
   end;

   orgkey = record (* organism key *)
      hea: header;
      mapunit: lineptr (* genetic map units *)
   end;

   chrkey = record (* chromosome key *)
      hea: header;
      mapbeg: real; (* number of genetic map beginning *)
      mapend: real (* number of genetic map ending *)
   end;

   pieceptr = ^piece;
   piekey = record (* piece key *)
      hea: header;
      mapbeg: real; (* genetic map beginning *)
      coocon: configuration; (* configuration (circular/linear) *)
      coodir: direction; (* direction (+/-) relative to genetic map *)
      coobeg: integer; (* beginning nucleotide *)
      cooend: integer; (* ending nucleotide *)
      piecon: configuration; (* configuration (circular/linear) *)
      piedir: direction; (* direction (+/-) relative to coordinates *)
      piebeg: integer; (* beginning nucleotide *)
      pieend: integer; (* ending nucleotide *)
   end;
   piece = record
      key: piekey;
      dna: dnaptr
   end;

   reference = record
      pienam : name; (* name of piece referred to *)
      mapbeg : real; (* genetic map beginning *)
      refdir : direction; (* direction relative to coordinates *)
      refbeg : integer; (* beginning nucleotide *)
      refend : integer; (* ending nucleotide *)
   end;

   genkey = record (* gene key *)
      hea : header;
      ref : reference;
   end;

   trakey = record (* transcript key *)
      hea : header;
      ref : reference;
   end;

   markerptr = ^marker;
   markey = record (* marker key *)
      hea : header;
      ref : reference;
      sta : state;
      phenotype : lineptr;
      next : markerptr;
   end;
   marker = record
      key : markey;
      dna : dnaptr;
   end;
(* end module book.type version = 7.70; {of delmod.p 2005 Sep 15} *)

var
(* begin module book.var *)
(* ************************************************************************ *)
   (* global variables needed for book manipulations *)

   (* free storage: *)
   freeline: lineptr; (* unused lines *)
   freedna: dnaptr; (* unused dnas *)

   readnumber: boolean; (* whether to read a number from the notes,
                           or to read in the notes *)
   number: integer; (* the number of the item just read *)
   numbered: boolean; (* true when the item just read is numbered *)
   skipunnum: boolean; (* a control variable to allow skipping of
                          un-numbered items in the book *)
(* ************************************************************************ *)
(* end module book.var version = 7.70; {of delmod.p 2005 Sep 15} *)

   book, list: text;
   apiece: pieceptr; (* a piece *)
   length: integer; (* the length of apiece *)
   total: integer; (* total length of the sequences in a book *)
   sequences: integer; (* number of the sequences in a book *)
   theline: integer; (* the current line number in the book *)

(* begin module halt *)
procedure halt;
(* stop the program.  the procedure performs a goto to the end of the
   program.  you must have a label:
      label 1;
   declared, and also the end of the program must have this label:
      1: end.
   examples are in the module libraries.
   this is the only goto in the delila system. *)
begin
   writeln(output,' program halt.');
   goto 1
end;
(* end module halt version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module package.getpiece *)
(* ************************************************************************ *)
(* begin module package.brpiece *)
(* ************************************************************************ *)
(* begin module book.basis *)
(* procedures needed for book manipulations *)

(* get procedures should be used for all linked lists of records *)

procedure getline(var l: lineptr);
(* obtain a line from the free line list or by making a new one.
   the head of freeline is reused when the list is not empty, otherwise a
   record is allocated with new.  in both cases length is reset to zero and
   next to nil; the letters are left as they were. *)
begin
   if freeline<>nil then begin
      l:=freeline;
      freeline:=freeline^.next
   end
   else new(l);
   l^.length:=0;
   l^.next:=nil
end;

procedure getdna(var l: dnaptr);
(* obtain a dnastring from the freedna list or by making a new one,
   with length reset to zero and next set to nil (same scheme as getline) *)
begin
   if freedna<>nil then begin
      l:=freedna;
      freedna:=freedna^.next
   end
   else new(l);
   l^.length:=0;
   l^.next:=nil
end;

(* clear procedures should be called each time the records are no longer
   needed.  failure to do this may result in a stack overflow. *)

procedure clearline(var l: lineptr);
(* return a line to the free line list.
   only the first record of l is moved; l is advanced to the record that
   followed it, so callers loop to release a whole list.  nothing happens
   when l is nil. *)
var lptr: lineptr;
begin
   if l<>nil then begin
      lptr:=l;
      l:=l^.next;
      lptr^.next:=freeline;
      freeline:=lptr
   end
end;

procedure writeline(var afile: text; l: lineptr; carriagereturn: boolean);
(* write a line to a file, with carriage return if carriagereturn is true.
   l is dereferenced without a nil check, so it must point to a line. *)
var index: integer; (* index to characters in l *)
begin
   with l^ do begin
      for index := 1 to length do write(afile, letters[index]);
   end;
   if carriagereturn then writeln(afile);
end;

procedure showfreedna;
(* show the freedna list: one output line per record giving its position
   in the list and its length field *)
var
   counter: integer; (* count of freedna list *)
   l: dnaptr; (* pointer into freedna list *)
begin
   l := freedna;
   counter := 0;
   while l <> nil do begin
      counter := succ(counter);
      write(output,counter:1);
      write(output, ', length = ',l^.length:1);
      { This is illegal according to gpc because one cannot write a pointer to a text file. It can be unearthed for debugging. write(output, ', pointer id: ',l:1); }
      writeln(output);
      l := l^.next
   end;
end;

procedure cleardna(var l: dnaptr);
(* clear the dna structures to the free list.
   as with clearline, one record is moved per call and l is advanced. *)
var lptr: dnaptr;
begin
   if l<>nil then begin
      lptr:=l;
      l:=l^.next;
      lptr^.next:=freedna;
      freedna:=lptr
   end
end;

procedure clearheader(var h: header);
(* clear the header h (remove lines to free storage).
   clearline is called once for fulnam, so only its first record is
   released; every record of note is released. *)
begin
   with h do begin
      clearline(fulnam);
      while note<>nil do clearline(note)
   end
end;

procedure clearpiece(var p: pieceptr);
(* clear the dna of the piece and then its header.
   p itself is not disposed; p must not be nil. *)
begin
   while p^.dna<>nil do cleardna(p^.dna);
   clearheader(p^.key.hea)
end;

function chartobase(ch:char):base;
(* convert a character into a base.
   only the four lower case letters a, c, g and t have a case label; the
   result for any other character is not defined by this code. *)
begin
   case ch of
      'a': chartobase:=a;
      'c': chartobase:=c;
      'g': chartobase:=g;
      't': chartobase:=t
   end
end;

function basetochar(ba:base):char;
(* convert a base into a character *)
begin
   case ba of
      a: basetochar:='a';
      c: basetochar:='c';
      g: basetochar:='g';
      t: basetochar:='t';
   end
end;

function complement(ba:base):base;
(* take the complement of ba *)
begin
   case ba of
      a: complement:=t;
      c: complement:=g;
      g: complement:=c;
      t: complement:=a;
   end
end;

function chomplement(b: char): char;
(* create the character complement of base b.
   I must be getting hungry! *)
begin
   chomplement := basetochar(complement(chartobase(b)));
end;

function pietoint(p: integer; pie: pieceptr): integer;
(* p is a coordinate on the piece.  we want to transform p into
   a number from 1 to n: an internal coordinate system for easy
   manipulation of piece coordinates *)
(* Note: the dirhomologous and dircomplement are treated as
   plus and minus directions, which MIGHT NOT BE RIGHT! *)
var i: integer; (* an intermediate value *)
begin
   with pie^.key do begin
      case piedir of
         dirhomologous, plus:
            if p>=piebeg then i:=p-piebeg+1
            (* p lies before piebeg: count on through cooend and wrap to coobeg *)
            else i:=(p-coobeg)+(cooend-piebeg)+2;
         dircomplement, minus:
            if p<=piebeg then i:=piebeg-p+1
            (* p lies after piebeg: count down through coobeg and wrap to cooend *)
            else i:=(cooend-p)+(piebeg-coobeg)+2
      end;
      pietoint:=i
   end
end;

function inttopie(i: integer; pie: pieceptr):integer;
(* i is in the range 1 to some maximum.  it is an internal coordinate
   system for the program.  we want to do a coordinate transformation to
   obtain a value in the range of the piece called pie:
   i=1 corresponds to piebeg and i=its maximum corresponds to pieend *)
(* Note: the dirhomologous and dircomplement are treated as
   plus and minus directions, which MIGHT NOT BE RIGHT! *)
var p: integer; (* an intermediate value *)
begin
   with pie^.key do begin
      case piedir of
         dirhomologous, plus: begin
            p:=piebeg+(i-1);
            if p>cooend then if coocon=circular then p:=p-(cooend-coobeg+1)
         end;
         dircomplement, minus: begin
            p:=piebeg-(i-1);
            (* NOTE(review): the source text is damaged at this point.
               Everything between the words "if p" and the star test that
               belongs to skipstar is missing, so the end of inttopie and
               the header and opening statements of skipstar are absent.
               piecelength and getto are called elsewhere in this program
               but no definition of either is visible; presumably they
               were in the lost span as well - confirm.  Restore this
               region from a pristine copy of the module library before
               compiling. *)
            if p '*' then begin
               writeln(output,' procedure skipstar: bad book');
               writeln(output,' "*" expected as first character on the line, but "', thefile^,'" was found');
               halt
            end;
            get(thefile); (* skip the star *)
            if thefile^ <> ' ' then begin
               writeln(output,' procedure skipstar: bad book');
               writeln(output,' "* " expected on a line but "*', thefile^,'" was found');
               halt
            end;
            get(thefile) (* skip the blank *)
   end
end; (* skipstar *)
(* end module book.skipstar version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module book.brreanum *)
procedure brreanum(var thefile: text; var theline: integer; var reanum: real);
(* read a real number from the file.
   skipstar is called first, then the number is read with readln so the
   rest of the line is discarded, and theline is advanced by one. *)
begin
   skipstar(thefile);
   readln(thefile,reanum);
   theline := succ(theline)
end;
(* the end-module comment below is closed on the next source line *)
(* end module book.brreanum version = 7.70; {of
delmod.p 2005 Sep 15} *)

(* begin module book.brnumber *)
procedure brnumber(var thefile: text; var theline: integer; var num: integer);
(* read a number from the file.
   skipstar is called first, then the integer is read with readln so the
   rest of the line is discarded, and theline is advanced by one. *)
begin
   skipstar(thefile);
   readln(thefile,num);
   theline := succ(theline)
end;
(* end module book.brnumber version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module book.brname *)
procedure brname(var thefile: text; var theline: integer; var nam: name);
(* read a name from the file.
   characters are stored until end of line, until namelength characters
   have been stored, or until a blank has been stored; a trailing blank is
   not counted in the length. *)
var
   i: integer; (* an index to the name *)
   c: char; (* a character read *)
begin (* brname *)
   skipstar(thefile);
   with nam do begin
      length:=0;
      repeat
         length:=succ(length);
         read(thefile,c);
         letters[length] := c
      until (eoln(thefile)) or (length>=namelength) or (letters[length]=' ');
      if letters[length]=' ' then length:=length-1;
      (* NOTE(review): the source text is damaged at this point.
         Everything between the words "if length" and the middle of a
         warning message is missing, so the end of brname and the header
         and opening statements of brline are absent and the first string
         literal below starts in mid-text.  brline is called by brnote and
         brheader.  Restore this region from a pristine copy of the module
         library before compiling. *)
      if length ',linelength:1,' characters');
      writeln(output,'* Only ',linelength:1,' characters read from book');
      writeln(output,'***********************************************');
   end;
   l^.length:=i;
   l^.next:=nil;
   readln(thefile);
   theline := succ(theline)
end;
(* end module book.brline version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module book.brdirect *)
procedure brdirect(var thefile: text; var theline: integer; var direct: direction);
(* read a direction.
   a plus sign gives plus; any other character gives minus. *)
var ch: char;
begin
   skipstar(thefile);
   readln(thefile,ch);
   theline := succ(theline);
   if ch='+' then direct:=plus
   else direct:=minus
end;
(* end module book.brdirect version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module book.brconfig *)
procedure brconfig(var thefile: text; var theline: integer; var config: configuration);
(* read a configuration.
   the letter l gives linear; any other character gives circular. *)
var ch: char;
begin
   skipstar(thefile);
   readln(thefile,ch);
   theline := succ(theline);
   if ch='l' then config:=linear
   else config:=circular
end;
(* end module book.brconfig version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module book.brnotenumber *)
procedure brnotenumber(var thefile: text; var theline: integer; var note: lineptr);
(* book note reading to obtain the number of the object.
   the procedure returns the value of the number as a global.
   (this is not such a good practice, but we are stuck with it for now.)
   note is always returned as nil; the note text itself is skipped.
   the globals numbered and number are set from the first note line when
   that line starts with the number symbol. *)
begin (* brnotenumber *)
   note:=nil;
   numbered := false;
   number := 0; (* force number to zero if there is no number at all *)

   (* the next character is n or * depending on whether there are notes *)
   if thefile^ = 'n' then begin
      readln(thefile);
      theline := succ(theline);
      if thefile^ <> 'n' then begin
         skipstar(thefile);
         if not eoln(thefile) then begin
            if thefile^ = '#' then begin
               numbered := true;
               get(thefile); (* move past the number symbol *)
               read(thefile,number);
            end
         end;
         (* skip the remaining note lines up to the closing n line *)
         repeat
            readln(thefile);
            theline := succ(theline)
         until thefile^ = 'n';
         readln(thefile);
         theline := succ(theline)
      end
      else begin
         (* empty note: move past the closing n line *)
         readln(thefile);
         theline := succ(theline)
      end
   end
end; (* brnotenumber *)
(* end module book.brnotenumber version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module book.brnote *)
procedure brnote(var thefile: text; var theline: integer; var note: lineptr);
(* read note key.
   note is returned as nil when there is no note or the note is empty,
   otherwise as a linked list with one line record per note line. *)
var
   newnote: lineptr; (* the new note *)
   previousnote: lineptr; (* the last line of the notes *)
begin (* brnote *)
   note:=nil;
   if thefile^ = 'n' then begin (* enter note *)
      readln(thefile);
      theline := succ(theline);
      if thefile^ <> 'n' then begin (* abort null note (n/n) *)
         getline(note);
         newnote:=note;
         while thefile^ <> 'n' do begin (* wait until end of note *)
            brline(thefile,theline,newnote);
            previousnote:=newnote;
            (* get next note *)
            getline(newnote^.next);
            newnote:=newnote^.next;
         end;
         (* last note was not used, so: *)
         clearline(newnote);
         previousnote^.next:=nil;
         readln(thefile);
         theline := succ(theline);
      end
      else begin
         readln(thefile);
         theline := succ(theline);
      end;
   end
end; (* brnote *)
(* end module book.brnote version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module book.brheader *)
procedure brheader(var thefile: text; var theline: integer; var hea: header);
(* read the header of a key.
   one line is skipped, then the key name, the full name and the note are
   read.  the global readnumber selects whether the note is scanned for a
   number (brnotenumber) or stored (brnote). *)
begin
   with hea do begin
      readln(thefile); (* move past the object name - new definition 1999 Mar 13 *)
      theline := succ(theline); {bbb}

      (* read key name *)
      brname(thefile,theline,keynam);

      (* read full name *)
      getline(fulnam);
      brline(thefile,theline,fulnam);

      (* read note key *)
      if readnumber then brnotenumber(thefile,theline,note)
      else brnote(thefile,theline,note)
   end
end;
(* end module book.brheader version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module book.copyheader *)
procedure copyheader(fromhea: header; var tohea: header);
(* copy the header fromhea into tohea.  Note that the linked
   objects are NOT copied, but merely pointed to. *)
begin
   tohea.keynam.letters := fromhea.keynam.letters;
   tohea.keynam.length := fromhea.keynam.length;
   tohea.note := fromhea.note;
   tohea.fulnam := fromhea.fulnam;
end;
(* end module book.copyheader version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module book.brpiekey *)
procedure brpiekey(var thefile: text; var theline: integer; var pie: piekey);
(* read piece key, track the line number.
   the fields are read in this fixed order: header, map beginning, then
   configuration, direction, beginning and ending for the coordinate
   system, then the same four items for the piece. *)
begin
   with pie do begin
      brheader(thefile,theline,hea);
      brreanum(thefile,theline,mapbeg);
      brconfig(thefile,theline,coocon);
      brdirect(thefile,theline,coodir);
      brnumber(thefile,theline,coobeg);
      brnumber(thefile,theline,cooend);
      brconfig(thefile,theline,piecon);
      brdirect(thefile,theline,piedir);
      brnumber(thefile,theline,piebeg);
      brnumber(thefile,theline,pieend);
   end
end;
(* end module book.brpiekey version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module book.brdna *)
procedure brdna(var thefile: text; var theline: integer; var dna: dnaptr);
(* read in dna from thefile, track the line *)
(* note: if the dna were circularized, by linking the last dnastring to
   the first, then the cleardna routine could not clear properly, and
   would loop forever... there is no reason to do that, since a simple
   mod function will allow one to access the circle. *)
var
   ch: char;
   workdna: dnaptr;
begin
   getdna(dna);
   workdna:=dna;
   (* the value returned by getto is overwritten below before it is used;
      the call is made for its effect on the file position *)
   ch:=getto(thefile,theline,['d']);
   readln(thefile);
   theline := succ(theline);
   read(thefile,ch); (* skipstar *)
   while (ch = '*') do begin
      read(thefile,ch); (* skip blank *)
      repeat
         read(thefile,ch);
         (* characters other than a, c, g and t are ignored *)
         if ch in ['a','c','g','t'] then begin
            (* current dnastring is full: chain a fresh one *)
            if workdna^.length=dnamax then begin
               getdna(workdna^.next);
               workdna:=workdna^.next
            end;
            workdna^.length:=succ(workdna^.length);
            workdna^.part[workdna^.length]:=chartobase(ch)
         end
      until eoln(thefile);
      readln(thefile); (* go to next line *)
      theline := succ(theline);
      read(thefile,ch); (* ch is either '*' or 'd' *)
   end;
   readln(thefile); (* read past the d *)
   theline := succ(theline);
end;
(* end module book.brdna version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module book.brpiece *)
procedure brpiece(var thefile: text; var theline: integer; var pie: pieceptr);
(* read in a piece, change theline to reflect the lines traversed.
   the dna is read only when the item is numbered or when skipunnum is
   false; otherwise pie^.dna is left as it was. *)
begin
{ readln(thefile); (* move past the word 'piece' - new definition 1999 Mar 13 *) theline := succ(theline); (* BUG: was below! *) bbb}
   brpiekey(thefile,theline,pie^.key);
   if numbered or (not skipunnum) then brdna(thefile,theline,pie^.dna);
   readln(thefile); (* move past the word 'piece' - new definition 1999 Mar 13 *)
   theline := succ(theline);
end;
(* end module book.brpiece version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module book.brinit *)
procedure brinit(var book: text; var theline: integer);
(* check that the book is ok to read, and set up the global variables
   for br routines.
   the book is reset; the program halts when the book is empty or when its
   first character is not a star.  on success the free lists are emptied,
   readnumber is set true, skipunnum false and theline to 1. *)
begin (* brinit *)
   (* halt if the book is bad (first word is 'halt') or the first
      character is not * *)
   reset(book);
   if not eof(book) then begin
      (* check for the date line *)
      if book^ <> '*' then begin
         if book^ <> 'h' then writeln(output, ' this is not the first line of a book:')
         else writeln(output, ' bad book:');
         (* echo the offending first line before halting *)
         write(output, ' ');
         while not (eoln(book) or eof(book)) do begin
            write(output, book^);
            get(book)
         end;
         writeln(output);
         halt
      end
   end
   else begin
      writeln(output, ' book is empty');
      halt
   end;

   (* initialize free storage *)
   freeline:=nil;
   freedna:=nil;

   readnumber:=true; (* usually we read in numbers for items *)
   number:=0; (* arbitrary value *)
   numbered:=false; (* the piece has no number (none yet read in) *)
   skipunnum:=false;
   theline := 1;
end; (* brinit *)
(* end module book.brinit version = 7.70; {of delmod.p 2005 Sep 15} *)
(* ************************************************************************ *)
(* end module package.brpiece version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module book.getpiece *)
procedure getpiece(var thefile: text; var theline: integer; var pie: pieceptr);
(* move to and read in the next piece in the book.
   when getto returns a blank no piece is read and pie is cleared with
   clearpiece instead, so pie must already point to a piece record. *)
var ch: char;
begin
   ch:=getto(thefile,theline,['p']); (* get to the next p(iece) in the book *)
   if ch<>' ' then begin
      brpiece(thefile,theline,pie);
      { 1999 june 2: removed this: ch:=getto(thefile,theline,['p']); (* read to end of p *) }
      { bbb - now done in brpiece readln(thefile); (* read past piece *) theline := succ(theline); }
   end
   else clearpiece(pie);
end;
(* end module book.getpiece version = 7.70; {of delmod.p 2005 Sep 15} *)
(* ************************************************************************ *)
(* end module package.getpiece version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module copyaline *)
procedure copyaline(var fin, fout: text);
(* copy a line from file fin to file fout.
   characters are moved through the file buffers until end of line on fin;
   then the line is finished on both files.  fin must not be at end of
   file on entry, because eoln is tested first. *)
begin (* copyaline *)
   while not eoln(fin) do begin
      fout^ := fin^;
      put(fout);
      get(fin)
   end;
   readln(fin);
   writeln(fout);
end; (* copyaline *)
(* end module copyaline version = 7.70; {of delmod.p 2005 Sep 15} *)

(* begin module writename *)
procedure writename(var afile: text; s: name);
(* write the string s to afile.
   exactly s.length characters are written; no end of line is written. *)
var i: integer; (* index to the string s *)
begin
   with s do
      for i := 1 to length do write(afile,letters[i]);
end;
(* end module writename version = 9.25; (@ of lister.p 1999 June 9 *)

(* main program.
   the first line of the book is copied to list, then one line is written
   to list for every piece read: key name, full name (or a dash when the
   full name is empty) and the length returned by piecelength.  the number
   of sequences and the total number of bases are written to both list
   and output at the end. *)
begin (* count *)
   writeln(output,'count ',version:4:2);
   brinit(book, theline);

   rewrite(list);
   writeln(list,'* count ',version:4:2);
   copyaline(book,list);

   new(apiece);
   (* new leaves the fields of the record undefined.  getpiece calls
      clearpiece when getto returns a blank, and clearpiece follows the
      dna, fulnam and note pointers.  if that happens before any piece has
      been read the pointers must already be nil, so set them here. *)
   apiece^.dna := nil;
   apiece^.key.hea.fulnam := nil;
   apiece^.key.hea.note := nil;

   total:=0;
   sequences:=0;

   writeln(list,'* keynam fulnam bases');

   while not eof(book) do begin
      getpiece(book,theline,apiece);
      (* end of file here means that no further piece was read *)
      if not eof(book) then begin
         length:=piecelength(apiece);
         total:=total+length;
         sequences:=succ(sequences);

         { original: writeln(list,' ',apiece^.key.hea.keynam.letters, ' ',length:6,' bases'); }

         writename(list,apiece^.key.hea.keynam);
         if (apiece^.key.hea.fulnam^.length > 0) then begin
            write(list, ' ');
            writeline(list, apiece^.key.hea.fulnam, false);
         end
         else begin
            write(list, ' -');
         end;
         write(list, ' ',length:1);
         writeln(list);

         clearpiece(apiece) (* clear the piece for reuse *)
      end
   end;

   writeln(list,'*');
   { writeln(list,'* the total length of sequence in the book is ', total:1,' bases.'); writeln(list,'* the number of sequence in the book is ', sequences:1); }
   writeln(list,'* ',sequences:1, ' sequences are in the book');
   writeln(list,'* ',total:1, ' bases are in the book');

   writeln(output,sequences:1, ' sequences are in the book');
   writeln(output,total:1, ' bases are in the book');
1: end. (* count *)