program aran(book, aranp, list, sequ, output); (* aran: aligned random sequences by Thomas Schneider copyright 1990 module libraries: delman, delmods, prgmods *) label 1; (* end of program *) const (* begin module version *) version = 1.16; (* of aran.p 1994 Sep 5 origin 1990 Oct 2 from alist *) (* end module version *) (* begin module describe.aran *) (* name aran: aligned random sequences synopsis aran(book: in, aranp: in, list: out, sequ: out, output: out) files book: the book generated by Delila aranp: Parameters to control the program. The FIRST LINE must contain one real number which is the degree of conservation. For example, if this is 0.85, then each base will have 85% chance of being the same, while the other bases will be 5% each. The SECOND LINE must contain the number of sequences to generate. list: details of the run. sequ: the aligned sequences, for input to makebk output: messages to the user description Aran takes a sequence as a starting point and generates random sequences from it. The program simulates a very simple dirty synthesis of the sequence. The synthesis is to be mostly the bases given in the sequence. The probability of conserving each base (f) is defined in the parameter file. If a particular base is not conserved, then the other three bases are assigned probabilities of (1-f)/3. documentation delman.use.aligned.books see also alist.p author Thomas D. Schneider bugs See alist technical notes The program constant seqmax defines the length of the longest sequence that can be created. *) (* end module describe.aran *) (* begin module aran.const *) seqmax = 100; (* maximum size of the sequence array *) (* end module aran.const *) (* begin module book.const *) (* constants needed for book manipulations *) dnamax = 3000; (* length of dna arrays *) namelength = 20; (* maximum key name length *) linelength = 80; (* maximum line readable in book *) (* end module book.const version = 'delmod 6.58 90 Jan 2 tds/gds' *) type (* begin module book.type *) (* types needed for book manipulations *) chset = set of 'a'..'z'; (* types defined in book definition *) alpha = packed array[1..namelength] of char; (* this is not alfa *) (* name is a left justified string with blanks following the characters *) name = record letters: alpha; length: 0..namelength (* zero means an unspecified structure *) end; lineptr = ^line; line = record (* a line of characters *) letters: packed array [1..linelength] of char; length: 0..linelength; next: lineptr end; direction = (plus, minus, dircomplement, dirhomologous); configuration = (linear, circular); state = (on, off); header = record (* header of key *) keynam: name; (* key name of structure *) fulnam: lineptr; (* full name of structure *) note: lineptr (* note key *) end; (* base types *) base = (a,c,g,t); dnaptr = ^dnastring; dnarange = 0..dnamax; seq = packed array[1..dnamax] of base; dnastring = record part: seq; length: dnarange; next: dnaptr end; orgkey = record (* organism key *) hea: header; mapunit: lineptr (* genetic map units *) end; chrkey = record (* chromosome key *) hea: header; mapbeg: real; (* number of genetic map beginning *) mapend: real (* number of genetic map ending *) end; pieceptr = ^piece; piekey = record (* piece key *) hea: header; mapbeg: real; (* genetic map beginning *) coocon: configuration; (* configruation (circular/linear) *) coodir: direction; (* direction (+/-) relative to genetic map *) coobeg: integer; (* beginning nucleotide *) cooend: integer; (* ending nucleotide *) piecon: configuration; (* configruation (circular/linear) *) piedir: direction; (* direction (+/-) relative to coordinates *) piebeg: integer; (* beginning nucleotide *) pieend: integer; (* ending nucleotide *) end; piece = record key: piekey; dna: dnaptr end; reference = record pienam : name; (* name of piece referred to *) mapbeg : real; (* genetic map beginning *) refdir : direction; (* direction relative to coordinates *) refbeg : integer; (* beginning nucleotide *) refend : integer; (* ending nucleotide *) end; genkey = record (* gene key *) hea : header; ref : reference; end; trakey = record (* transcript key *) hea : header; ref : reference; end; markerptr = ^marker; markey = record (* marker key *) hea : header; ref : reference; sta : state; phenotype : lineptr; next : markerptr; end; marker = record key : markey; dna : dnaptr; end; (* end module book.type version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module aran.type *) seqmatrix = record data: array[a..t,1..seqmax] of real; (* creation matrix *) length: integer; (* portion of the matrix in use *) end; (* end module aran.type *) var book, (* the book to be randomized *) list, (* details of the run *) sequ, (* sequences generated *) aranp: (* parameters *) text; (* begin module book.var *) (* ************************************************************************ *) (* global variables needed for book manipulations *) (* free storage: *) freeline: lineptr; (* unused lines *) freedna: dnaptr; (* unused dnas *) readnumber: boolean; (* whether to read a number from the notes, or to read in the notes *) number: integer; (* the number of the item just read *) numbered: boolean; (* true when the item just read is numbered *) skipunnum: boolean; (* a control variable to allow skipping of un-numbered items in the book *) (* ************************************************************************ *) (* end module book.var version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module copyaline *) procedure copyaline(var fin, fout: text); (* copy a line from file fin to file fout *) begin (* copyaline *) while not eoln(fin) do begin fout^ := fin^; put(fout); get(fin) end; readln(fin); writeln(fout); end; (* copyaline *) (* end module copyaline version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* ************************************************************************ *) (* begin module package.nextbase *) (* ************************************************************************ *) (* begin module package.getpiece *) (* ************************************************************************ *) (* begin module package.brpiece *) (* ************************************************************************ *) (* begin module book.basis *) (* procedures needed for book manipulations *) (* get procedures should be used for all linked lists of records *) procedure getline(var l: lineptr); (* obtain a line from the free line list or by making a new one *) begin if freeline<>nil then begin l:=freeline; freeline:=freeline^.next end else new(l); l^.length:=0; l^.next:=nil end; procedure getdna(var l: dnaptr); begin if freedna<>nil then begin l:=freedna; freedna:=freedna^.next end else new(l); l^.length:=0; l^.next:=nil end; (* clear procedures should be called each time the records are no longer needed failure to do this may result in a stack overflow. *) procedure clearline(var l: lineptr); (* return a line to the free line list *) var lptr: lineptr; begin if l<>nil then begin lptr:=l; l:=l^.next; lptr^.next:=freeline; freeline:=lptr end end; procedure cleardna(var l: dnaptr); var lptr: dnaptr; begin if l<>nil then begin lptr:=l; l:=l^.next; lptr^.next:=freedna; freedna:=lptr end end; procedure clearheader(var h: header); (* clear the header h (remove lines to free storage) *) begin with h do begin clearline(fulnam); while note<>nil do clearline(note) end end; procedure clearpiece(var p: pieceptr); (* clear the dna of the piece *) begin while p^.dna<>nil do cleardna(p^.dna); clearheader(p^.key.hea) end; function chartobase(ch:char):base; (* convert a character into a base *) begin case ch of 'a': chartobase:=a; 'c': chartobase:=c; 'g': chartobase:=g; 't': chartobase:=t end end; function basetochar(ba:base):char; (* convert a base into a character *) begin case ba of a: basetochar:='a'; c: basetochar:='c'; g: basetochar:='g'; t: basetochar:='t'; end end; function complement(ba:base):base; (* take the complement of ba *) begin case ba of a: complement:=t; c: complement:=g; g: complement:=c; t: complement:=a; end end; function pietoint(p: integer; pie: pieceptr): integer; (* p is a coordinate on the piece. we want to transform p into a number from 1 to n: an internal coordinate system for easy manipulation of piece coordinates *) var i: integer; (* an intermediate value *) begin with pie^.key do begin case piedir of plus: if p>=piebeg then i:=p-piebeg+1 else i:=(p-coobeg)+(cooend-piebeg)+2; minus: if p<=piebeg then i:=piebeg-p+1 else i:=(cooend-p)+(piebeg-coobeg)+2 end; pietoint:=i end end; function inttopie(i: integer; pie: pieceptr):integer; (* i is in the range 1 to some maximum. it is an internal coordinate system for the program. we want to do a coordinate transformation to obtain a value in the range of the piece called pie: i=1 corresponds to piebeg and i=its maximum corresponds to pieend *) var p: integer; (* an intermediate value *) begin with pie^.key do begin case piedir of plus: begin p:=piebeg+(i-1); if p>cooend then if coocon=circular then p:=p-(cooend-coobeg+1) end; minus: begin p:=piebeg-(i-1); if p '*' then begin writeln(output,' procedure skipstar: bad book'); writeln(output,' "*" expected as first character on the line, but "', thefile^,'" was found'); halt end; get(thefile); (* skip the star *) if thefile^ <> ' ' then begin writeln(output,' procedure skipstar: bad book'); writeln(output,' "* " expected on a line but "*', thefile^,'" was found'); halt end; get(thefile) (* skip the blank *) end; (* skipstar *) (* end module book.skipstar version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module book.brreanum *) procedure brreanum(var thefile: text; var reanum: real); (* read a real number from the file *) begin skipstar(thefile); readln(thefile,reanum); end; (* end module book.brreanum version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module book.brnumber *) procedure brnumber(var thefile: text; var num: integer); (* read a number from the file *) begin skipstar(thefile); readln(thefile,num) end; (* end module book.brnumber version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module book.brname *) procedure brname(var thefile: text; var nam: name); (* read a name from the file *) var i: integer; (* an index to the name *) c: char; (* a character read *) begin (* brname *) skipstar(thefile); with nam do begin length:=0; repeat length:=succ(length); read(thefile,c); letters[length] := c until (eoln(thefile)) or (length>=namelength) or (letters[length]=' '); if letters[length]=' ' then length:=length-1; if length 'n' then begin skipstar(thefile); if not eoln(thefile) then begin if thefile^ = '#' then begin numbered := true; get(thefile); (* move past the number symbol *) read(thefile,number); end end; repeat readln(thefile) until thefile^ = 'n'; readln(thefile) end else readln(thefile) end end; (* brnotenumber *) (* end module book.brnotenumber version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module book.brnote *) procedure brnote(var thefile: text; var note: lineptr); (* read note key *) var newnote: lineptr; (* the new note *) previousnote: lineptr; (* the last line of the notes *) begin (* brnote *) note:=nil; if thefile^ = 'n' then begin (* enter note *) readln(thefile); if thefile^ <> 'n' then begin (* abort null note (n/n) *) getline(note); newnote:=note; while thefile^ <> 'n' do begin (* wait until end of note *) brline(thefile,newnote); previousnote:=newnote; (* get next note *) getline(newnote^.next); newnote:=newnote^.next; end; (* last note was not used, so: *) clearline(newnote); previousnote^.next:=nil; readln(thefile) end else readln(thefile) end end; (* brnote *) (* end module book.brnote version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module book.brheader *) procedure brheader(var thefile: text; var hea: header); (* read the header of a key. *) begin with hea do begin (* read key name *) brname(thefile,keynam); (* read full name *) getline(fulnam); brline(thefile,fulnam); (* read note key *) if readnumber then brnotenumber(thefile,note) else brnote(thefile,note) end end; (* end module book.brheader version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module book.brpiekey *) procedure brpiekey(var thefile: text; var pie: piekey); (* read piece key *) begin with pie do begin brheader(thefile,hea); brreanum(thefile,mapbeg); brconfig(thefile,coocon); brdirect(thefile,coodir); brnumber(thefile,coobeg); brnumber(thefile,cooend); brconfig(thefile,piecon); brdirect(thefile,piedir); brnumber(thefile,piebeg); brnumber(thefile,pieend); end end; (* end module book.brpiekey version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module book.brdna *) procedure brdna(var thefile: text; var dna: dnaptr); (* read in dna from thefile *) (* note: if the dna were circularized, by linking the last dnastring to the first, then the cleardna routine could not clear properly, and would loop forever... there is no reason to do that, since a simple mod function will allow one to access the circle. *) var ch: char; workdna: dnaptr; begin getdna(dna); workdna:=dna; ch:=getto(thefile,['d']); read(thefile,ch); (* skipstar *) while (ch = '*') do begin read(thefile,ch); (* skip blank *) repeat read(thefile,ch); if ch in ['a','c','g','t'] then begin if workdna^.length=dnamax then begin getdna(workdna^.next); workdna:=workdna^.next end; workdna^.length:=succ(workdna^.length); workdna^.part[workdna^.length]:=chartobase(ch) end until eoln(thefile); readln(thefile); (* go to next line *) read(thefile,ch); (* ch is either '*' or 'd' *) end; readln(thefile) end; (* end module book.brdna version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module book.brpiece *) procedure brpiece(var thefile: text; var pie: pieceptr); (* read in a piece *) begin brpiekey(thefile,pie^.key); if numbered or (not skipunnum) then brdna(thefile,pie^.dna) end; (* end module book.brpiece version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module book.brinit *) procedure brinit(var book: text); (* check that the book is ok to read, and set up the global variables for br routines *) begin (* brinit *) (* halt if the book is bad (first word is 'halt') or the first character is not * *) reset(book); if not eof(book) then begin (* check for the date line *) if book^ <> '*' then begin if book^ <> 'h' then writeln(output, ' this is not the first line of a book:') else writeln(output, ' bad book:'); write(output, ' '); while not (eoln(book) or eof(book)) do begin write(output, book^); get(book) end; writeln(output); halt end end else begin writeln(output, ' book is empty'); halt end; (* initialize free storage *) freeline:=nil; freedna:=nil; readnumber:=true; (* usually we read in numbers for items *) number:=0; (* arbitrary value *) numbered:=false; (* the piece has no number (none yet read in) *) skipunnum:=false; end; (* brinit *) (* end module book.brinit version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* ************************************************************************ *) (* end module package.brpiece version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module book.getpiece *) procedure getpiece(var thefile: text; var pie: pieceptr); (* move to and read in the next piece in the book *) var ch: char; begin ch:=getto(thefile,['p']); (* get to the next p(iece) in the book *) if ch<>' ' then begin brpiece(thefile,pie); ch:=getto(thefile,['p']); (* read past closing p *) end end; (* end module book.getpiece version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* ************************************************************************ *) (* end module package.getpiece version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module book.stepbase *) function stepbase(startdna: dnaptr; var dna: dnaptr; var d: dnarange): base; (* advance d by one base in dna and then return the base at the new d. (this means that one should initialize d to zero) if we go past the last base, we restart at startdna. note: d is not the number of the base... it is used as a record for stepbase. do not mess with it, and do not use it to find out what base you are on. use a separate counter. *) begin if (d=dnamax) or (d=dna^.length) then begin d:=1; dna:=dna^.next; if dna=nil then dna:=startdna end else d:=succ(d); stepbase:=dna^.part[d] end; (* end module book.stepbase version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module nextbase *) procedure initnextbase(var book: text; var pie: pieceptr; var lastbase, endofbook: boolean); (* initialize variables for function nextbase. book is the book to be read, pie is the piece, lastbase is the flag that is true when we are at the last base of a piece, and endofbook is true if we are at the end of the book (see nextbase). *) begin brinit(book); new(pie); with pie^ do begin with key.hea do begin fulnam:=nil; note:=nil end; dna:=nil end; lastbase:=true; (* this will trigger reading of the next piece *) if eof(book) then endofbook:=true else endofbook:=false end; function nextbase(var book: text; (* the book being read *) (* the next variables can be ignored *) var pie: pieceptr; (* the piece *) var dnalink: dnaptr; (* the current link we are on *) var dnalinkspot: dnarange; (* the spot in the dnalink *) (* these are useful to the general user: *) var dnaspot, (* integer in 1 to length, which base this is *) length: integer; (* length of this piece *) var lastbase, (* true if the base was the last one on the piece. *) endofbook: boolean) (* true when we have reached the end of the book *) : base; (* the user simply declares variables for book, pie, dnalink, dnalinkspot (of the appropriate type) note: you can convert the valuespot to published coordinates by pubcoords:= inttopie(dnaspot,pie); warning: if the end of the book has been reached, then endofbook is true, but the value returned by the function has no meaning. *) begin if not lastbase then begin nextbase:=stepbase(pie^.dna,dnalink,dnalinkspot); dnaspot:=succ(dnaspot); if dnaspot = length then lastbase:=true end else begin (* we are at the last base of the previous piece *) clearpiece(pie); getpiece(book,pie); if not eof(book) then begin dnalink:=pie^.dna; dnalinkspot:=0; dnaspot:=0; length:=piecelength(pie); lastbase:=false; endofbook:=false; nextbase:=nextbase(book,pie,dnalink,dnalinkspot, dnaspot,length,lastbase,endofbook) end else begin endofbook:=true; (* we have reached the end of the book *) lastbase:=false; (* we are no longer at the last base *) nextbase:=a (* a fake value *) end end end; (* end module nextbase version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* ************************************************************************ *) (* end module package.nextbase version = 'delmod 6.58 90 Jan 2 tds/gds' *) (* begin module random *) procedure random(var seed: real); (* random generator 2. version = 1.01 of random.2 1990 Oct 2 origin 1986 December 31 Test this routine with the program tstrnd. written by David Masternarde *) (* This random number generator is based on a shift register with a single bit of feedback, as described in Electronics for Neurobiologists, by Brown, Maxfield and Moraff, MIT press 1973, referencing Random Process Simulation and Measurement by Korn, McGraw-Hill 1966. The random seed rand, a number between 0 and 1 exclusive, is converted to an integer between 1 and 2**23-1, inclusive. This 23-bit number is shifted right one bit and the output of the last (23rd) bit and the 9th bit are added modulo 2 (exclusive orred) and fed back into the new first bit. This is done between 4 and 11 times, depending on the last 3 bits of the original number. The result is converted back to a real number between 0 and 1 from which the 23 bit integer can be recovered on the next call. The 23-bit shift register goes through all 2**23-1 values before repeating; the repetition frequency of this algorithm could be less or greater depending on the seed, because of the random number of multiple shifts per call. *) (* powers of 2 *) const pow14=16384; pow15=32768; pow22= 4194304; pow23=8388608; var iseed, (* integer shift register *) i, nrep: integer; (* index, number of times to do shift *) begin (* random *) iseed := round(seed*pow23); (* convert to 23 bit number *) if (iseed<1) or (iseed>=pow23) then iseed := 1; nrep := 4 + iseed mod 8; (* do it 4 to 11 times based on last 3 bits *) for i:= 1 to nrep do (* if last bit and 9th bit are equal, feed back a 0, otherwise a 1 *) if( odd(iseed) = ((iseed mod pow15) >= pow14)) then iseed := iseed div 2 else iseed := (iseed div 2) + pow22; seed := iseed/pow23; end; (* random *) (* end module random *) (* begin module aran.readparameters *) procedure readparameters(var aranp: text; var fraction: real; var number: integer); (* read the parameters *) begin reset(aranp); if eof(aranp) then begin writeln(output,'aranp must contain two parmeters'); halt end; readln(aranp,fraction); if (fraction < 0.0) or (fraction > 1.0) then begin writeln(output,'The fraction given in aranp must be between 0 and 1'); halt end; if eof(aranp) then begin writeln(output,'aranp must contain the number of sequences to generate'); halt end; readln(aranp,number); if number < 1 then begin writeln(output,'at least one sequence must be generated'); halt end; end; (* end module aran.readparameters *) (* begin module aran.getseqmatrix *) procedure getseqmatrix(var book: text; var fout: text; conserved: real; unconserved: real; var sm: seqmatrix); (* Get the sequence matrix. The procedure is a modification of the demonextbase routine in delmod.p *) var b: base; (* a base of the sequence and index to the sm matrix *) l: integer; (* index to the sm matrix *) (* these variables are all defined in nextbase *) pie: pieceptr; dnalink: dnaptr; dnalinkspot: dnarange; dnaspot, length: integer; lastbase, endofbook: boolean; (* this variable is needed to catch the value of nextbase, so that we can check that we have not reached the end of the book. *) character: char; begin (* getseqmatrix *) (* clear the matrix completely *) for b := a to t do for l := 1 to seqmax do sm.data[b,l] := unconserved; l := 0; (* obtain the sequence, and store it in the matrix *) initnextbase(book,pie,lastbase,endofbook); repeat b := nextbase(book,pie,dnalink,dnalinkspot, dnaspot,length,lastbase,endofbook); if length > seqmax then begin writeln(output,'sequence length (',length:1, ') exceeds seqmax constant (',seqmax:1,')'); halt end; l := l + 1; sm.data[b,l] := conserved; (* insert the base into the matrix *) character:=basetochar(b); write(fout,character); (* if not endofbook then writeln(fout,' ',character,' ',dnaspot:1, ' ',inttopie(dnaspot,pie):1); if lastbase then writeln(fout,' end of a piece ',length:1,'bp') *) until lastbase; sm.length := l; writeln(fout,'.'); writeln(fout,sm.length:1,' bases'); writeln(fout,'----'); writeln(fout,'The frequency matrix:'); for l := 1 to sm.length do begin for b := a to t do write(fout,' ',sm.data[b,l]:4:2); writeln(fout); end; writeln(fout,'----'); (* now sum across the matrix so things go faster during generation *) writeln(fout,'Sum across the matrix for faster sequence generation:'); for l := 1 to sm.length do for b := c to t do sm.data[b,l] := sm.data[b,l] + sm.data[pred(b),l]; for l := 1 to sm.length do begin for b := a to t do write(fout,' ',sm.data[b,l]:4:2); writeln(fout); end; writeln(fout,'----'); end; (* getseqmatrix *) (* end module aran.getseqmatrix *) (* begin module aran.generate *) procedure generate(var seed: real; sm: seqmatrix; number: integer; var sequ: text); (* generate a number of sequences to the sequ file, given the matrix sm. seed is a random number between 0 and 1 *) const pagewidth = 60; (* width of a page for the sequence output *) var l: integer; (* index to the sm matrix *) n: integer; (* index to sequences *) begin rewrite(sequ); for n := 1 to number do begin for l := 1 to sm.length do begin if (l mod pagewidth) = 0 then writeln(sequ); random(seed); (* get a random number *) if sm.data[a,l] > seed then write(sequ,'a') else if sm.data[c,l] > seed then write(sequ,'c') else if sm.data[g,l] > seed then write(sequ,'g') else write(sequ,'t') (* must be *) end; writeln(sequ,'.'); end; end; (* end module aran.generate *) (* begin module aran.themain *) procedure themain(var book, list, sequ, aranp: text); (* the main procedure of the program *) var conserved: real; (* fraction of conserved bases *) number: integer; (* number of sequences to generate *) unconserved: real; (* fraction of each base not conserved *) seed: real; (* a random number to start the random series *) sm: seqmatrix; (* the matrix for generating the sequences *) begin writeln(output,'aran ',version:4:2); rewrite(sequ); readparameters(aranp,conserved,number); unconserved := (1.0 - conserved) / 3.0; rewrite(list); writeln(list,'aran ',version:4:2); writeln(list, ' Conserved fraction: ',conserved:4:2); writeln(list, 'unConserved fraction: ',unconserved:4:2); getseqmatrix(book,list,conserved,unconserved,sm); writeln(list, number:1,' sequences generated'); seed := 0.5; generate(seed,sm,number,sequ); end; (* end module aran.themain *) begin themain(book, list, sequ, aranp); 1: end. (* aran *)