program malin(optinst, optalign, inst, malinp, cinst, distribution, output); (* malin: make delila instructions from nth alignment of malign Dr. Thomas D. Schneider National Institutes of Health National Cancer Institute Center for Cancer Research Nanobiology Program Molecular Information Theory Group Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu (use only if first address fails) http://www.ccrnp.ncifcrf.gov/~toms/ *) label 1; (* end of program *) const (* begin module version *) version = 1.14; (* of malin.p 2009 Apr 11 2009 Apr 11, 1.14: put version next to malin in distribution output file 2005 Sep 28, 1.13: malinp example 2001 May 7, 1.12: clean up output 1.12 2001 May 7: clean up output 1.10 2000 Dec 14: handle quotes and {} comments in inst. 1.09 1999 June 15: allow: "get from 5 -0 to piece end -5;" format 1.08 1997 April 10: program generates distribution file. 1.07 1997 April 10: ability to change zero base, version control. origin 1995 October 6 *) updateversion = 1.07; (* defines lowest acceptable current parameter file *) (* end module version *) (* begin module describe.malin *) (* name malin: make delila instructions from nth alignment of malign synopsis malin(optinst: in, optalign: in, inst: in, malinp: in, cinst: out, distribution: out, output: out) files optinst: output of malign program containing absolute alignments optalign: output of malign program containing relative alignments inst: Delila instructions Allowed forms: get from 5 -5 to 5 +5; get from 5 -5 to same +5; get from 5 -5 to piece end -5; malinp: parameters to control the program first line: The version number of the program. This allows the user to be warned if an old parameter file is used. second line: one integer that defines which alignment to use to create the cinst. third line: one integer that defines how much to add to move the location of the zero base in the new instructions. cinst: Delila instructions of inst converted to the alignment of optinst chosen in malinp distribution: The distribution of the realignment. Lines that begin with "*" are comments. Otherwise, one integer per line, which is the separation in bases between the initial and final alignments. output: output program without private text description This program allows one to select one of the alignments created by malign and to make the corresponding Delila instructions. Because it copies the inst file it keeps the organism and chromosome information (along with all comments) so it is better than the "bestinst" file created by malign! examples documentation see also {example parameter file:} malinp {related programs:} malign.p, malopt.p author Thomas Dana Schneider bugs WARNING: This program does not use a book and so the coordinate shift of the third parameter will not work on coordinates that jump. technical notes NOTE: THIS PROGRAM WILL NOT HANDLE COMMENTS WITHIN THE DELILA INSTRUCTION! It must be of the form: get from 193 -20 to 193 +21; comments are allowed outside these statements. *) (* end module describe.malin *) (* begin module malin.const *) maxstring = 1500; (* the maximum string *) (* end module malin.const *) fillermax = 10; (* the size of the filler array for a string *) type (* begin module interact.type *) string = record (* a string of characters *) letters: array[1..maxstring] of char; (* the letters in the string *) length: integer; (* the number of characters in the string *) current: integer; (* the letter we are working on *) end; (* end module interact.type version = 4.16; (@ of prgmod.p 1996 August 12 *) (* begin module filler.type *) (* the following is an array used to fill a string. it is convenient to have it much shorter than the maxstring, so that it is easy to fill the string using procedure fillstring. the user must declare the value of constant fillermax. *) filler = packed array[1..fillermax] of char; (* end module filler.type version = 4.16; (@ of prgmod.p 1996 August 12 *) (* begin module trigger.type *) trigger = record (* an object to be searched for *) seek: string; (* the characters looked for *) state: integer; (* how close to triggering we are *) skip: boolean; (* trigger not found- skip the line *) found: boolean (* the trigger was found *) end; (* end module trigger.type version = 4.16; (@ of prgmod.p 1996 August 12 *) var optinst, optalign, inst, malinp, cinst, distribution: text; (* files of the program *) (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 4.16; (@ of prgmod.p 1996 August 12 *) (* begin module interact.clearstring *) procedure clearstring(var ribbon: string); (* empty the string *) var index: integer; (* to the ribbon *) begin (* clearstring *) with ribbon do begin for index := 1 to maxstring do letters[index] := ' '; length := 0; current := 0; end end; (* clearstring *) (* end module interact.clearstring version = 4.16; (@ of prgmod.p 1996 August 12 *) (* begin module interact.getstring *) procedure getstring(var afile: text; var buffer: string; var gotten: boolean); (* get a string from a file not using string calls. this lets one obtain lines from a file without interactive prompts *) var index: integer; (* of buffer *) begin (* getstring *) clearstring(buffer); if eof(afile) then gotten := false else begin index := 0; while (not eoln(afile)) and (index < maxstring) do begin index := succ(index); read(afile, buffer.letters[index]) end; if not eoln(afile) then begin writeln(output, ' getstring: a line exceeds maximum string size (', maxstring:1,')'); halt end; buffer.length := index; buffer.current := 1; readln(afile); gotten := true end end; (* getstring *) (* end module interact.getstring version = 4.16; (@ of prgmod.p 1996 August 12 *) (* begin module interact.writestring *) procedure writestring(var tofile: text; var s: string); (* write the string s to file tofile, no writeln *) var i: integer; (* index to s *) begin (* writestring *) with s do for i := 1 to length do write(tofile, letters[i]) end; (* writestring *) (* end module interact.writestring version = 4.16; (@ of prgmod.p 1996 August 12 *) (* begin module filler.fillstring *) procedure fillstring(var s: string; a: filler); (* this procedure makes it reasonably easy to fill the string s with characters. one calls the procedure as: *) (* 1 2 3 4 5 *) (* 12345678901234567890123456789012345678901234567890 *) (* fillstring(s, 'this-is-the-string '); the two comments make it easy to line the characters up. also, for this example, it was assumed that the length of filler as defined by the constant fillermax was 50. *) var length: integer; (* of the string without trailing blanks *) index: integer; (* of s *) begin (* fillstring *) clearstring(s); length := fillermax; while (length > 1) and (a[length] = ' ') do length := pred(length); if (length = 1) and (a[length] = ' ') then begin writeln(output, 'fillstring: the string is empty'); halt end; for index := 1 to length do s.letters[index] := a[index]; s.length := length; s.current := 1 end; (* fillstring *) (* end module filler.fillstring version = 4.16; (@ of prgmod.p 1996 August 12 *) (* begin module filler.filltrigger *) procedure filltrigger(var t: trigger; a: filler); (* fill the trigger t *) begin (* filltrigger *) fillstring(t.seek,a) end; (* fillstring *) (* end module filler.filltrigger version = 4.16; (@ of prgmod.p 1996 August 12 *) (* begin module trigger.proc *) (* this module allows one to scan a series of characters, as from an array or a file, and to "trigger" or detect a simple string in the series. the advantage of the trigger is that several triggers can "observe" a stream of characters at once, each looking for a different thing. some other modules required: interact.const, interact.type *) procedure resettrigger(var t: trigger); (* reset the trigger to ground state *) begin (* resettrigger *) with t do begin state := 0; skip := false; found := false end end; (* resettrigger *) procedure testfortrigger(ch: char; var t: trigger); (* look at the character ch. if it is part of the trigger (at the current trigger state), then the trigger state goes higher. if it is not part of the trigger then the trigger state is reset, skip is true and one should skip onward to find the trigger. if the trigger is found, found is true. *) begin (* testfortrigger *) with t do begin state := succ(state); (* if debugging then begin writestring(list,seek); writeln(list,'testfortrigger seek.letters[',state:1,']:', seek.letters[state],' ch:',ch); end;*) if seek.letters[state] = ch then begin skip := false; if state = seek.length then found := true else found := false end else begin (* reset trigger *) state := 0; skip := true; found := false end end end; (* testfortrigger *) (* end module trigger.proc version = 4.16; (@ of prgmod.p 1996 August 12 *) (* begin module skipblanks *) procedure skipblanks(var thefile: text); (* skip over blanks until a non-blank, or end of line, is found *) begin while (thefile^ = ' ') and not eoln(thefile) do get(thefile); end; procedure skipnonblanks(var thefile: text); (* skip over nonblanks until a blank, or end of line, is found *) begin while (thefile^ <> ' ') and not eoln(thefile) do get(thefile); end; procedure skipcolumn(var thefile: text); (* skip over a data column *) begin skipblanks(thefile); skipnonblanks(thefile) end; (* end module skipblanks version = 4.16; (@ of prgmod.p 1996 August 12 *) (* begin module malin.themain *) procedure themain(var optinst, optalign, inst, malinp, cinst, distribution: text); (* The main procedure of the program. NOTE: '@' represents '*' in comments below. *) var a: integer; (* index for reading through alignments *) alignments: integer; (* number of alignments in optinst *) analignment: integer; (* an alignment to use from optinst *) c: char; (* a character from the inst *) class: integer; (* index to the classes of alignments *) debugging: boolean; (* set to true if debugging *) fromvalue, fromrange: integer; (* the from coordinate and the range from the from *) H: real; (* uncertainty, bits *) occurences: integer; (* number of times an alignment appeared *) parameterversion: real; (* parameter version number *) s: integer; (* index for reading through alignment sequence numbers *) sequences: integer; (* number of sequences *) shift: integer; (* how much a sequence is shifted *) state: integer; (* state of the program. state = 0; scan and copy, outside comments when '(@' is found, move to state 1 t0a when 'get' is found, move to state 2 t0b when '{' is found, move to state 5 t0c when '"' is found, move to state 6 t0d when "'" is found, move to state 7 t0e state = 1; scan and copy, copy program comments when '@)' is found, move to state 0 t1a state = 2; scan and copy, find from when 'from' is found, move to state 3 t2a state = 3; read from value, relative from value, scan for 'to' when 'to' is found, move to state 4 t3a state = 4; read to values, relative to value move to state 0 state = 5; scan and copy {} comment when '}' is found, move to state 0 t5a state = 6; scan and copy " string when '"' is found, move to state 0 state = 7; scan and copy ' string when "'" is found, move to state 0 *) shutup: boolean; (* stop copying for a while *) t0a,t0b,t1a,{t1b,}t2a,{t2b,}t3a, t0c, t0d, t0e, t5a: (* new as of 2000 Dec 14 *) trigger; (* triggers for each state *) tovalue, torange: integer; (* the to coordinate and the range from the to *) theclass: integer; (* the current class according to optinst *) zerobase: integer; (* the new zero coordinate *) function sign(i: integer): char; begin if i >= 0 then sign := '+' else sign := '-' end; begin writeln(output, 'malin',version:5:2); debugging := false; (* set to true if debugging *) state := 0; filltrigger(t0a,'(* '); filltrigger(t0b,'get '); filltrigger(t0c,'{ '); filltrigger(t0d,'" '); filltrigger(t0e,''' '); filltrigger(t1a,'*) '); filltrigger(t2a,'from '); filltrigger(t3a,'to '); filltrigger(t5a,'} '); (* read parameters *) reset(malinp); readln(malinp, parameterversion); if parameterversion < updateversion then begin writeln(output, 'You have an old parameter file!'); halt end; readln(malinp, analignment); readln(malinp, zerobase); reset(optinst); readln(optinst, sequences, alignments); (* read through optinst to get to the desired alignment *) a := 1; while a < analignment do begin if eof(optinst) then begin writeln(output,'alignment ',analignment:1, ' does not exist'); halt end; a := succ(a); readln(optinst, occurences, H); for s := 1 to sequences do read(optinst, fromvalue); readln(optinst); end; readln(optinst, occurences, H); reset(inst); rewrite(cinst); rewrite(distribution); writeln(distribution, '* malin ',version:5:2,' alignment distribution'); writeln(distribution, '* alignment class: ',analignment:1); (* the method of reading optalign comes from malopt.p *) reset(optalign); {zzz move to start for reading optalign} while optalign^ = '*' do readln(optalign); readln(optalign); (* skip blank line *) readln(optalign); (* skip sequences, it should match the other file *) readln(optalign); (* skip max R line *) (* get distribution *) for class := 1 to analignment do begin if eof(optalign) then begin writeln(output, 'premature end of optalign'); halt end; readln(optalign); (* skip blank line *) readln(optalign,theclass); if theclass <> class then begin writeln(output, 'classes not being read correctly'); halt end; for s := 1 to sequences do begin read(optalign, shift); if class = analignment then begin if zerobase >= 0 then writeln(distribution,+(shift+zerobase):1) else writeln(distribution,-(shift+zerobase):1); {zzz That's how to handle the negative???} end end end; (* * relative aligned bases for the set of optimal alignments 23 sequences, 18 optimal alignments in 1001 runs 11.9820040 = H for original alignment 1) 874 occurrences, H = 7.2347730, relative aligned bases: -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 0 3 3 1 1 1 1 1 1 1 2) 72 occurrences, H = 7.4625137, relative aligned bases: 1 1 1 1 2 2 2 2 2 2 2 2 2 2 5 5 -5 3 3 3 3 3 3 *) shutup := false; while not eof(inst) do begin if not eoln(inst) then begin resettrigger(t0a); resettrigger(t0b); resettrigger(t0c); resettrigger(t0d); resettrigger(t0e); resettrigger(t1a); resettrigger(t2a); resettrigger(t3a); resettrigger(t5a); while not eoln(inst) do begin read(inst,c); if not shutup then write(cinst,c); if debugging then write(output,c,'[',state:1,']');{zzz} testfortrigger(c,t0a); testfortrigger(c,t0b); testfortrigger(c,t0c); testfortrigger(c,t0d); testfortrigger(c,t0e); testfortrigger(c,t1a); testfortrigger(c,t2a); testfortrigger(c,t3a); testfortrigger(c,t5a); case state of 0: begin if t0a.found then begin state := 1; end else if t0b.found then begin state := 2; end else if t0c.found then begin state := 5; end else if t0d.found then begin state := 6; end else if t0e.found then begin state := 7; end end; 1: begin if t1a.found then state := 0 end; 2: begin if t2a.found then begin read(inst, fromvalue, fromrange); (* grab replacement fromvalue! *) read(optinst, fromvalue); {zzz here is where the book would have to be read so that the conversion of fromvalue uses book coordinates} (* modify fromvalue here *) fromvalue := fromvalue + zerobase; write(cinst, ' ', fromvalue:1,' ', sign(fromrange),abs(fromrange):1); state := 3; end end; 3: begin if t3a.found then begin state := 4; end; end; 4: begin skipblanks(inst); if inst^ = 's' then begin (* it must be a "same" instruction - use the fromvalue! *) tovalue := fromvalue; skipnonblanks(inst); read(inst, torange); (* writeln(output,'same ', tovalue:1); *) write(cinst, 'same ', sign(torange),abs(torange):1); end else if inst^ = 'p' then begin (* it must be a "piece" instruction - use the fromvalue! *) tovalue := fromvalue; skipnonblanks(inst); { torange := -fromrange; } torange := 100; {zzz fix this later} shutup := true; (* writeln(output,'same ', tovalue:1); *) write(cinst, 'same ', sign(torange),abs(torange):1); end else begin read(inst, tovalue, torange); write(cinst, fromvalue:1,' ', sign(torange),abs(torange):1); end; state := 0; end; 5: begin {zzz} if t5a.found then begin state := 0; end; end; 6: begin if t0d.found then begin state := 0; end; end; 7: begin if t0e.found then begin state := 0; end; end; end; end; end else begin readln(inst); if debugging then writeln(output); {zzz} if shutup then begin writeln(cinst,';'); shutup := false end else writeln(cinst) end end; writeln(cinst); writeln(cinst, '(* malin',version:5:2,' *)'); writeln(cinst,'(* alignment: ',analignment:1, ', occurences: ',occurences:1, ', H: ',H:10:5,' bits *)'); writeln(output,'alignment: ',analignment:1, ', occurences: ',occurences:1, ', H: ',H:10:5,' bits'); end; (* end module malin.themain *) begin themain(optinst, optalign, inst, malinp, cinst, distribution); 1: end.