program rean(oinst, data, reanp, dataout, strongestinst, output); (* rean: list new RBS sites and determine if they are in frame with old sites Brent Jewett National Cancer Institute Laboratory of Experimental and Computational Biology Frederick, Maryland 21702-1201 jewettb@ncifcrf.gov http://www.lecb.ncifcrf.gov/~toms/ National Cancer Institute Laboratory of Experimental and Computational Biology *) label 1; (* end of program *) const (* begin module version *) version = 1.02; (* of rean.p 2005 Jan 14 2005 Jan 15, 1.01: clean documentation 2005 Jan 14, 1.01: more output to dataout 2003 Dec 18, 1.00: origin *) updateversion = 1.00; (* defines lowest acceptable current parameter file *) (* end module version *) (* begin module describe.rean *) (* name rean: list new RBS sites and determine if they are in frame with old sites synopsis rean(oinst: in, data: in, dataout: out, strongestinst: out, output: out) files oinst: inst that defines the start and stop coordinate of a gene. data: output of the biscan program. reanp: parameters to control the program. The file must contain the following parameters, one per line: parameterversion: The version number of the program. This allows the user to be warned if an old parameter file is used. dataout: an organized table of locations of ribosome binding sites found by biscan, along with new gene length orientation etc. strongestinst: makes inst file of the RBS with the highest information content. The information content is reported in comments. description This program analyzes the output of biscan and finds the RBS with the highest information content and the RBS that is closest to the start provided in oinst (annotated start). This program is useful for reannotation. examples {The parameter file is checked but not otherwise used: } reanp documentation see also author Brent Michael Jewett bugs strongestinst is hardwired to be -30 to +20; this should be read from the parameters The global variables should be in themain. technical notes oinst file must be in the following format get from 123 +45 to 678 -910 direction -; *) (* end module describe.rean *) var data: text; (* output of biscan *) oinst: text; (* output of dbinst and contains all originally annotated start sites *) reanp: text; (* parameter file *) dataout: text; (* output of rean *) strongestinst: text; (* output of rean *) annotatedstart: integer; (* originally annotated start site from oinst*) annotatedstop: integer; (* originally annotated stop site from oinst*) betterstart: integer; (* start site of strongest found site by biscan *) bettersd: integer; (* start site of strongest sd site by biscan *) orientation: integer; (* orientation of strongest site found by biscan *) annotatedrbsstrength: real; (* information content of originally annotated site found by biscan *) betterrbsstrength: real; (* information content of strongest site found by biscan *) c: char; (* general character *) k: char; (* general character *) inframe: boolean; (* tells if stronger site is inframe with originally annotated site *) strongestrbs: real; (* the strongest ribosome binding site in bits *) strongeststart: integer; (* start coordinate of strongest start *) distance: integer; (* number of bases between original and better start site *) annotatedlength: integer; (* length of annotated gene *) newlength: integer; (* length of new gene *) storestrength: array[1..50] of integer; (* stores the inframe better site's strength *) storesdistance: array[1..50] of integer; (* stores the inframe better site's distance from annotated start site *) count: integer; (* general number counter *) rank: real; (* distance from origianlly annotated start/Information content *) bestrank: real; (* distance from origianlly annotated start/Information content *) (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 'delmod 6.16 84 mar 12 tds/gds'; *) (* begin module skipblanks *) procedure skipblanks(var thefile: text); (* skip over blanks until a non-blank, or end of line, is found *) begin while (thefile^ = ' ') and not eoln(thefile) do get(thefile); end; procedure skipnonblanks(var thefile: text); (* skip over nonblanks until a blank, or end of line, is found *) begin while (thefile^ <> ' ') and not eoln(thefile) do get(thefile); end; procedure skipcolumn(var thefile: text); (* skip over a data column *) begin skipblanks(thefile); skipnonblanks(thefile) end; (* end module skipblanks version = 4.51; (@ of prgmod.p 2000 Nov 1 *) (* begin module findnewinfo *) procedure findnewinfo(var arbss: real; var thedatafile: text; var ori: integer; var betterrbss: real; var betters, bettersd: integer); (* reads in the data file and finds the originally annotated start site strength in bits, the orientation, information content, and the start coordinate of the strongest rbs *) (* var c: char; *) begin skipcolumn(thedatafile); skipcolumn(thedatafile); skipcolumn(thedatafile); skipblanks(thedatafile); read(thedatafile, betters); skipcolumn(thedatafile); skipblanks(thedatafile); read(thedatafile, bettersd); skipcolumn(thedatafile); skipblanks(thedatafile); read(thedatafile, ori); skipcolumn(thedatafile); skipcolumn(thedatafile); skipcolumn(thedatafile); skipcolumn(thedatafile); read(data, betterrbss); end; (* end module findnewinfo *) (* begin module findoldinfo *) procedure findoldinfo(var start, stop: integer; var theinstfile: text); (* reads in the oinst file and records the originally annotated start site *) (* var k: char; *) begin skipcolumn(theinstfile); skipcolumn(theinstfile); skipblanks(theinstfile); read(theinstfile, start); skipcolumn(theinstfile); skipcolumn(theinstfile); skipblanks(theinstfile); read(theinstfile, stop); end; (* end module findoldinfo *) (* begin module inframeornot *) procedure inframeornot(var astart, bstart: integer; var frame: boolean); (* looks at stronger rbs to see if it is inframe with the annotated start codon *) var num: integer; begin num := abs(astart - bstart); while (num > 2) do begin num := num - 3; end; if (num = 0) then begin frame := true; end else begin frame := false; end; end; (* end module inframeornot *) (* begin module rean.themain *) procedure themain(var oinst, data, reanp, dataout, strongestinst: text); (* the main procedure of the program *) var parameterversion: real; (* parameter version number *) begin writeln(output,'rean ',version:4:2); reset(reanp); readln(reanp, parameterversion); if round(100*parameterversion) < round(100*updateversion) then begin writeln(output, 'You have an old parameter file!'); halt end; reset(oinst); reset(data); rewrite(strongestinst); writeln(strongestinst,'(* rean ',version:4:2,' *)'); rewrite(dataout); writeln(dataout,'* rean ',version:4:2); writeln(dataout, '* Columns:'); writeln(dataout, '* 1. Annotated Start'); writeln(dataout, '* 2. Annotated RBS strength (bits)'); writeln(dataout, '* 3. Better Start Inframe (between upstream and downstream stop codons inframe)'); writeln(dataout, '* 4. Better RBS strength (bits)'); writeln(dataout, '* 5. Orientation'); writeln(dataout, '* 6. number of bases between better start and annotated start'); writeln(dataout, '* 7. length of anntoated gene in bases'); writeln(dataout, '* 8. length of new gene in bases'); writeln(dataout, '* 9. strongest site is marked by an "!", otherwise "-"'); while not eof(oinst) do begin; read(oinst, k); while (k <> 'g') do begin readln(oinst); read(oinst, k); end; (* find the start and stop coordinate in the oinst file *) findoldinfo(annotatedstart, annotatedstop, oinst); read(data, c); while (c <> ' ') do begin readln(data); read(data, c); end; strongestrbs := -100; count := 1; bestrank := 0; while (c = ' ') do begin findnewinfo(annotatedrbsstrength, data, orientation, betterrbsstrength, betterstart, bettersd); (* write(dataout, betterstart); write(dataout, betterrbsstrength:5:6); writeln(dataout, orientation); *) inframeornot(annotatedstart, betterstart, inframe); if (betterrbsstrength > 0) then begin if (inframe = true) then begin if (betterrbsstrength > annotatedrbsstrength) then begin if (betterrbsstrength > strongestrbs) then begin strongestrbs := betterrbsstrength; strongeststart := betterstart; end; write(dataout, annotatedstart); write(dataout, annotatedrbsstrength:15:6); write(dataout, betterstart); write(dataout, betterrbsstrength:15:6); write(dataout, orientation); distance := betterstart - annotatedstart; if (distance < 0) and (orientation < 0) then begin distance := distance * orientation; end; write(dataout, distance); annotatedlength := abs(annotatedstart - annotatedstop); newlength := abs(betterstart - annotatedstop); rank := (newlength/annotatedlength); write(dataout, annotatedlength); write(dataout, newlength); writeln(dataout, rank:15:6); if (rank > bestrank) then begin bestrank := rank end; end; end; end; readln(data); read(data, c); if (betterstart = annotatedstart) then begin annotatedrbsstrength := betterrbsstrength; end; end; (* write(dataout, strongeststart); write(dataout, strongestrbs:15:6); write(dataout, orientation); distance := strongeststart - annotatedstart; write(dataout, distance); if (distance = 0) then begin write(dataout, ' = '); end else begin write(dataout, ' ! '); end; annotatedlength := abs(annotatedstart - annotatedstop); write(dataout, annotatedlength); newlength := abs(strongeststart - annotatedstop); writeln(dataout, newlength); *) { if (bestrank <> 0) then begin if (bestrank > 1) then begin end else begin (* writeln(output, bestrank:15:6); *) end; end; } if (strongestrbs = -100) then begin writeln(dataout, annotatedstart, annotatedrbsstrength:15:6, ' THERE IS NO BETTER SITE'); end; readln(data); readln(data); (* write(annotatedstart); *) if (strongestrbs > 0) then begin if (orientation < 0) then begin writeln(strongestinst, 'get from ', strongeststart, ' +30 to ', strongeststart, ' -20 direction -;', ' (* ',strongestrbs:5:2,' bits *)'); end else begin writeln(strongestinst, 'get from ', strongeststart, ' -30 to ', strongeststart, ' +20 direction +;', ' (* ',strongestrbs:5:2,' bits *)'); end; end; end; end; (* end module rean.themain *) begin themain(oinst, data, reanp, dataout, strongestinst); 1: end.