program capsmark(sequ, capsmarkp, searchp, output);
(* capsmark: read sequence; make features for capitalized regions

  Tom Schneider
  NCI/FCRDC Bldg 469. Room 144
  P.O. Box B
  Frederick, MD  21702-1201
  (301) 846-5581 (-5532 for messages)
  toms@ncifcrf.gov
  http://www-lmmb.ncifcrf.gov/~toms/

  National Cancer Institute
  Laboratory of Mathematical Biology
*)

label 1; (* end of program *)

const
(* begin module version *)
   version = 1.02; (* of capsmark.p 1997 October 17
   origin 1997 October 17 *)
   updateversion = 1.00; (* defines lowest acceptable current parameter file *)
(* end module version *)

(* begin module describe.capsmark *)
(*
name
   capsmark: read sequence; make features for capitalized regions

synopsis
   capsmark(sequ: in, capsmarkp: in, searchp: out, output: out)

files
   sequ: raw DNA sequence in lower case except for objects of interest
      marked in upper case.

   searchp: search parameters for the capitalized regions

   capsmarkp: parameters to control the program.  The file must contain the
      following parameters, one per line:

      parameterversion: The version number of the program.  This allows the
         user to be warned if an old parameter file is used.

      name: a string of characters to name the sequence.

   output: messages to the user

description
   Sequences are often marked by people with capital letters to indicate
   interesting regions (exons, primers, mutations, etc).  This program
   creates lister features for a raw sequence.

examples

documentation

see also
   lister.p

author
   Thomas Dana Schneider

bugs

technical notes
   Each run of upper case bases in sequ produces two lines in searchp:
   a quoted label made of the name, a period and the position of the first
   base of the run, followed by the run itself in lower case.  Positions
   count only the characters a, c, g, t, n (either case), starting at 1.
   Any other character is reported on output and otherwise ignored.

   The name may be at most maxnamelength characters long; a longer name
   stops the program with a message.
*)
(* end module describe.capsmark *)

var
   sequ,            (* raw sequence, read by this program *)
   capsmarkp,       (* parameter file, read by this program *)
   searchp: text;   (* search features, written by this program *)

(* begin module halt *)
procedure halt;
(* stop the program.  the procedure performs a goto to the end of the
   program.  you must have a label:
      label 1;
   declared, and also the end of the program must have this label:
      1: end.
   examples are in the module libraries.
   this is the only goto in the delila system. *)
begin
   writeln(output,' program halt.');
   goto 1
end;
(* end module halt version = 'delmod 6.16 84 mar 12 tds/gds'; *)

(* begin module capitalize *)
function capitalize(c: char): char;
(* convert the character c to upper case; any character that is not
   a lower case letter is returned unchanged *)
var
   n: integer; (* the ordinal value of c *)
begin
   n := ord(c);
   if (n >= ord('a')) and (n <= ord('z'))
   then c := chr(n - ord('a') + ord('A'));
   capitalize := c
end;
(* end module capitalize prgmod *)

(* begin module decapitalize *)
function decapitalize(c: char): char;
(* convert the character c to lower case; any character that is not
   an upper case letter is returned unchanged *)
var
   n: integer; (* the ordinal value of c *)
begin
   n := ord(c);
   if (n >= ord('A')) and (n <= ord('Z'))
   then c := chr(n - ord('A') + ord('a'));
   decapitalize := c
end;
(* end module decapitalize prgmod *)

(* begin module capsmark.themain *)
procedure themain(var sequ, capsmarkp, searchp: text);
(* the main procedure of the program:
   read the parameter version and the sequence name from capsmarkp, then
   scan sequ and write one labeled search to searchp for every run of
   upper case bases.  searchp is finished with a 'q' line. *)
const
   maxnamelength = 100; (* maximum length name *)
var
   c: char; (* a character in the sequence *)
   name: array[1..maxnamelength] of char; (* name of the sequence *)
   namelength: integer; (* length of the name *)
   n: integer; (* index to the name *)
   parameterversion: real; (* parameter version number *)
   position: integer; (* position in the sequence, starting with 1 *)
   waslower: boolean; (* true if previous character was lower case *)
begin
   writeln(output,'capsmark ',version:4:2);

   reset(capsmarkp);
   (* reading from an empty file is an error, so report it cleanly *)
   if eof(capsmarkp) then begin
      writeln(output, 'The parameter file capsmarkp is empty!');
      halt
    end;
   readln(capsmarkp, parameterversion);
   if parameterversion < updateversion then begin
      writeln(output, 'You have an old parameter file!');
      halt
   end;

   (* read the name.  eoln may not be tested at the end of the file,
      so a missing name line simply gives an empty name. *)
   namelength := 0;
   if not eof(capsmarkp) then
      while not eoln(capsmarkp) do begin
         (* refuse to write past the end of the name array *)
         if namelength >= maxnamelength then begin
            writeln(output, 'The name in capsmarkp is longer than ',
                            maxnamelength:1, ' characters!');
            halt
         end;
         namelength := namelength + 1;
         read(capsmarkp, name[namelength])
      end;

   reset(sequ);
   rewrite(searchp);
   writeln(searchp,'* capsmark ',version:4:2);

   position := 0;
   waslower := true;
   while not eof(sequ) do begin
      if eoln(sequ)
      then readln(sequ) (* line breaks do not interrupt a run *)
      else begin (* not end of line *)
         read(sequ, c);
         if c in ['a','c','g','t','n'] then begin
            position := position + 1;
            (* upper case ending: close the search string line *)
            if not waslower then writeln(searchp);
            waslower := true
         end
         else if c in ['A','C','G','T','N'] then begin
            position := position + 1;
            if waslower then begin
               (* begin upper case: write the label "name.position" *)
               write(searchp,'"');
               for n := 1 to namelength do write(searchp,name[n]);
               writeln(searchp,'.',position:1,'"')
            end;
            waslower := false;
            write(searchp, decapitalize(c))
         end
         else writeln(output,'unidentified character: ', c)
      end
   end;

   (* if the sequence ends inside an upper case run the search string
      is still open; close it so that the 'q' stands on its own line *)
   if not waslower then writeln(searchp);

   writeln(searchp,'q');

(*
searchfeatures:  features for the lister program.  To start the file, simply
   provide a name inside double quotes (eg "EcoRI").  Subsequent searches
   (eg gaattc) will be labeled with that name.  To turn off the features, use
   an empty quote string, as "".  The searchfeatures file can be concatenated
   with other features to create the features file for lister.
*)

end;
(* end module capsmark.themain *)

begin
   themain(sequ, capsmarkp, searchp);
1: end.