program rawbk(raw, rawchanges, book, input, output); (* rawbk: make a raw sequence into a book Dr. Thomas D. Schneider National Cancer Institute Laboratory of Experimental and Computational Biology Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu http://www.lecb.ncifcrf.gov/~toms/ module libraries required: delman, prgmods, delmods *) const (* begin module version *) version = 3.22; (* of rawbk.p 2004 Mar 8 2004 Mar 8 3.22 accept upper case letters. 2000 Feb 18 3.21 upgrade dates 1999 Jan 18 3.20 previous changes origin before 1982 june 26 *) (* end module version *) (* begin module describe.rawbk *) (* name rawbk: make a raw sequence into a book synopsis rawbk(raw: in, rawchanges: out, book: out, input: intty, output: out) files raw: a file with a sequence on it, that is only the letters a,c,g,t, or u, with any spacing and carriage returns. A,C,G,T, and U, are also accepted now. rawchanges: Delila programs cannot handle sequences that have ambiguities because Delila was designed on the assumption that people would finish their sequences. Unfortunately this is not true, and the databases contain bases other than acgt to indicate ambiguity. These are converted to "a" and the cases are reported in this file. NOTE: "u" is converted to "t". book: a file which contains the sequence in the book form such that it will interface with the delila system programs. input: the interactive input from the keyboard. rawbk needs to get some information from the user to name the sequence. output: where error messages will appear. description The purpose of this program is to allow one to rapidly create a book from a raw sequence. rawbk will take a 'raw' sequence and put it into the standard form of a book so that the delila system programs can be used on the sequence. The user is asked for one name, which will become the name of all things in the book (title, organism, chromosome and piece). The program reads thru 'raw', keeping track of characters and lines. It will flag any letters other than 'a','c','g','t', or 'u', that appear in the file and note their locations. it will count the bases. if any characters were flagged, or any other error occurs, rawbk will put 'halt' into the book, in the same form the librarian does, to prevent further use of the book. Otherwise, the book is constructed to contain one piece of sequence. The coordinates begin with base 1. see also dbbk.p, makebk.p, lister.p author Thomas D. Schneider bugs The program should use book writing routines from delmods, but it has not been updated yet. *) (* end module describe.rawbk *) (* begin module interact.const *) (* begin module string.const *) maxstring = 150; (* the maximum string *) (* end module string.const version = 4.39; (@ of prgmod.p 1999 November 28 *) (* end module interact.const version = 7.62; {of delmod.p 2003 Jan 13} *) (* begin module book.const *) (* constants needed for book manipulations *) dnamax = 1024; (* length of dna arrays *) namelength = 100; (* maximum key name length *) linelength = 80; (* maximum line readable in book *) (* end module book.const version = 7.62; {of delmod.p 2003 Jan 13} *) (* begin module datetime.const *) datetimearraylength = 19; (* length of dataarray for dates, It is just long enough to include the 4 digit year - solving the year 2000 problem: 1980/06/09 18:49:11 123456789 123456789 1 2 *) (* end module datetime.const version = 7.62; {of delmod.p 2003 Jan 13} *) type (* begin module datetime.type *) (* array for dates *) datetimearray = packed array[1..datetimearraylength] of char; (* end module datetime.type version = 7.62; {of delmod.p 2003 Jan 13} *) (* begin module interact.type *) (* begin module string.type *) stringptr = ^string; (* pointer to a string *) string = record (* a string of characters *) letters: array[1..maxstring] of char; (* the letters in the string *) length: integer; (* the number of characters in the string *) current: integer; (* the letter we are working on *) next: stringptr; (* the next string in a series *) end; (* end module string.type version = 4.39; (@ of prgmod.p 1999 November 28 *) (* end module interact.type version = 7.62; {of delmod.p 2003 Jan 13} *) (* begin module book.type *) (* types needed for book manipulations *) chset = set of 'a'..'z'; (* types defined in book definition *) alpha = packed array[1..namelength] of char; (* this is not alfa *) (* name is a left justified string with blanks following the characters *) name = record letters: alpha; length: 0..namelength (* zero means an unspecified structure *) end; lineptr = ^line; line = record (* a line of characters *) letters: packed array [1..linelength] of char; length: 0..linelength; next: lineptr end; direction = (plus, minus, dircomplement, dirhomologous); configuration = (linear, circular); state = (on, off); header = record (* header of key *) keynam: name; (* key name of structure *) fulnam: lineptr; (* full name of structure *) note: lineptr (* note key *) end; (* begin module base.type *) (* define the four nucleotide bases *) base = (a,c,g,t); (* end module base.type version = 7.62; {of delmod.p 2003 Jan 13} *) (* sequence types *) dnaptr = ^dnastring; dnarange = 0..dnamax; seq = packed array[1..dnamax] of base; dnastring = record part: seq; length: dnarange; next: dnaptr end; orgkey = record (* organism key *) hea: header; mapunit: lineptr (* genetic map units *) end; chrkey = record (* chromosome key *) hea: header; mapbeg: real; (* number of genetic map beginning *) mapend: real (* number of genetic map ending *) end; pieceptr = ^piece; piekey = record (* piece key *) hea: header; mapbeg: real; (* genetic map beginning *) coocon: configuration; (* configruation (circular/linear) *) coodir: direction; (* direction (+/-) relative to genetic map *) coobeg: integer; (* beginning nucleotide *) cooend: integer; (* ending nucleotide *) piecon: configuration; (* configruation (circular/linear) *) piedir: direction; (* direction (+/-) relative to coordinates *) piebeg: integer; (* beginning nucleotide *) pieend: integer; (* ending nucleotide *) end; piece = record key: piekey; dna: dnaptr end; reference = record pienam : name; (* name of piece referred to *) mapbeg : real; (* genetic map beginning *) refdir : direction; (* direction relative to coordinates *) refbeg : integer; (* beginning nucleotide *) refend : integer; (* ending nucleotide *) end; genkey = record (* gene key *) hea : header; ref : reference; end; trakey = record (* transcript key *) hea : header; ref : reference; end; markerptr = ^marker; markey = record (* marker key *) hea : header; ref : reference; sta : state; phenotype : lineptr; next : markerptr; end; marker = record key : markey; dna : dnaptr; end; (* end module book.type version = 7.62; {of delmod.p 2003 Jan 13} *) var raw, (* the raw sequence *) rawchanges, (* changes *) book (* the book made from raw *) : text; length: integer; (* of the sequence *) error: boolean; (* true if raw is bad in some way *) thename: alpha; (* the name of the seqeuence *) (* begin module getdatetime *) procedure getdatetime(var adatetime: datetimearray); (* get the date and time into a single array from the system clock. adatetime contains the date: 1980/06/09 18:49:11 ye mo da ho mi se (year, month, day, hour, minute, second). As of 2000 February 18, the Sun Pascal compiler requires a formatting statement. This statement allows the date to be generated in this standard Delila format in a single call. Information about the formatting statement is available on the manual page for date in Unix. If a computer does not have this method, see the 'oldgetdatetime' routine in delmod.p (http://www.lecb.ncifcrf.gov/~toms/delila/delmod.html) for some conversion code. GPC Functions: function GetUnixTime (var MicroSecond : Integer) : UnixTimeType; http://agnes.dida.physik.uni-essen.de/~gnu-pascal/gpc_109.html#SEC109 7.10.8 Date And Time Routines procedure GetTimeStamp (var t : TimeStamp); function Date (t : TimeStamp) : packed array [1 .. DateLength] of Char; function Time (t : TimeStamp) : packed array [1 .. TimeLength] of Char; DateLength and TimeLength are implementation dependent constants. GetTimeStamp (t) fills the record `t' with values. If they are valid, the Boolean flags are set to True. TimeStamp is a predefined type in the Extended Pascal standard. It may be extended in an implementation, and is indeed extended in GPC. For the full definition of `TimeStamp', see section 8.255 TimeStamp. *) var t: TimeStamp; (* begin module pluckdigit *) function pluckdigit(number, logplace:integer): char; (* return the digit at the place value ('logplace') position of number. example: pluckdigit(13625, 3) = 3 pluckdigit(13625, 4) = 1 This routine was taken from module numberdigit in prgmod.p, but is modified so as not to give the sign. Instead it gives zeros above the digits. 'myabsolute' replaced 'absolute', which is apparently a keyword for GPC. The name is kept for to keep the code looking similar to its origin. *) var place: integer; (* the exponent of logplace *) count: integer; (* used to make place *) myabsolute: integer; (* the absolute value of number *) acharacter: char; (* the character to be returned *) procedure digit; (* extract a digit at the place position *) var tenplace: integer; (* ten times place *) z: integer; (* an intermediate value *) d: integer; (* the digit extracted *) begin (* digit *) tenplace:=10*place; z:=myabsolute-((myabsolute div tenplace)*tenplace); if place = 1 then d:=z else d:= z div place; case d of 0: acharacter:='0'; 1: acharacter:='1'; 2: acharacter:='2'; 3: acharacter:='3'; 4: acharacter:='4'; 5: acharacter:='5'; 6: acharacter:='6'; 7: acharacter:='7'; 8: acharacter:='8'; 9: acharacter:='9'; end end; (* digit *) begin (* pluckdigit *) place:=1; for count:=1 to logplace do place:=10*place; if number=0 then begin acharacter:='0' end else begin myabsolute:=number; if myabsolute >= place then digit else acharacter := '0' end; pluckdigit:=acharacter end; (* pluckdigit *) (* end module pluckdigit *) begin (* according to: http://agnes.dida.physik.uni-essen.de/~gnu-pascal/gpc_109.html#SEC109 *) GetTimeStamp(t); (* Predefined time stamp: http://agnes.dida.physik.uni-essen.de/~gnu-pascal/gpc_389.html#SEC389 TimeStamp = {@@packed} record DateValid, TimeValid : Boolean; Year : Integer; Month : 1 .. 12; Day : 1 .. 31; DayOfWeek : 0 .. 6; { 0 means Sunday } Hour : 0 .. 23; Minute : 0 .. 59; Second : 0 .. 61; { to allow for leap seconds } MicroSecond : 0 .. 999999 end; *) with t do begin if TimeValid then begin { writeln(output,'valid time'); writeln(output,'year =',year:4); writeln(output,'month =',month:2); writeln(output,'day =',day:2); writeln(output,'hour =',hour:2); writeln(output,'minute =',minute:2); writeln(output,'second =',second:2); } adatetime := 'year/mm/dd hh:mm:ss'; adatetime[ 1] := pluckdigit(Year,3); adatetime[ 2] := pluckdigit(Year,2); adatetime[ 3] := pluckdigit(Year,1); adatetime[ 4] := pluckdigit(Year,0); adatetime[ 6] := pluckdigit(Month,1); adatetime[ 7] := pluckdigit(Month,0); adatetime[ 9] := pluckdigit(Day,1); adatetime[10] := pluckdigit(Day,0); adatetime[12] := pluckdigit(Hour,1); adatetime[13] := pluckdigit(Hour,0); adatetime[15] := pluckdigit(Minute,1); adatetime[16] := pluckdigit(Minute,0); adatetime[18] := pluckdigit(Second,1); adatetime[19] := pluckdigit(Second,0); end else begin writeln(output,'getdatetime: invalid time!'); halt; end end; { Sun compiler method: date(adatetime,'%Y/%m/%d %H:%M:%S'); } end; (* end module getdatetime version = 7.62; {of delmod.p 2003 Jan 13} *) (* begin module writedatetime *) procedure writedatetime(var thefile: text; adatetime: datetimearray); (* expand the date and time out and print in the file *) var index: integer; (* index of datetime *) begin for index:=1 to datetimearraylength do write(thefile,adatetime[index]) end; (* end module writedatetime version = 7.62; {of delmod.p 2003 Jan 13} *) (* begin module package.interact *) (* ************************************************************************ *) (* begin module interact.prompt *) procedure prompt(var afile: text); (* prompt a file. the prompt is sent to the output file, and a line is read into the pascal line buffer. (for the cyber system this means to readln afile.) guarantee no bomb *) begin (* prompt *) if eof(afile) then reset(afile); readln(afile); end; (* prompt *) (* end module interact.prompt version = 4.59; (@ of prgmod.p 2003 Jul 31 *) (* begin module interact.readchar *) procedure readchar(var afile:text; var ch: char); (* read a character from afile, guarantee no bomb *) begin (* readchar *) if eof(afile) then prompt(afile); read(afile,ch); (*writeln(output,'"',ch,'"') *) end; (* readchar *) (* end module interact.readchar version = 4.59; (@ of prgmod.p 2003 Jul 31 *) (* begin module clearstring *) procedure clearstring(var ribbon: string); (* empty the string *) var index: integer; (* to the ribbon *) begin (* clearstring *) with ribbon do begin for index := 1 to maxstring do letters[index] := ' '; length := 0; current := 0; end end; (* clearstring *) procedure initializestring(var ribbon: string); (* start the string with a nil pointer. This routine should be called before doing linked list work. This allows the standard string routines to clear the string without killing the pointer. *) begin (* initializestring *) clearstring(ribbon); ribbon.next := nil; end; (* initializestring *) (* end module clearstring version = 4.59; (@ of prgmod.p 2003 Jul 31 *) (* begin module interact.readstring *) procedure readstring(var afile: text; var line: string); (* read in a string from afile, protect against bombing *) var index: integer; (* for line *) cha: char; (* a character read in *) done: boolean; (* used for removing trailing blanks from the line *) acceptable: boolean; (* was the line typed short enough? *) begin (* readstring *) with line do begin repeat clearstring(line); prompt(afile); index := 0; (* we now count characters *) while (not eoln(afile)) and (index < maxstring) do begin index:=succ(index); readchar(afile,cha); line.letters[index]:=cha end; if not eoln(afile) then begin writeln(output, 'type lines shorter than ', (maxstring+1): 1, ' characters. please retype the line...'); acceptable := false end else acceptable := true until acceptable; length := index; if length > 0 then begin done := false; repeat (* remove blanks from the line. note that a while loop can not be used because one must avoid letters[0], since that position does not exist... *) if letters[length] = ' ' then length := pred(length) else done := true; if length = 0 then done := true until done end; if length > 0 then current := 1 else current := 0 end end; (* readstring *) (* end module interact.readstring version = 4.59; (@ of prgmod.p 2003 Jul 31 *) (* begin module interact.figurestring *) procedure figurestring( var line: string; (* a string of characters to figure out *) var first: integer; (* first found non-blank character in the line *) var last: integer; (* last character before a blank after first *) var whzat: char; (* what the token is *) var c: char; (* the first character of the token *) var i: integer; (* integer value of token if it is integer; or 0 *) var r: real); (* the real value if it is real; or 0.0 *) (* figurestring figures out the tokens in a string. it recognizes words, integers, reals and poorly formed numbers. you can easily use it to parse lines. our goal is to figure out what thing is on a string. start looking at the current place on the line. first and last are the first 'token' in line after start. the current place is updated to the letter after last. the thing found is described by the value of whzat: 'c': character (when the token does not begin with a digit, '+', or '-') 'i': integer 'r': real ' ': blank line 'g': garbage, cannot figure it out and the value of the thing found is the appropriate variable *) var numbers: set of '0'..'9'; sign: integer; (* sign of a number *) numberstart: integer; (* the point a number starts, beyond its sign, if any *) point: integer; (* location of decimal point *) power: integer; (* of 10 representing a place value in the number *) l: integer; (* an index for dissecting numbers *) function figureinteger(first,last:integer):integer; (* figure the integer in the token *) var i: integer; (* index *) sum, increment: integer; begin (* figureinteger *) power:=1; (* start at ones place *) sum:=0; (* start sum at zero *) for i:=last downto first do begin case line.letters[i] of '0': increment:=0; '1': increment:=1; '2': increment:=2; '3': increment:=3; '4': increment:=4; '5': increment:=5; '6': increment:=6; '7': increment:=7; '8': increment:=8; '9': increment:=9 end; sum:=sum+power*increment; power:=power*10 end; figureinteger:=sum end; (* figureinteger *) begin (* figurestring *) numbers:=['0','1','2','3','4','5','6','7','8','9']; (* c:=' '; i:=0; r:=0.0; do not affect these variables unless necessary *) point:=0; whzat := '.'; (* assume that we have someting to work on *) (* now to see if that is true: *) with line do if (length = 0) or (current < 1) or (current > length) then whzat := ' ' else begin (* figure out where the first token is in the line *) first:=line.current; while (line.letters[first]=' ') and (first < line.length) do first:=succ(first); if (first = line.length) and (line.letters[first] = ' ') then whzat := ' '; end; if whzat <> ' ' then begin last:=first; while (line.letters[last]<>' ') and (last < line.length) do last:=succ(last); if line.letters[last] = ' ' then last := pred(last); (* the token is between inclusive first and last *) c:=line.letters[first]; if (c in numbers) or (c in ['+','-']) then begin if c in ['+','-'] then begin case c of '+': sign:=+1; '-': sign:=-1; end; numberstart:=succ(first) end else begin sign:=+1; numberstart:=first end; whzat:='i'; for l:=numberstart to last do begin if not(line.letters[l] in numbers) then if line.letters[l]='.' (* we found a period *) then if whzat='i' (* if so far it is numbers *) then begin whzat:='r'; (* it is actually real *) point:=l end else whzat:='g' (* it is a second '.', ie garbage *) else whzat:='g' (* it is garbage *) end; (* if it is only numbers, it is integer *) (* build number *) (* if it ends in a period, it is integer *) if (whzat = 'r') and (point = last) then whzat:='i'; if whzat = 'i' then begin if point = last (* had an ending decimal point *) then i:=sign * figureinteger(numberstart,pred(last)) else i:=sign * figureinteger(numberstart,last); r:=i end else if whzat = 'r' then begin i:=figureinteger(numberstart,point-1); r:=sign * (i+figureinteger(point+1,last)/power); i:=sign * i end end else begin whzat:='c'; end; (* move the start to just beyond the last character of the token *) line.current:=succ(last) end end; (* figurestring *) (* end module interact.figurestring version = 4.59; (@ of prgmod.p 2003 Jul 31 *) (* ************************************************************************ *) (* end module package.interact version = 4.59; (@ of prgmod.p 2003 Jul 31 *) (* begin module skipblanks *) procedure skipblanks(var thefile: text); (* skip over blanks until a non-blank, or end of line, is found *) begin while (thefile^ = ' ') and not eoln(thefile) do get(thefile); end; procedure skipnonblanks(var thefile: text); (* skip over nonblanks until a blank, or end of line, is found *) begin while (thefile^ <> ' ') and not eoln(thefile) do get(thefile); end; procedure skipcolumn(var thefile: text); (* skip over a data column *) begin skipblanks(thefile); skipnonblanks(thefile) end; (* end module skipblanks version = 7.62; {of delmod.p 2003 Jan 13} *) (* begin module onetoken *) procedure onetoken(var afile: text; var buffer: string; var gotten: boolean); (* Get a string from a file not using string calls. This lets one obtain lines from a file without interactive prompts. if end of file is found, gotten is false. *) var index: integer; (* of buffer *) done: boolean; begin (* onetoken *) skipblanks(afile); clearstring(buffer); done := false; if eof(afile) then gotten := false else begin index := 0; while (not eoln(afile)) and (index < maxstring) and not done do begin index := succ(index); read(afile, buffer.letters[index]); if buffer.letters[index] = ' ' then begin done := true; index := pred(index); end end; buffer.length := index; buffer.current := 1; gotten := true end end; (* end module onetoken version = 4.59; (@ of prgmod.p 2003 Jul 31 *) (* begin module interact.getstring *) procedure getstring(var afile: text; var buffer: string; var gotten: boolean); (* get a line (as a string) from a file not using string calls. this lets one obtain lines from a file without interactive prompts *) var index: integer; (* of buffer *) begin (* getstring *) clearstring(buffer); if eof(afile) then gotten := false else begin index := 0; while (not eoln(afile)) and (index < maxstring) do begin index := succ(index); read(afile, buffer.letters[index]) end; if not eoln(afile) then begin writeln(output, ' getstring: a line exceeds maximum string size (', maxstring:1,')'); halt end; buffer.length := index; buffer.current := 1; readln(afile); gotten := true end end; (* getstring *) (* end module interact.getstring version = 4.59; (@ of prgmod.p 2003 Jul 31 *) (* begin module decapitalize *) function decapitalize(c: char): char; (* convert the character c to lower case *) var n: integer; (* c is the n'th letter of the alphabet *) begin n := ord(c); if (n >= ord('A')) and (n <= ord('Z')) then c := chr( n - ord('A') + ord('a')) else c := chr(n); decapitalize := c end; (* end module decapitalize version = 4.59; (@ of prgmod.p 2003 Jul 31 *) procedure getname(var thename: alpha); (* get thename from the user (interactive) *) var (* for figure *) line: string; first, last: integer; whzat: char; c: char; i: integer; r: real; (* i also serves as an index *) done: boolean; gotten: boolean; begin done:=false; writeln(output,'type a name for the sequence'); repeat { zzz readstring(input,line); } getstring(input,line,gotten); if gotten then begin figurestring(line,first,last,whzat,c,i,r); case whzat of 'c','g': if (last-first+1) <= namelength - 1 then begin (* use i as an index *) for i:=1 to namelength do thename[i]:=' '; for i:=first to last do thename[i-first+1]:=line.letters[i]; done:=true end else writeln(output,'use a name shorter than ', namelength:1,' characters'); 'i','r': writeln(output,'do not type a number'); ' ': writeln(output,'type a name please') end end until done end; procedure bwritestartline(var book: text); (* write the start of the book line *) begin write(book,'* ') end; procedure writealpha(var afile: text; thename: alpha); (* write out thename with no extra blanks *) var i: integer; begin i:=1; while (i < namelength) and (thename[i]<>' ') do begin write(afile,thename[i]); i:=succ(i) end end; procedure bwritealpha(var afile: text; thealpha: alpha); begin bwritestartline(afile); writealpha(afile,thealpha); writeln(afile) end; procedure title(var book: text; thename: alpha); (* make the book title *) var adatetime: datetimearray; begin bwritestartline(book); getdatetime(adatetime); writedatetime(book,adatetime); write(book,', '); writedatetime(book,adatetime); write(book,', '); writealpha(book,thename); writeln(book) end; procedure ocbeg(var book: text; thename: alpha; length: integer); (* write organism and chromosome part of the book *) begin writeln(book,'organism'); bwritealpha(book,thename); bwritealpha(book,thename); bwritestartline(book); writeln(book,'bases'); writeln(book,'chromosome'); bwritealpha(book,thename); bwritealpha(book,thename); bwritestartline(book); writeln(book,1:1); bwritestartline(book); writeln(book,length:1) end; procedure sequence(var raw, book: text); (* copy the raw sequence into the book *) const dnalinelength = 60; (* maximum number of bases per line *) var basenumber: integer; (* current number of base *) charpos: integer; (* current number of bases per line *) ch: char; (* a base *) begin reset(raw); charpos:=1; basenumber := 0; bwritestartline(book); while not eof(raw) do begin if not eoln(raw) then begin read(raw,ch); { if ch in ['a','c','g','t','u'] then begin } (* accept non acgt characters but alter them *) if ch <> ' ' then begin if charpos > dnalinelength then begin charpos:=1; writeln(book); bwritestartline(book) end; ch := decapitalize(ch); if ch = 'u' then ch:='t'; basenumber := succ(basenumber); if not (ch in ['a','c','g','t','u']) then begin write(rawchanges,'@ '); writealpha(rawchanges,thename); write(rawchanges, ' ', basenumber:6,'.0 +1 "'); writealpha(rawchanges,thename); writeln(rawchanges, ' change:" "was ', ch,'"'); ch := 'a'; (* smash it *) end; write(book,ch); charpos:=succ(charpos) end end else readln(raw) end; writeln(book) end; procedure p(var book,raw: text; thename: alpha; length: integer); (* write the piece out to the book *) begin writeln(book,'piece'); bwritealpha(book,thename); bwritealpha(book,thename); writeln(book,'note'); bwritestartline(book); writeln(book,'# 1'); writeln(book,'note'); bwritestartline(book); writeln(book,1:1); bwritestartline(book); writeln(book,'linear'); bwritestartline(book); writeln(book,'+'); bwritestartline(book); writeln(book,1:1); bwritestartline(book); writeln(book,length:1); bwritestartline(book); writeln(book,'linear'); bwritestartline(book); writeln(book,'+'); bwritestartline(book); writeln(book,1:1); bwritestartline(book); writeln(book,length:1); writeln(book,'dna'); sequence(raw,book); writeln(book,'dna'); writeln(book,'piece'); end; procedure coend(var book: text); (* write the end of the organism and chromosome out *) begin writeln(book,'chromosome'); writeln(book,'organism'); end; procedure check(var raw: text; var length: integer; var error: boolean); (* check the raw sequence for non acgtu characters and find its length. errors are noted to output. note: check accepts acgt and u. *) var line,character: integer; (* location in the file *) ch: char; (* a character in raw *) begin reset(raw); length:=0; error:=false; line:=1; while not eof(raw) do begin character:=0; while not eoln(raw) do begin read(raw,ch); character:=succ(character); if (ch <> ' ') then length:=succ(length); ch := decapitalize(ch); if not (ch in ['a','c','g','t','u',' ']) then begin writeln(output, ' unacceptable sequence character at line ',line:1, ' character ',character:1, ':"',ch,'"'); error:=true end { else if (ch <> ' ') then length:=succ(length) } end; readln(raw); line:=succ(line) end; if length = 0 then error:=true; writeln(output,' number of bases: ',length:1); if error then writeln(output, ' odd characters in raw file, see rawchanges file') { if error then writeln(output,' error in raw file, no book made.') } end; procedure wreck(var book: text); (* destroy the book: produce the error messages that the librarian uses *) begin rewrite(book); writeln(book,'halt: error in raw sequence') end; procedure build(var raw: text; length: integer; thename: alpha; var book: text); (* build a book from the raw sequences of the name and length. note: u is converted to t. *) begin rewrite(book); rewrite(rawchanges); if error then begin rewrite(rawchanges); writeln(rawchanges,'* version ',version:4:2,' of rawbk'); end; (* define the changes *) write(rawchanges,'define "'); writealpha(rawchanges,thename); writeln(rawchanges, ' change:" " " "^" "^" 0'); (* construct the book *) title(book, thename); ocbeg(book,thename,length); p(book,raw,thename,length); coend(book) end; begin (* rawbk *) writeln(output,' rawbk ',version:4:2); { check(raw,length,error); if error then wreck(book) else begin getname(thename); build(raw,length,thename,book) end; } check(raw,length,error); getname(thename); write(output, 'The name will be "'); writealpha(output,thename); writeln(output, '".'); build(raw,length,thename,book) end. (* rawbk *)