program makebk(sequ, book, changes, output, input); (* makebk: make a book from a file of raw sequences. by Gary Stormo modified by Tom Schneider Dr. Thomas D. Schneider National Cancer Institute Laboratory of Experimental and Computational Biology Molecular Information Theory Group Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu (use only if first address fails) http://www.lecb.ncifcrf.gov/~toms/ modules libraries needed: delman, delmods, prgmods *) label 1; (* end of program *) const (* begin module version *) version = 2.59; (* of makebk.p 2005 Feb 7 2005 Feb 7, 2.59: allow piece names to be listed in automatic 2005 Jan 19, 2.58: cleanup 2005 Jan 19, 2.57: bug: numberdigit changed, must account for here! 2004 Dec 15, 2.56: cleanup 2004 Dec 15, 2.55: bug: number in book not produced after # symbol 2004 Sep 8, 2.54: upgrade for compiling by gpcc 2002 Nov 1, 2.53: tweek documentation 2000 Jun 29, 2.52: accept lower or upper case sequence 2000 Jun 28, 2.51: upgrade documentation 1999 Dec 13, 2.50: 2yk and delila upgrade 1999 Jul 22: z command allows one to set the zero coordinate 1998 dec 18: allow other characters in book, create changes file. origin before 1983 april 21 *) (* end module version *) (* begin module describe.makebk *) (* name makebk: make a book from a file of sequences. synopsis makebk(sequ: in, book: out, changes: out, output: out, input: intty) files sequ: file of raw sequences, each ending in a '.'; no characters are allowed in this file except the bases (a,c,g,t,u) and period and blank. Characters other than these will be converted to 'a' and the change will be noted in the changes file. The bases can also be in capital letters. book: the output file containing the sequences and the necessary information for it to be a proper book. the user types in the required information after prompts from the program. changes: Delila programs cannot handle sequences that have ambiguities because Delila was designed on the assumption that people would finish their sequences. Unfortunately this is not true, and the databases contain bases other than acgt to indicate ambiguity. These are converted to "a" and the cases are reported in this file. NOTE: "u" is converted to "t". output: for messages and queries to the user input: interactive input. Type a carriage return to start; this is (unfortunately) ignored. Then the program asks: Is this an insertion module (m), an independent book (b), or an automatically generated book (a, z), using default values? Chose the kind of output you want. An 'a' will give you an automatically generated book, while 'z' will do the same but it will first ask you which base is to be the zero coordinate of each sequence. This allows one to shift the alignment. If you do not chose the automatic route, you will be prompted for all the parts of the book. New as of 2005 Feb 7: In the automatic route, names of pieces may follow the title. On each line: The key name is given. a space is given the rest of the line is the long name. description Makebk takes a file of raw sequences (sequ) separated by periods (.) and converts that into a proper delila book format, getting the required information from the user. The user may also have makebk fill in the piece information automatically, using default values. see also {Another program to convert a single sequence to a Delila book:} rawbk.p {A program to list the contents of your new book:} lister.p {This program evolved as an interactive one, but can be used under Unix as if it had a parameter file. To do this, execute it in this form: makebk <} makebkp {where} makebkp {is an example parameter file that will read all sequences and give a simple title. It consists of three or more lines: this line is ignored. a this line triggers automatic book generation title This is the title name1 first sequence name name2 second sequence name } author Gary Stormo bugs The delila system requires complete sequence. To handle sequences with unknown bases, they are replaced with constant chreplace. technical notes The constant chreplace replaces all unidentified characters in the sequence. *) (* end module describe.makebk *) (* begin module makebk.const *) chreplace = 'a'; (* replaces all unidentified characters *) (* end module makebk.const *) (* begin module datetime.const *) datetimearraylength = 19; (* length of dataarray for dates, It is just long enough to include the 4 digit year - solving the year 2000 problem: 1980/06/09 18:49:11 123456789 123456789 1 2 *) (* end module datetime.const version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module book.const *) (* constants needed for book manipulations *) dnamax = 1024; (* length of dna arrays *) namelength = 100; (* maximum key name length *) linelength = 80; (* maximum line readable in book *) (* end module book.const version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module interact.const *) (* begin module string.const *) maxstring = 2000; (* the maximum string *) (* end module string.const version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* end module interact.const version = 5.11; (@ of prgmod.p 2005 Jan 19 *) type (* begin module datetime.type *) (* array for dates *) datetimearray = packed array[1..datetimearraylength] of char; (* end module datetime.type version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module book.type *) (* types needed for book manipulations *) chset = set of 'a'..'z'; (* types defined in book definition *) alpha = packed array[1..namelength] of char; (* this is not alfa *) (* name is a left justified string with blanks following the characters *) name = record letters: alpha; length: 0..namelength (* zero means an unspecified structure *) end; lineptr = ^line; line = record (* a line of characters *) letters: packed array [1..linelength] of char; length: 0..linelength; next: lineptr end; direction = (plus, minus, dircomplement, dirhomologous); configuration = (linear, circular); state = (on, off); header = record (* header of key *) keynam: name; (* key name of structure *) fulnam: lineptr; (* full name of structure *) note: lineptr (* note key *) end; (* begin module base.type *) (* define the four nucleotide bases *) base = (a,c,g,t); (* end module base.type version = 7.66; {of delmod.p 2004 Aug 4} *) (* sequence types *) dnaptr = ^dnastring; dnarange = 0..dnamax; seq = packed array[1..dnamax] of base; dnastring = record part: seq; length: dnarange; next: dnaptr end; orgkey = record (* organism key *) hea: header; mapunit: lineptr (* genetic map units *) end; chrkey = record (* chromosome key *) hea: header; mapbeg: real; (* number of genetic map beginning *) mapend: real (* number of genetic map ending *) end; pieceptr = ^piece; piekey = record (* piece key *) hea: header; mapbeg: real; (* genetic map beginning *) coocon: configuration; (* configruation (circular/linear) *) coodir: direction; (* direction (+/-) relative to genetic map *) coobeg: integer; (* beginning nucleotide *) cooend: integer; (* ending nucleotide *) piecon: configuration; (* configruation (circular/linear) *) piedir: direction; (* direction (+/-) relative to coordinates *) piebeg: integer; (* beginning nucleotide *) pieend: integer; (* ending nucleotide *) end; piece = record key: piekey; dna: dnaptr end; reference = record pienam : name; (* name of piece referred to *) mapbeg : real; (* genetic map beginning *) refdir : direction; (* direction relative to coordinates *) refbeg : integer; (* beginning nucleotide *) refend : integer; (* ending nucleotide *) end; genkey = record (* gene key *) hea : header; ref : reference; end; trakey = record (* transcript key *) hea : header; ref : reference; end; markerptr = ^marker; markey = record (* marker key *) hea : header; ref : reference; sta : state; phenotype : lineptr; next : markerptr; end; marker = record key : markey; dna : dnaptr; end; (* end module book.type version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module interact.type *) (* begin module string.type *) stringptr = ^string; (* pointer to a string *) string = record (* a string of characters *) letters: array[1..maxstring] of char; (* the letters in the string *) length: integer; (* the number of characters in the string *) current: integer; (* the letter we are working on *) next: stringptr; (* the next string in a series *) end; (* end module string.type version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* end module interact.type version = 5.11; (@ of prgmod.p 2005 Jan 19 *) var (* begin module book.var *) (* ************************************************************************ *) (* global variables needed for book manipulations *) (* free storage: *) freeline: lineptr; (* unused lines *) freedna: dnaptr; (* unused dnas *) readnumber: boolean; (* whether to read a number from the notes, or to read in the notes *) number: integer; (* the number of the item just read *) numbered: boolean; (* true when the item just read is numbered *) skipunnum: boolean; (* a control variable to allow skipping of un-numbered items in the book *) (* ************************************************************************ *) (* end module book.var version = 7.66; {of delmod.p 2004 Aug 4} *) book, (* the output book *) changes, (* changes for the lister program *) sequ: text; (* the file with the dna sequence for the book *) genenum, (* number of genes for this piece *) transnum, (* number of transcripts for this piece *) marknum, (* number of markers for this piece *) index, (* an index for transnum, genenum and marknum *) i : integer; (* an index *) ch : char; (* for reading user response *) pie: pieceptr; (* pointer to the piece being used *) d: dnaptr; (* for calculating piece length *) seqnum, (* the number of the present sequence *) length: integer; (* of the piece being used *) numberpieces, (* true if pieces are to be numbered *) truebook, (* true if this is an independent book *) autobook, (* true if defaults are used for piece info *) orgopen, (* true if we have opened an organism and not closed it *) chropen: boolean; (* true if a chromosome is open *) gotten: boolean; (* when a string has been gotten from input *) aline: string; (* read from input *) adatetime: datetimearray; (* for dating the book *) zerobase: integer; (* the position that is to be the zero base in the book *) (* begin module package.primitive *) (* ************************************************************************ *) (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module copyaline *) procedure copyaline(var fin, fout: text); (* copy a line from file fin to file fout *) begin (* copyaline *) while not eoln(fin) do begin fout^ := fin^; put(fout); get(fin) end; readln(fin); writeln(fout); end; (* copyaline *) (* end module copyaline version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module copylines *) function copylines(var fin, fout: text; n: integer): integer; (* copy n lines of file fin to file fout. the actual number of lines copied is returned. *) var index: integer; (* the current line number *) begin (* copylines *) index := 0; while (not eof(fin)) and (index < n) do begin copyaline(fin, fout); index := succ(index) end; copylines := index end; (* copylines *) (* end module copylines version = 7.66; {of delmod.p 2004 Aug 4} *) (* ************************************************************************ *) (* end module package.primitive version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module getdatetime *) procedure getdatetime(var adatetime: datetimearray); (* get the date and time into a single array from the system clock. adatetime contains the date: 1980/06/09 18:49:11 ye mo da ho mi se (year, month, day, hour, minute, second). As of 2000 February 18, the Sun Pascal compiler requires a formatting statement. This statement allows the date to be generated in this standard Delila format in a single call. Information about the formatting statement is available on the manual page for date in Unix. If a computer does not have this method, see the 'oldgetdatetime' routine in delmod.p (http://www.lecb.ncifcrf.gov/~toms/delila/delmod.html) for some conversion code. GPC Functions: function GetUnixTime (var MicroSecond : Integer) : UnixTimeType; http://agnes.dida.physik.uni-essen.de/~gnu-pascal/gpc_109.html#SEC109 7.10.8 Date And Time Routines procedure GetTimeStamp (var t : TimeStamp); function Date (t : TimeStamp) : packed array [1 .. DateLength] of Char; function Time (t : TimeStamp) : packed array [1 .. TimeLength] of Char; DateLength and TimeLength are implementation dependent constants. GetTimeStamp (t) fills the record `t' with values. If they are valid, the Boolean flags are set to True. TimeStamp is a predefined type in the Extended Pascal standard. It may be extended in an implementation, and is indeed extended in GPC. For the full definition of `TimeStamp', see section 8.255 TimeStamp. *) var t: TimeStamp; (* begin module pluckdigit *) function pluckdigit(number, logplace:integer): char; (* return the digit at the place value ('logplace') position of number. example: pluckdigit(13625, 3) = 3 pluckdigit(13625, 4) = 1 This routine was taken from module numberdigit in prgmod.p, but is modified so as not to give the sign. Instead it gives zeros above the digits. 'myabsolute' replaced 'absolute', which is apparently a keyword for GPC. The name is kept for to keep the code looking similar to its origin. *) var place: integer; (* the exponent of logplace *) count: integer; (* used to make place *) myabsolute: integer; (* the absolute value of number *) acharacter: char; (* the character to be returned *) procedure digit; (* extract a digit at the place position *) var tenplace: integer; (* ten times place *) z: integer; (* an intermediate value *) d: integer; (* the digit extracted *) begin (* digit *) tenplace:=10*place; z:=myabsolute-((myabsolute div tenplace)*tenplace); if place = 1 then d:=z else d:= z div place; case d of 0: acharacter:='0'; 1: acharacter:='1'; 2: acharacter:='2'; 3: acharacter:='3'; 4: acharacter:='4'; 5: acharacter:='5'; 6: acharacter:='6'; 7: acharacter:='7'; 8: acharacter:='8'; 9: acharacter:='9'; end end; (* digit *) begin (* pluckdigit *) place:=1; for count:=1 to logplace do place:=10*place; if number=0 then begin acharacter:='0' end else begin myabsolute:=number; if myabsolute >= place then digit else acharacter := '0' end; pluckdigit:=acharacter end; (* pluckdigit *) (* end module pluckdigit *) begin (* according to: http://agnes.dida.physik.uni-essen.de/~gnu-pascal/gpc_109.html#SEC109 *) GetTimeStamp(t); (* Predefined time stamp: http://agnes.dida.physik.uni-essen.de/~gnu-pascal/gpc_389.html#SEC389 TimeStamp = {@@packed} record DateValid, TimeValid : Boolean; Year : Integer; Month : 1 .. 12; Day : 1 .. 31; DayOfWeek : 0 .. 6; { 0 means Sunday } Hour : 0 .. 23; Minute : 0 .. 59; Second : 0 .. 61; { to allow for leap seconds } MicroSecond : 0 .. 999999 end; *) with t do begin if TimeValid then begin { writeln(output,'valid time'); writeln(output,'year =',year:4); writeln(output,'month =',month:2); writeln(output,'day =',day:2); writeln(output,'hour =',hour:2); writeln(output,'minute =',minute:2); writeln(output,'second =',second:2); } adatetime := 'year/mm/dd hh:mm:ss'; adatetime[ 1] := pluckdigit(Year,3); adatetime[ 2] := pluckdigit(Year,2); adatetime[ 3] := pluckdigit(Year,1); adatetime[ 4] := pluckdigit(Year,0); adatetime[ 6] := pluckdigit(Month,1); adatetime[ 7] := pluckdigit(Month,0); adatetime[ 9] := pluckdigit(Day,1); adatetime[10] := pluckdigit(Day,0); adatetime[12] := pluckdigit(Hour,1); adatetime[13] := pluckdigit(Hour,0); adatetime[15] := pluckdigit(Minute,1); adatetime[16] := pluckdigit(Minute,0); adatetime[18] := pluckdigit(Second,1); adatetime[19] := pluckdigit(Second,0); end else begin writeln(output,'getdatetime: invalid time!'); halt; end end; { Sun compiler method: date(adatetime,'%Y/%m/%d %H:%M:%S'); } end; (* end module getdatetime version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module readdatetime *) procedure readdatetime (var thefile: text; var adatetime: datetimearray); (* read the date and time from the file. It must have this format: 123456789 123456789 1 1980/06/09 18:49:11 *) (* 2000 Oct 11: upgraded so that the p2c compiler does not object to writing out the adatetime; added checks for the date. *) var index: integer; (* to the udatetime *) (* the following is an unpacked date time array, to avoid reading into a packed array. reading into a packed array is not transportable *) udatetime: array[1..datetimearraylength] of char; begin for index:=1 to datetimearraylength do read(thefile,udatetime[index]); pack(udatetime, 1, adatetime); if (adatetime[3]='/') and (adatetime[12]=':') then begin writeln(output,' old datetime (only 2 year digits) read: '); for index:=1 to datetimearraylength do write(thefile,adatetime[index]); writeln(output); end; (* check the adatetime format. Note that further checks for the other positions in the array could be done to be sure that they are numbers. But this should be pretty good. *) if (adatetime[ 5]<>'/') or (adatetime[ 8]<>'/') or (adatetime[14]<>':') or (adatetime[17]<>':') then begin writeln(output,'readdatetime: bad date time read:'); for index:=1 to datetimearraylength do write(thefile,adatetime[index]); writeln(output); halt end; end; (* end module readdatetime version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module writedatetime *) procedure writedatetime(var thefile: text; adatetime: datetimearray); (* expand the date and time out and print in the file *) var index: integer; (* index of datetime *) begin for index:=1 to datetimearraylength do write(thefile,adatetime[index]) end; (* end module writedatetime version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module package.interact *) (* ************************************************************************ *) (* begin module interact.prompt *) procedure prompt(var afile: text); (* prompt a file. the prompt is sent to the output file, and a line is read into the pascal line buffer. (for the cyber system this means to readln afile.) guarantee no bomb *) begin (* prompt *) if eof(afile) then reset(afile); readln(afile); end; (* prompt *) (* end module interact.prompt version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module interact.readchar *) procedure readchar(var afile:text; var ch: char); (* read a character from afile, guarantee no bomb *) begin (* readchar *) if eof(afile) then prompt(afile); read(afile,ch); (*writeln(output,'"',ch,'"') *) end; (* readchar *) (* end module interact.readchar version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module clearstring *) procedure clearstring(var ribbon: string); (* empty the string *) var index: integer; (* to the ribbon *) begin (* clearstring *) with ribbon do begin for index := 1 to maxstring do letters[index] := ' '; length := 0; current := 0; next := nil; end end; (* clearstring *) procedure initializestring(var ribbon: string); (* start the string with a nil pointer. This routine should be called before doing linked list work. This allows the standard string routines to clear the string without killing the pointer. *) begin (* initializestring *) clearstring(ribbon); ribbon.next := nil; end; (* initializestring *) (* end module clearstring version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module interact.readstring *) procedure readstring(var afile: text; var line: string); (* read in a string from afile, protect against bombing *) var index: integer; (* for line *) cha: char; (* a character read in *) done: boolean; (* used for removing trailing blanks from the line *) acceptable: boolean; (* was the line typed short enough? *) begin (* readstring *) with line do begin repeat clearstring(line); prompt(afile); index := 0; (* we now count characters *) while (not eoln(afile)) and (index < maxstring) do begin index:=succ(index); readchar(afile,cha); line.letters[index]:=cha end; if not eoln(afile) then begin writeln(output, 'type lines shorter than ', (maxstring+1): 1, ' characters. please retype the line...'); acceptable := false end else acceptable := true until acceptable; length := index; if length > 0 then begin done := false; repeat (* remove blanks from the line. note that a while loop can not be used because one must avoid letters[0], since that position does not exist... *) if letters[length] = ' ' then length := pred(length) else done := true; if length = 0 then done := true until done end; if length > 0 then current := 1 else current := 0 end end; (* readstring *) (* end module interact.readstring version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module interact.figurestring *) procedure figurestring( var line: string; (* a string of characters to figure out *) var first: integer; (* first found non-blank character in the line *) var last: integer; (* last character before a blank after first *) var whzat: char; (* what the token is *) var c: char; (* the first character of the token *) var i: integer; (* integer value of token if it is integer; or 0 *) var r: real); (* the real value if it is real; or 0.0 *) (* figurestring figures out the tokens in a string. it recognizes words, integers, reals and poorly formed numbers. you can easily use it to parse lines. our goal is to figure out what thing is on a string. start looking at the current place on the line. first and last are the first 'token' in line after start. the current place is updated to the letter after last. the thing found is described by the value of whzat: 'c': character (when the token does not begin with a digit, '+', or '-') 'i': integer 'r': real ' ': blank line 'g': garbage, cannot figure it out and the value of the thing found is the appropriate variable *) var numbers: set of '0'..'9'; sign: integer; (* sign of a number *) numberstart: integer; (* the point a number starts, beyond its sign, if any *) point: integer; (* location of decimal point *) power: integer; (* of 10 representing a place value in the number *) l: integer; (* an index for dissecting numbers *) function figureinteger(first,last:integer):integer; (* figure the integer in the token *) var i: integer; (* index *) sum, increment: integer; begin (* figureinteger *) power:=1; (* start at ones place *) sum:=0; (* start sum at zero *) for i:=last downto first do begin case line.letters[i] of '0': increment:=0; '1': increment:=1; '2': increment:=2; '3': increment:=3; '4': increment:=4; '5': increment:=5; '6': increment:=6; '7': increment:=7; '8': increment:=8; '9': increment:=9 end; sum:=sum+power*increment; power:=power*10 end; figureinteger:=sum end; (* figureinteger *) begin (* figurestring *) numbers:=['0','1','2','3','4','5','6','7','8','9']; (* c:=' '; i:=0; r:=0.0; do not affect these variables unless necessary *) point:=0; whzat := '.'; (* assume that we have someting to work on *) (* now to see if that is true: *) with line do if (length = 0) or (current < 1) or (current > length) then whzat := ' ' else begin (* figure out where the first token is in the line *) first:=line.current; while (line.letters[first]=' ') and (first < line.length) do first:=succ(first); if (first = line.length) and (line.letters[first] = ' ') then whzat := ' '; end; if whzat <> ' ' then begin last:=first; while (line.letters[last]<>' ') and (last < line.length) do last:=succ(last); if line.letters[last] = ' ' then last := pred(last); (* the token is between inclusive first and last *) c:=line.letters[first]; if (c in numbers) or (c in ['+','-']) then begin if c in ['+','-'] then begin case c of '+': sign:=+1; '-': sign:=-1; end; numberstart:=succ(first) end else begin sign:=+1; numberstart:=first end; whzat:='i'; for l:=numberstart to last do begin if not(line.letters[l] in numbers) then if line.letters[l]='.' (* we found a period *) then if whzat='i' (* if so far it is numbers *) then begin whzat:='r'; (* it is actually real *) point:=l end else whzat:='g' (* it is a second '.', ie garbage *) else whzat:='g' (* it is garbage *) end; (* if it is only numbers, it is integer *) (* build number *) (* if it ends in a period, it is integer *) if (whzat = 'r') and (point = last) then whzat:='i'; if whzat = 'i' then begin if point = last (* had an ending decimal point *) then i:=sign * figureinteger(numberstart,pred(last)) else i:=sign * figureinteger(numberstart,last); r:=i end else if whzat = 'r' then begin i:=figureinteger(numberstart,point-1); r:=sign * (i+figureinteger(point+1,last)/power); i:=sign * i end end else begin whzat:='c'; end; (* move the start to just beyond the last character of the token *) line.current:=succ(last) end end; (* figurestring *) (* end module interact.figurestring version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* ************************************************************************ *) (* end module package.interact version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module package.interact.gets *) (* ************************************************************************ *) (* begin module interact.nostring *) function nostring(var buffer: string): boolean; (* true if there are no characters in the rest of the buffer; false if there are characters. also, if there is no buffer, then buffer.length is set to 0 *) var answer: boolean; (* the answer returned *) procedure kill; (* destroy the line *) begin (* kill *) answer := true; (* blood and gore *) clearstring(buffer) (* total death *) end; (* kill *) begin (* nostring *) with buffer do begin if length > 0 then begin if length < maxstring then while (letters[current] = ' ') and (current < length) do current := succ(current); if current <= maxstring then if letters[current] = ' ' then kill else answer := false else kill end else kill end; nostring := answer end; (* nostring *) (* end module interact.nostring version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module writestring *) procedure writestring(var tofile: text; var s: string); (* write the string s to file tofile, no writeln *) var i: integer; (* index to s *) begin (* writestring *) with s do for i := 1 to length do write(tofile, letters[i]) end; (* writestring *) (* end module writestring version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module interact.flagstring *) procedure flagstring(var afile: text; var buffer: string); (* flag an error in the buffer at the current place, and clear the buffer *) begin (* flagstring *) with buffer do length := current; (* chop off the rest of the buffer *) writestring(afile, buffer); (* show the buffer *) write(afile,'? '); clearstring(buffer) end; (* flagstring *) (* end module interact.flagstring version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module interact.getchar *) procedure getchar(var afile: text; var buffer: string; var cha: char; var gotten: boolean); (* get a character from the buffer, or refill the buffer and let the calling program figure out whether the buffer has non blank characters in it. *) var (* variables for calling figurestring: *) first: integer; last: integer; what: char; int: integer; rea: real; begin (* getchar *) if buffer.length = 0 then begin gotten:=false; readstring(afile,buffer) end else begin figurestring(buffer,first,last,what,cha,int,rea); gotten := (what <> ' ') end end; (* getchar *) (* end module interact.getchar version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module interact.getinteger *) procedure getinteger(var afile: text; var buffer: string; var int: integer; var gotten: boolean); (* get the integer int from the buffer or interactive file afile *) var (* variables for calling figurestring: *) first: integer; last: integer; what: char; cha: char; rea: real; begin (* getinteger *) if buffer.length = 0 then begin gotten:=false; readstring(afile,buffer) end else begin figurestring(buffer,first,last,what,cha,int,rea); if what <> 'i' then begin flagstring(output,buffer); writeln(output,' please type an integer'); gotten:=false end else gotten:=true end end; (* getinteger *) (* end module interact.getinteger version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module interact.getreal *) procedure getreal(var afile: text; var buffer: string; var rea: real; var gotten: boolean); (* get the real rea from the buffer or interactive file afile integer values are also accepted. *) var (* variables for calling figurestring: *) first: integer; last: integer; what: char; cha: char; int: integer; begin (* getreal *) if buffer.length = 0 then begin gotten:=false; readstring(afile,buffer) end else begin figurestring(buffer,first,last,what,cha,int,rea); if not (what in ['r', 'i']) then begin flagstring(output,buffer); writeln(output,' please type a real number'); gotten:=false end else gotten:=true end; (* handle integers *) if what = 'i' then rea := int end; (* getreal *) (* end module interact.getreal version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module interact.token *) procedure token(var buffer, atoken: string; var gotten: boolean); (* get a token from the buffer *) var (* variables for calling figurestring: *) first: integer; last: integer; what: char; cha: char; int: integer; rea: real; index: integer; (* to the buffer *) begin figurestring(buffer,first,last,what,cha,int,rea); if what = ' ' then gotten := false else begin clearstring(atoken); for index := first to last do atoken.letters[index-first+1] := buffer.letters[index]; atoken.length := last - first + 1; atoken.current := 1; gotten:=true end end; (* end module interact.token version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module interact.gettoken *) procedure gettoken(var afile: text; var buffer: string; var atoken: string; var gotten: boolean); (* get a token from the buffer or interactive file afile *) begin (* gettoken *) if buffer.length = 0 then begin gotten:=false; readstring(afile,buffer) end else token(buffer,atoken,gotten) end; (* gettoken *) (* end module interact.gettoken version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* ************************************************************************ *) (* end module package.interact.gets version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module numbersize *) function numbersize(n: integer):integer; (* calculate amount of space to be reserved for the integer n *) const ln10 = 2.30259; (* natural log of 10 - for conversion to log base 10 *) epsilon = 0.00001; (* a small number to correct log base 10 errors *) var size: integer; (* intermediate result *) begin (* numbersize *) if n = 0 then numbersize:=1 else begin size:=trunc(ln(abs(n))/ln10 + epsilon) + 1; (* the 1 is for the last digit *) (* the epsilon assures that we do not lose a place due to roundoff. eg, sometimes log base 10 of 10 would be 0.9999 instead of 1, and we would not do it right... note: this will fail for very large numbers on the order of 1/epsilon. *) if n < 0 then size := succ(size); (* account for minus sign *) numbersize := size; end end; (* numbersize *) (* end module numbersize version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module numberdigit *) function numberdigit(number, logplace:integer): char; (* return the digit at the place value ('logplace') position of number. example: numberdigit(13625, 3) = 3 numberdigit(13625, 4) = 1 2000 July 30 'myabsolute' replaced 'absolute', which is apparently a keyword for GPC. The name is kept to keep the code looking similar to its origin. *) var place: integer; (* the exponent of logplace *) count: integer; (* used to make place *) myabsolute: integer; (* the absolute value of number *) acharacter: char; (* the character to be returned *) procedure digit; (* extract a digit at the place position *) var tenplace: integer; (* ten times place *) z: integer; (* an intermediate value *) d: integer; (* the digit extracted *) begin (* digit *) tenplace:=10*place; z:=myabsolute-((myabsolute div tenplace)*tenplace); if place = 1 then d:=z else d:= z div place; case d of 0: acharacter:='0'; 1: acharacter:='1'; 2: acharacter:='2'; 3: acharacter:='3'; 4: acharacter:='4'; 5: acharacter:='5'; 6: acharacter:='6'; 7: acharacter:='7'; 8: acharacter:='8'; 9: acharacter:='9'; end end; (* digit *) procedure sign; (* put a negative sign out or a positive sign *) begin (* sign *) if number <0 then acharacter:='-' else acharacter:='+' end; (* sign *) begin (* numberdigit *) place:=1; for count:=1 to logplace do place:=10*place; if number=0 then begin if place=1 then acharacter:='0' else acharacter:=' ' end else begin myabsolute:=abs(number); if myabsolute < (place div 10) then acharacter:=' ' else if myabsolute >= place then digit else sign end; numberdigit:=acharacter end; (* numberdigit *) (* end module numberdigit version = 5.11; (@ of prgmod.p 2005 Jan 19 *) (* begin module book.basis *) (* procedures needed for book manipulations *) (* get procedures should be used for all linked lists of records *) procedure getline(var l: lineptr); (* obtain a line from the free line list or by making a new one *) begin if freeline<>nil then begin l:=freeline; freeline:=freeline^.next end else new(l); l^.length:=0; l^.next:=nil end; procedure getdna(var l: dnaptr); begin if freedna<>nil then begin l:=freedna; freedna:=freedna^.next end else new(l); l^.length:=0; l^.next:=nil end; (* clear procedures should be called each time the records are no longer needed failure to do this may result in a stack overflow. *) procedure clearline(var l: lineptr); (* return a line to the free line list *) var lptr: lineptr; begin if l<>nil then begin lptr:=l; l:=l^.next; lptr^.next:=freeline; freeline:=lptr end end; procedure writeline(var afile: text; l: lineptr; carriagereturn: boolean); (* write a line to a file, with carriage return if carriagereturn is true. *) var index: integer; (* index to characters in l *) begin with l^ do begin for index := 1 to length do write(afile, letters[index]); end; if carriagereturn then writeln(afile); end; procedure showfreedna; (* show the freedna list *) var counter: integer; (* count of freedna list *) l: dnaptr; (* pointer into freedna list *) begin l := freedna; counter := 0; while l <> nil do begin counter := succ(counter); write(output,counter:1); write(output, ', length = ',l^.length:1); { This is illegal according to gpc because one cannot write a pointer to a text file. It can be unearthed for debugging. write(output, ', pointer id: ',l:1); } writeln(output); l := l^.next end; end; procedure cleardna(var l: dnaptr); (* clear the dna strutures to the free list *) var lptr: dnaptr; begin if l<>nil then begin lptr:=l; l:=l^.next; lptr^.next:=freedna; freedna:=lptr end end; procedure clearheader(var h: header); (* clear the header h (remove lines to free storage) *) begin with h do begin clearline(fulnam); while note<>nil do clearline(note) end end; procedure clearpiece(var p: pieceptr); (* clear the dna of the piece *) begin while p^.dna<>nil do cleardna(p^.dna); clearheader(p^.key.hea) end; function chartobase(ch:char):base; (* convert a character into a base *) begin case ch of 'a': chartobase:=a; 'c': chartobase:=c; 'g': chartobase:=g; 't': chartobase:=t end end; function basetochar(ba:base):char; (* convert a base into a character *) begin case ba of a: basetochar:='a'; c: basetochar:='c'; g: basetochar:='g'; t: basetochar:='t'; end end; function complement(ba:base):base; (* take the complement of ba *) begin case ba of a: complement:=t; c: complement:=g; g: complement:=c; t: complement:=a; end end; function chomplement(b: char): char; (* create the character complement of base b. I must be getting hungry! *) begin chomplement := basetochar(complement(chartobase(b))); end; function pietoint(p: integer; pie: pieceptr): integer; (* p is a coordinate on the piece. we want to transform p into a number from 1 to n: an internal coordinate system for easy manipulation of piece coordinates *) (* Note: the dirhomologous and dircomplement are treated as plus and minus directions, which MIGHT NOT BE RIGHT! *) var i: integer; (* an intermediate value *) begin with pie^.key do begin case piedir of dirhomologous, plus: if p>=piebeg then i:=p-piebeg+1 else i:=(p-coobeg)+(cooend-piebeg)+2; dircomplement, minus: if p<=piebeg then i:=piebeg-p+1 else i:=(cooend-p)+(piebeg-coobeg)+2 end; pietoint:=i end end; function inttopie(i: integer; pie: pieceptr):integer; (* i is in the range 1 to some maximum. it is an internal coordinate system for the program. we want to do a coordinate transformation to obtain a value in the range of the piece called pie: i=1 corresponds to piebeg and i=its maximum corresponds to pieend *) (* Note: the dirhomologous and dircomplement are treated as plus and minus directions, which MIGHT NOT BE RIGHT! *) var p: integer; (* an intermediate value *) begin with pie^.key do begin case piedir of dirhomologous, plus: begin p:=piebeg+(i-1); if p>cooend then if coocon=circular then p:=p-(cooend-coobeg+1) end; dircomplement, minus: begin p:=piebeg-(i-1); if pnil then begin bwstartline(book); if l^.length<>0 then for i:=1 to l^.length do write(book,l^.letters[i]); writeln(book); end end; procedure bwtext(var book: text; lines: lineptr); (* write a set of lines to the book *) var l: lineptr; begin l := lines; while l<>nil do begin bwline(book,l); l := l^.next; end; end; procedure bwnote(var book: text; note: lineptr); (* writes the notes pointed to by 'note' to 'book' *) begin if note<>nil then begin writeln(book,'note'); bwtext(book,note); writeln(book,'note'); end end; procedure bwnumber(var book: text; num: integer); (* write a number to the book *) begin bwstartline(book); writeln(book,num:1) (* pascal will expand the field as far as needed *) end; procedure bwreanum(var book: text; reanum: real); (* write a real number to the book *) begin bwstartline(book); writeln(book,reanum:1:2) (* pascal will expand the field *) end; procedure bwstate(var book: text; sta: state); (* write a state to the book *) begin bwstartline(book); case sta of on: writeln(book,'on'); off: writeln(book,'off') end end; procedure bwname(var book: text; nam: name); (* write a name to the book *) var i: integer; begin with nam do begin bwstartline(book); for i:=1 to length do write(book,letters[i]); writeln(book) end end; procedure bwdirect(var book: text; direct: direction); (* write a direction to the book *) begin bwstartline(book); case direct of dirhomologous, (* handle case, may not be right *) plus: writeln(book,'+'); dircomplement, (* handle case, may not be right *) minus: writeln(book,'-') end end; procedure bwconfig(var book: text; config: configuration); (* write a configuration to the book *) begin bwstartline(book); case config of linear: writeln(book,'linear'); circular: writeln(book,'circular') end end; procedure bwheader(var book: text; hea: header); (* write a key header to the book *) begin with hea do begin bwname(book,keynam); bwline(book,fulnam); bwnote(book,note); end end; procedure bworgkey(var book: text; org: orgkey); (* write the organism key *) begin with org do begin bwheader(book,hea); bwline (book,mapunit) end end; procedure bwchrkey(var book: text; chr: chrkey); (* write the chromosome key *) begin with chr do begin bwheader(book,hea); bwreanum(book,mapbeg); bwreanum(book,mapend) end end; (* end module book.bwbasics version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module book.bworg *) procedure bworg(var thefile: text; org: orgkey; var chropen, orgopen: boolean); (* this writes the organism key 'org' to 'thefile', and returns 'orgopen' as true. if there is already a chromosome or organism open they are closed. *) begin if chropen then begin writeln(thefile,'chromosome'); chropen := false; end; if orgopen then writeln(thefile,'organism'); writeln(thefile,'organism'); bworgkey(thefile,org); orgopen := true; end; (* end module book.bworg version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module book.bwchr *) procedure bwchr(var thefile: text; chr: chrkey; var chropen: boolean); (* write the chromosome key 'chr' to 'thefile' and return chropen as true. if a chromosome is already open it is closed. *) begin if chropen then writeln(thefile,'chromosome'); writeln(thefile,'chromosome'); bwchrkey(thefile,chr); chropen := true; end; (* end module book.bwchr version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module book.bwdna *) procedure bwdna(var thefile: text; d: dnaptr); (* write the dna pointed to by 'd' to 'thefile' *) var i: integer; (* index to the sequence *) l: integer; (* index to the number of bases on the line *) newline: boolean; (* true when a new line should be started *) begin writeln(thefile,'dna'); newline := true; while (d <> nil) do begin for i := 1 to d^.length do begin if newline then begin bwstartline(thefile); l := 0; newline := false end; write(thefile,basetochar(d^.part[i])); l := succ(l); if (l mod 60 = 0) or (* end of line *) ((i = d^.length) and (d^.next = nil)) (* last base *) then begin writeln(thefile); newline := true end; end; d := d^.next; end; if not newline then writeln(thefile); (* last base *) writeln(thefile,'dna'); end; (* end module book.bwdna version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module book.bwpie *) procedure bwpie(var thefile: text; pie: pieceptr); (* writes the information pointed to by 'pie' to 'thefile' *) begin writeln(thefile,'piece'); with pie^ do begin bwheader(thefile,key.hea); bwstartline(thefile); writeln(thefile,key.mapbeg:1:2); bwstartline(thefile); if (key.coocon = circular) then writeln(thefile,'circular') else writeln(thefile,'linear'); bwstartline(thefile); if (key.coodir = plus) then writeln(thefile,'+') else writeln(thefile,'-'); bwstartline(thefile); writeln(thefile,key.coobeg:1); bwstartline(thefile); writeln(thefile,key.cooend:1); bwstartline(thefile); if (key.piecon = circular) then writeln(thefile,'circular') else writeln(thefile,'linear'); bwstartline(thefile); if (key.piedir = plus) then writeln(thefile,'+') else writeln(thefile,'-'); bwstartline(thefile); writeln(thefile,key.piebeg:1); bwstartline(thefile); writeln(thefile,key.pieend:1); end; bwdna(thefile,pie^.dna); writeln(thefile,'piece'); end; (* end module book.bwpie version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module book.bwref *) procedure bwref(var book: text; ref: reference); (* write a key reference to the book *) begin with ref do begin bwname (book,pienam); bwreanum(book,mapbeg); bwdirect(book,refdir); bwnumber(book,refbeg); bwnumber(book,refend) end end; (* end module book.bwref version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module book.bwgen *) procedure bwgen(var thefile: text; gene: genkey); (* this proecdure writes to 'thefile' the information in 'gene', properly formatted; *) begin writeln(thefile,'gene'); with gene do begin bwheader(thefile,hea); bwref(thefile,ref); end; writeln(thefile,'gene'); end; (* end module book.bwgen version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module book.bwtra *) procedure bwtra(var thefile: text; trans: trakey); (* this proecdure writes to 'thefile' the information in 'trans', properly formatted; *) begin writeln(thefile,'transcript'); with trans do begin bwheader(thefile,hea); bwref(thefile,ref); end; writeln(thefile,'transcript'); end; (* end module book.bwtra version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module book.bwmar *) procedure bwmar(var thefile: text; mark: marker); (* this proecdure writes to 'thefile' the information in 'mark', properly formatted; *) var i : integer; begin writeln(thefile,'marker'); with mark.key do begin bwheader(thefile,hea); bwref(thefile,ref); bwstate(thefile,sta); bwstartline(thefile); for i := 1 to phenotype^.length do write(thefile,phenotype^.letters[i]); writeln(thefile); end; bwdna(thefile,mark.dna); writeln(thefile,'marker'); end; (* end module book.bwmar version = 7.66; {of delmod.p 2004 Aug 4} *) (****************************************************************************) (* end module package.bwrite version = 7.66; {of delmod.p 2004 Aug 4} *) (* begin module checkcoordinates *) function checkcoordinates(piedir: direction; piebeg, pieend, coobeg, cooend: integer): integer; (* calculate the length of a piece with the input piecekey values. the function was derived from the standard piecelength and pietoint functions in delmods. *) var length: integer; (* temporary answer *) begin (* checkcoordinates *) case piedir of dirhomologous, plus: if pieend >= piebeg then length := (pieend - piebeg) + 1 else length := (pieend - coobeg) + (cooend - piebeg) + 2; dircomplement, minus: if pieend <= piebeg then length := (piebeg - pieend) + 1 else length := (cooend - pieend) + (piebeg - coobeg) + 2 end; checkcoordinates := length end; (* checkcoordinates *) (* end module checkcoordinates *) (* begin module decapitalize *) function decapitalize(c: char): char; (* convert the character c to lower case *) var n: integer; (* c is the n'th letter of the alphabet *) begin n := ord(c); if (n >= ord('A')) and (n <= ord('Z')) then c := chr( n - ord('A') + ord('a')) else c := chr(n); decapitalize := c end; (* end module decapitalize version = 5.11; (@ of prgmod.p 2005 Jan 19 *) procedure readseq(var thefile: text; var d: dnaptr); (* takes the sequence in 'thefile' and puts it into the dnastring pointed to by 'd'. the input sequence must have no characters other than a,c,g,t,u, period and blank. the period is used to separate different sequences within the file. *) var i: integer; ch: char; newd, (* new dna pointer *) lastd: dnaptr; (* pointer to previous dna *) coordinate: integer; (* coordinate of the base *) begin seqnum := succ(seqnum); getdna(d); newd := d; i := 0; coordinate := 0; ch := ' '; while (not eof(thefile) and (ch <> '.')) do begin if eoln(thefile) then begin readln(thefile); end else begin read(thefile,ch); if ((ch <> ' ') and (ch <> '.')) then begin { if eoln(thefile) then begin writeln(output,'BUBBA'); halt; end; } coordinate := succ(coordinate); i := succ(i); if (i > dnamax) then begin newd^.length := dnamax; lastd := newd; getdna(newd); lastd^.next := newd; i := 1; end; ch := decapitalize(ch); if not (ch in ['a','c','g','t','u']) then begin write(output,'nonbase (',ch,') found in sequence ', seqnum:1,' at ', coordinate:1); (* halt; *) writeln(output,': converted to ',chreplace, ' and noted in changes file'); (* write the changes *) write(changes,'@ '); write(changes,'p',seqnum:1); write(changes, ' ', coordinate:6,'.0 +1 "'); write(changes, 'change:" "was ', ch); ch := chreplace; (* smash it *) write(changes, ' is now ', ch,'"'); writeln(changes); end; if (ch = 'u') then ch := 't'; newd^.part[i] := chartobase(ch) end end; end; newd^.length := i; newd^.next := nil; end; (* readseq *) procedure okstring(var thefile: text; var aline: string); (* read in and check a line from the user. insist that it be short enough to fit into the line type *) var acceptable: boolean; (* is this line acceptable? *) begin (* okstring *) repeat readstring(thefile,aline); acceptable := (aline.length <= linelength-2); (* the -2 accounts for the '* ' at the beginning of each line *) if not acceptable then writeln(output,'line too long, please retype') until acceptable; end; (* okstring *) procedure readnotes(var thefile: text; var notes: lineptr); (* this takes the lines printed to 'thefile' and makes notes from them until a blank line is typed. *) var i: integer; aline: string; (* read from input *) newline, (* the newline of notes *) last: lineptr; (* last note line in a string of note lines *) begin getline(notes); newline := notes; writeln(output,'write the notes (<= ',(linelength-2):1, ' characters per line)', ' terminated by a blank line.'); okstring(input,aline); while (aline.length > 0) do begin newline^.length := aline.length; for i := 1 to newline^.length do newline^.letters[i] := aline.letters[i]; okstring(thefile,aline); last := newline; getline(newline); last^.next := newline; end; newline := nil; clearstring(aline); end; procedure readname(var thefile : text; var keyname: name); (* gets the key name from 'thefile' and puts it into 'keyname' *) var i: integer; aline: string; (* from the user input *) token: string; (* a name from aline *) acceptable: boolean; (* is this name acceptable? *) gotten: boolean; (* was a token gotten? *) begin (* readname *) repeat readstring(thefile,aline); if not nostring(aline) then begin gettoken(thefile,aline,token,gotten); acceptable := (token.length <= namelength); if not acceptable then writeln(output,'the name must be no longer than ', namelength:1, ' characters. please retype.') else begin keyname.length := token.length; for i := 1 to keyname.length do keyname.letters[i] := token.letters[i]; end end else begin writeln(output,'blank lines are not allowed'); acceptable := false end until acceptable; end; (* readname *) procedure readline(var thefile: text; var linep: lineptr); (* gets a line from 'thefile' and points to it with 'linep' *) var i: integer; aline: string; (* from the user input *) acceptable: boolean; (* is the line acceptable? *) begin repeat okstring(thefile,aline); if not nostring(aline) then begin linep^.length := aline.length; for i := 1 to linep^.length do linep^.letters[i] := aline.letters[i]; acceptable := true end else begin writeln(output,'blank lines not allowed'); acceptable := false end until acceptable end; procedure writepiecoordinates(coord: piekey); (* write the piece coordinates to the user for checking *) begin with coord do begin write(output,' ',mapbeg:4:2); if (coocon = circular) then write(output,' c') else write(output,' l'); if (coodir = plus) then write(output,' +') else write(output,' -'); write(output,' ',coobeg:1); write(output,' ',cooend:1); if (piecon = circular) then write(output,' c') else write(output,' l'); if (piedir = plus) then write(output,' +') else write(output,' -'); write(output,' ',piebeg:1); writeln(output,' ',pieend:1); end; end; procedure getpieceinfo; (* gets the information about the sequence from the user *) var aline: string; (* from the user input *) c: char; (* the first character on the string *) i: integer; (* the integer value in the string *) r: real; (* the real value in the string *) gotten: boolean; (* true when the desired string is gotten *) acceptable: boolean; (* true when everything checks out *) (* variables for putting numbers (seqnum) in the piece notes *) numbernote: lineptr; (* the note *) d: integer; (* index to the number in the note *) begin with pie^.key do begin writeln(output,'what is the key name of this piece?'); readname(input,hea.keynam); writeln(output,'what is the full name of this piece?'); getline(hea.fulnam); readline(input,hea.fulnam); readnotes(input,hea.note); (* insert a number into the notes *) if numberpieces then begin getline(numbernote); (* obtain the line *) with numbernote^ do begin (* insert the number symbol and a space *) letters[1] := '#'; letters[2] := ' '; length := numbersize(seqnum) - 1; (* length of the number *) for d := 1 to length do letters[d + 2] := numberdigit(seqnum, length - d); length := length + 2; (* account for the two extra symbols *) end; (* string the note into the header *) numbernote^.next := hea.note; hea.note := numbernote end; repeat acceptable := true; clearstring(aline); repeat if nostring(aline) then writeln(output,'what is the map beginning?'); getreal(input,aline,r,gotten); until gotten; mapbeg := r; repeat if nostring(aline) then writeln(output,'what is the coordinate configuration? (c/l)'); getchar(input,aline,c,gotten); until (c in ['c','l']) and gotten; if (c = 'c') then coocon := circular else coocon := linear; repeat if nostring(aline) then writeln(output,'what is the coordinate direction? (+/-)'); getchar(input,aline,c,gotten); until (c in ['+','-']) and gotten; if (c = '+') then coodir := plus else coodir := minus; repeat if nostring(aline) then writeln(output,'what is the coordinate beginning?'); getinteger(input,aline,i,gotten); until gotten; coobeg := i; repeat if nostring(aline) then writeln(output,'what is the coordinate end?'); getinteger(input,aline,i,gotten); until gotten; cooend := i; repeat if nostring(aline) then writeln(output,'what is the piece configuration? (c/l)'); getchar(input,aline,c,gotten); until (c in ['c','l']) and gotten; if (c = 'c') then piecon := circular else piecon := linear; repeat if nostring(aline) then writeln(output,'what is the piece direction? (+/-)'); getchar(input,aline,c,gotten); until (c in ['+','-']) and gotten; if (c = '+') then piedir := plus else piedir := minus; repeat if nostring(aline) then writeln(output,'what is the piece beginning?'); getinteger(input,aline,i,gotten); until gotten; piebeg := i; repeat if nostring(aline) then writeln(output,'what is the piece end?'); getinteger(input,aline,i,gotten); until gotten; pieend := i; if (length <> checkcoordinates(piedir,piebeg,pieend,coobeg,cooend)) then begin acceptable := false; writeln(output,'the number of bases (',length:1,') does not ', 'match the coordinates given:'); writepiecoordinates(pie^.key); writeln(output,' retype the coordinates.') end else begin repeat if nostring(aline) then begin write(output,'the piece coordinates are: '); writepiecoordinates(pie^.key); writeln(output,'do you wish to correct any of ', 'these coordinates?'); end; getchar(input,aline,c,gotten); until (c in ['y','n']) and gotten; if (c = 'y') then acceptable := false; end; until acceptable; end; end; procedure makeorg; (* get the information about the organism from the user and write to book *) var org: orgkey; begin with org do begin writeln(output,'what is the organism key name?'); readname(input,hea.keynam); writeln(output,'what is the organism full name?'); getline(hea.fulnam); readline(input,hea.fulnam); readnotes(input,hea.note); writeln(output,'what are the map units?'); getline(mapunit); readline(input,mapunit); end; bworg(book,org,chropen,orgopen); clearline(org.hea.fulnam); clearline(org.mapunit); while (org.hea.note <> nil) do clearline(org.hea.note); end; procedure makechr; (* get the chromosome informatin from the user and write to book *) var chr: chrkey; (* the chromosome key *) aline: string; (* from the user *) r: real; (* the values of mapbeg and mapend *) gotten: boolean; (* true when the values are gotten *) begin with chr do begin writeln(output,'what is the chromosome key name?'); readname(input,hea.keynam); writeln(output,'what is the chromosome full name?'); getline(hea.fulnam); readline(input,hea.fulnam); readnotes(input,hea.note); clearstring(aline); repeat if nostring(aline) then writeln(output,'what is the chromosome map beginning?'); getreal(input,aline,r,gotten); until gotten; mapbeg := r; repeat if nostring(aline) then writeln(output,'what is the chromosome map end?'); getreal(input,aline,r,gotten); until gotten; mapend := r; end; bwchr(book,chr,chropen); clearline(chr.hea.fulnam); while (chr.hea.note <> nil) do clearline(chr.hea.note); end; procedure writerefcoordinates(coord: reference); (* write the reference coordinates to the user for checking *) begin with coord do begin write(output,' ',mapbeg:4:2); if (refdir = plus) then write(output,' +') else write(output,' -'); write(output,' ',refbeg:1); writeln(output,' ',refend:1); end; end; procedure maketrans; (* get the transcript information and write it to 'thefile' *) var trans: trakey; aline: string; (* from the user input *) c: char; (* the first character on the string *) i: integer; (* the integer value in the string *) r: real; (* the real value in the string *) gotten: boolean; (* true when the string is gotten *) acceptable: boolean; (* true when everything checks out *) begin writeln(output,'for transcript ',index:1); with trans do begin writeln(output,'what is the transcript key name?'); readname(input,hea.keynam); writeln(output,'what is the transcript full name?'); getline(hea.fulnam); readline(input,hea.fulnam); readnotes(input,hea.note); with ref do begin pienam.length := pie^.key.hea.keynam.length; for i := 1 to pienam.length do pienam.letters[i] := pie^.key.hea.keynam.letters[i]; repeat acceptable := true; clearstring(aline); repeat if nostring(aline) then writeln(output,'what is the transcript map beginning?'); getreal(input,aline,r,gotten); until gotten; mapbeg := r; repeat if nostring(aline) then writeln(output,'what is the transcript direction? (+/-)'); getchar(input,aline,c,gotten); until (c in ['+','-']) and gotten; if (c = '+') then refdir := plus else refdir := minus; repeat if nostring(aline) then writeln(output,'what is the transcript beginning?'); getinteger(input,aline,i,gotten); until gotten; refbeg := i; repeat if nostring(aline) then writeln(output,'what is the transcript end?'); getinteger(input,aline,i,gotten); until gotten; refend := i; repeat if nostring(aline) then begin write(output,'the transcript coordinates are: '); writerefcoordinates(trans.ref); writeln(output,'do you wish to correct any of ', 'these coordinates?'); end; getchar(input,aline,c,gotten); until (c in ['y','n']) and gotten; if (c = 'y') then acceptable := false; until acceptable; end; end; bwtra(book,trans); clearline(trans.hea.fulnam); while (trans.hea.note <> nil) do clearline(trans.hea.note); end; procedure makegene; (* get the gene information and write it to 'thefile' *) var gene: genkey; aline: string; (* from the user input *) c: char; (* the first character on the string *) i: integer; (* the integer value in the string *) r: real; (* the real value in the string *) gotten: boolean; (* true when the string is gotten *) acceptable: boolean; (* true when everything checks out *) begin writeln(output,'for gene ',index:1); with gene do begin writeln(output,'what is the gene key name?'); readname(input,hea.keynam); writeln(output,'what is the gene full name?'); getline(hea.fulnam); readline(input,hea.fulnam); readnotes(input,hea.note); with ref do begin pienam.length := pie^.key.hea.keynam.length; for i := 1 to pienam.length do pienam.letters[i] := pie^.key.hea.keynam.letters[i]; repeat acceptable := true; clearstring(aline); repeat if nostring(aline) then writeln(output,'what is the gene map beginning?'); getreal(input,aline,r,gotten); until gotten; mapbeg := r; repeat if nostring(aline) then writeln(output,'what is the gene direction? (+/-)'); getchar(input,aline,c,gotten); until (c in ['+','-']) and gotten; if (c = '+') then refdir := plus else refdir := minus; repeat if nostring(aline) then writeln(output,'what is the gene beginning?'); getinteger(input,aline,i,gotten); until gotten; refbeg := i; repeat if nostring(aline) then writeln(output,'what is the gene end?'); getinteger(input,aline,i,gotten); until gotten; refend := i; repeat if nostring(aline) then begin write(output,'the gene coordinates are: '); writerefcoordinates(gene.ref); writeln(output,'do you wish to correct any of ', 'these coordinates?'); end; getchar(input,aline,c,gotten); until (c in ['y','n']) and gotten; if (c = 'y') then acceptable := false; until acceptable; end; end; bwgen(book,gene); clearline(gene.hea.fulnam); while (gene.hea.note <> nil) do clearline(gene.hea.note); end; procedure makemark; (* get the marker information and write it to 'thefile' *) (* the dna must be typed without a carriage return and not be longer than dnamax *) var mark: marker; token, (* for the marker state *) aline: string; (* from the user input *) c: char; (* the first character on the string *) gotten: boolean; (* true when the string is gotten *) acceptable: boolean; (* true when everything checks out *) j,k : integer; (* indicies for reading marker dna *) begin writeln(output,'for marker ',index:1); with mark.key do begin writeln(output,'what is the marker key name?'); readname(input,hea.keynam); writeln(output,'what is the marker full name?'); getline(hea.fulnam); readline(input,hea.fulnam); readnotes(input,hea.note); with ref do begin pienam.length := pie^.key.hea.keynam.length; for j := 1 to pienam.length do pienam.letters[j] := pie^.key.hea.keynam.letters[j]; repeat acceptable := true; clearstring(aline); repeat if nostring(aline) then writeln(output,'what is the marker map beginning?'); getreal(input,aline,mapbeg,gotten); until gotten; repeat if nostring(aline) then writeln(output,'what is the marker direction? (+/-)'); getchar(input,aline,c,gotten); until (c in ['+','-']) and gotten; if (c = '+') then refdir := plus else refdir := minus; repeat if nostring(aline) then writeln(output,'what is the marker beginning?'); getinteger(input,aline,refbeg,gotten); until gotten; repeat if nostring(aline) then writeln(output,'what is the marker end?'); getinteger(input,aline,refend,gotten); until gotten; repeat if nostring(aline) then begin write(output,'the marker coordinates are: '); writerefcoordinates(mark.key.ref); writeln(output,'do you wish to correct any of ', 'these coordinates?'); end; getchar(input,aline,c,gotten); until (c in ['y','n']) and gotten; if (c = 'y') then acceptable := false; until acceptable; end; repeat if nostring(aline) then writeln(output,'what is the state of the marker? (on/off)'); gettoken(input,aline,token,gotten); until (token.letters[1] = 'o') and (token.letters[2] in ['n','f']) and gotten; if (token.letters[2] = 'n') then sta := on else sta := off; writeln(output,'what is the marker phenotype?'); getline(phenotype); readline(input,phenotype); end; writeln(output,'write the marker dna (<= 80 bases per line)', ' terminated by a blank line'); getdna(mark.dna); readstring(input,aline); j := 0; while (aline.length > 0) do begin for k := 1 to aline.length do begin ch := aline.letters[k]; if (ch <> ' ') then begin if (ch in ['a','c','g','t']) then begin j := succ(j); mark.dna^.part[j] := chartobase(ch) end else begin writeln(output,'nonbase (',ch,') found in marker sequence, ', 'rewrite entire marker dna'); j := 0; end; end; end; readstring(input,aline); end; mark.dna^.length := j; bwmar(book,mark); clearline(mark.key.hea.fulnam); clearline(mark.key.phenotype); while (mark.key.hea.note <> nil) do clearline(mark.key.hea.note); cleardna(mark.dna); end; (* These next two procedures, autopiece and autoorgchr, use default values for all the necessary information so the user has to specify nothing *) procedure autopiece; (* name the piece by its number and make it linear, + and from 1 to length *) var l, (* the length of a number *) d: integer; (* an index *) begin { writeln(output,'AUTOPIECE ----------------'); } if not eof(input) then readln(input); (* we were on the end of the previous line *) l := numbersize(seqnum); with pie^.key do begin if eof(input) then begin write(output, ' AUTOMATIC PIECE NAME BEING CREATED: '); writeln(output, 'piece-',seqnum:1); with hea.keynam do begin (* insert a number into the keyname *) letters[1] := 'p'; letters[2] := 'i'; letters[3] := 'e'; letters[4] := 'c'; letters[5] := 'e'; letters[6] := '-'; for d := 1 to l do letters[d + 6] := numberdigit(seqnum,l - d); length := l + 6; { 2005 Feb 7: TDS: retire the old method of p1, p2 ... renaming these was a pain. letters[1] := 'p'; for d := 1 to l do letters[d + 1] := numberdigit(seqnum, l - d); length := l + 1; (* account for the one extra symbol *) } {zzz} end; getline(hea.fulnam); with hea.fulnam^ do begin (* insert a number into the full name *) letters[1] := 'p'; letters[2] := 'i'; letters[3] := 'e'; letters[4] := 'c'; letters[5] := 'e'; letters[6] := '-'; for d := 1 to l do letters[d + 6] := numberdigit(seqnum,l - d); length := l + 6; next := nil; end; end else begin if not eof(input) then begin write(output, ' PIECE KEY NAME: '); with hea.keynam do begin (* insert a keyname *) length := 0; while (input^ <> ' ') and not eoln(input) do begin length := succ(length); read(input, letters[length]); write(output, letters[length]); end; writeln(output); if not eoln(input) then if input^ = ' ' then get(input); end; write(output, ' PIECE LONG NAME: '); if not eof(input) then begin getline(hea.fulnam); with hea.fulnam^ do begin (* insert a number into the full name *) length := 0; while not eoln(input) do begin length := succ(length); read(input, letters[length]); write(output, letters[length]); end; writeln(output); next := nil; end; end; end; end; getline(hea.note); with hea.note^ do begin (* insert a number into the notes *) letters[1] := '#'; letters[2] := ' '; for d := 1 to l do letters[d + 2] := numberdigit(seqnum,l - d); length := l + 2; next := nil; end; (* fill in the piece information *) mapbeg := 0.0; coocon := linear; coodir := plus; coobeg := 1-zerobase; cooend := length-zerobase; piecon := linear; piedir := plus; piebeg := 1-zerobase; pieend := length-zerobase; end; end; procedure autoorgchr; (* make up names for the organism and chromosome and write them to the book *) var org : orgkey; (* the organism key *) chr : chrkey; (* the chromosome key *) begin with org do begin with hea.keynam do begin letters[1] := 'o'; letters[2] := 'r'; letters[3] := 'g'; length := 3; end; getline(hea.fulnam); with hea.fulnam^ do begin letters[1] := 'o'; letters[2] := 'r'; letters[3] := 'g'; length := 3; next := nil; end; hea.note := nil; getline(mapunit); with mapunit^ do begin letters[1] := 'n'; letters[2] := 'o'; letters[3] := 'n'; letters[4] := 'e'; length := 4; next := nil; end; end; bworg(book,org,chropen,orgopen); clearline(org.hea.fulnam); clearline(org.mapunit); with chr do begin with hea.keynam do begin letters[1] := 'c'; letters[2] := 'h'; letters[3] := 'r'; length := 3; end; getline(hea.fulnam); with hea.fulnam^ do begin letters[1] := 'c'; letters[2] := 'h'; letters[3] := 'r'; length := 3; next := nil; end; hea.note := nil; mapbeg := 0.0; mapend := 1.0; end; bwchr(book,chr,chropen); clearline(chr.hea.fulnam); end; begin (* makebk *) writeln(output,' makebk ',version:4:2); (* required initializations *) reset(sequ); if eof(sequ) then begin writeln(output,' no sequence file provided'); halt end; rewrite(book); new(pie); new(pie^.key.hea.fulnam); new(pie^.key.hea.note); pie^.key.hea.note^.next := nil; freeline := nil; freedna := nil; orgopen := false; chropen := false; autobook := false; rewrite(changes); writeln(changes, '* makebk ', version:4:2 ); writeln(changes, 'define "change:" "-" "^" "^" 0'); repeat if nostring(aline) then begin writeln(output,'Is this an insertion module (m), ', 'an independent book (b), '); writeln(output,'or an automatically generated book (a, z), ', 'using default values?'); writeln(output,'(TYPE CARRIAGE RETURN AND THEN YOUR RESPONSE)'); end; getchar(input,aline,ch,gotten); until (ch in ['m','b','a','z']) and gotten; (*writeln(output,'the character ch is "',ch,'"'); writeln(output,'ch=b is ',(ch='b')); *) if ((ch = 'b') or (ch = 'a') or (ch = 'z')) then begin truebook := true; if (ch = 'a') then autobook := true; if (ch = 'z') then begin autobook := true; repeat if nostring(aline) then writeln(output,'give position', ' that is to become zero coordinate:'); getinteger(input,aline,zerobase,gotten); until gotten; end else zerobase := 0; getdatetime(adatetime); bwstartline(book); writedatetime(book,adatetime); write(book,', '); writedatetime(book,adatetime); write(book,', '); writeln(output,'type a title if you want'); readstring(input,aline); for i := 1 to aline.length do write(book,aline.letters[i]); writeln(book) end else truebook := false; if autobook then ch := 'y' else repeat if nostring(aline) then writeln(output,'do you want the pieces numbered (y/n)?'); getchar(input,aline,ch,gotten) until (ch in ['y','n']) and gotten; numberpieces := (ch = 'y'); { if numberpieces then writeln(output,'BUBBA numberpieces is true') else writeln(output,'BUBBA numberpieces is false'); } if autobook then autoorgchr; seqnum := 0; readseq(sequ,pie^.dna); while pie^.dna^.length > 0 do begin length := 0; d := pie^.dna; repeat length := length + d^.length; d := d^.next; until (d = nil); writeln(output,'sequence ',seqnum:1,' is ',length:1, ' nucleotides long'); clearstring(aline); ch := ' '; if not autobook then begin if truebook and not orgopen then makeorg else begin repeat if nostring(aline) then writeln(output,'is this a new organism?'); getchar(input,aline,ch,gotten); until (ch in ['y','n']) and gotten; if (ch = 'y') then makeorg; end; if orgopen and not chropen then makechr else begin repeat if nostring(aline) then writeln(output,'is this a new chromosome?'); getchar(input,aline,ch,gotten); until (ch in ['y','n']) and gotten; if (ch = 'y') then makechr; end; end; if autobook then autopiece else begin (* regive the piece length *) writeln(output,'this sequence is ',length:1, ' nucleotides long'); getpieceinfo; clearstring(aline); repeat if nostring(aline) then writeln(output,'how many transcripts are there?'); getinteger(input,aline,i,gotten); until gotten; transnum := i; for index := 1 to transnum do maketrans; repeat if nostring(aline) then writeln(output,'how many genes are there?'); getinteger(input,aline,i,gotten); until gotten; genenum := i; for index := 1 to genenum do makegene; repeat if nostring(aline) then writeln(output,'how many markers are there?'); getinteger(input,aline,i,gotten); until gotten; marknum := i; for index := 1 to marknum do makemark; end; bwpie(book,pie); (* clear the lines for reuse *) clearline(pie^.key.hea.fulnam); while(pie^.key.hea.note <> nil) do clearline(pie^.key.hea.note); cleardna(pie^.dna); (* clear the dna for reuse *) readseq(sequ,pie^.dna); end; if chropen then writeln(book,'chromosome'); if orgopen then writeln(book,'organism'); 1: end. (* makebk *)