program dbpull ( fin, fout, dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8, ecat, gcat, output ); (* dbpull: database access program by matthew yarus module libraries required: delman, delmods *) label 1; (* used to halt program *) const (* begin module version *) version = 2.49; (* of dbpull.p 1998 July 16 1998 July 16: upgrade to datetimearray, upgrade to use accession instead of locus name. origin before 1983 july 14 *) (* end module version *) (* begin module describe.dbpull *) (* name dbpull: database extraction program. synopsis dbpull (fin: in, fout: out, dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8: in, ecat: in, gcat: in, output: out) files fin: User requests for extractions from libraries. Each request takes up a single line and consists of a genetic sequence identi- fication code followed by either a single special extraction code or a series of line code requests. If an entry request is to be found in embl format the request line must have a line containing simply 'embl' somewhere above it. A line containing only 'genb' will instruct dbpull to look only for genbank format entries on the following request lines. Important note: the exact form of fin instructions is found in delman.use.dbpull.instructions. If no request is given, then ALL is assumed. This means that the program will now run using a raw list of entry names. fout: contains fulfilled requests in the same entry order as fin. this file may serve, itself, as a database library for dbpull as long as 'id ' or 'loc' occur with every request.(one of these two line codes identifies the beginning of each entry and holds its id) dbl1-dbl8: same files, in the same order, as dbcat. see delman. describe.dbcat. ecat: same as in dbcat also. gcat: same as in dbcat also. output: messages to the user. description this program uses catalogs generated by the dbcat program to quickly extract all or part of embl or genb type entries from data base lib- raries. the user may choose one of two special requests('all', which pulls out an entire entry or 'raw', which pulls out only the genetic sequence) or s-he may simply request a number of line codes. the wild- card character '*' represents any number of unspecified characters in an id request. this allows one fin line to extract several entries whose ids have characters in common. the id 'every' extracts all ids it is compared to. dbpull also checks the production dates of all the catalogs and libraries to see that they are consistent. documentation dbhelp, delman.describe.dbcat, embl and genbank libaries. see also dbcat.p author matthew yarus bugs technical notes 1:dbpull functions on genbank(tm) release 9 (june 1, 1983). 2: if the value of the constant checknum is increased, dbpull will do a more complete check of its catalogs. *) (* end module describe.dbpull *) (* begin module dbpull.constant *) (* LOCK begin module datetime.const *) (* THIS IS LOCKED TO 20 TO KEEP IT THE SAME AS idlength! *) datetimearraylength = 20; (* length of dataarray for dates, It is just long enough to include the 4 digit year - solving the year 2000 problem: 1980/06/09 18:49:11 123456789 123456789 1 2 *) (* LOCK end module datetime.const version = 'delmod 6.88 98 Jul 10 tds/gds' *) (* more constants *) idlength = 20; (* length of identification code of a library entry *) namelength = idlength; (* length of computer system date *) lclength = 3; (* length of code at beginning of each library line *) libtotal = 8; (* number of libraries used in program *) lctotal = 20; (* maximum number of line code requests *) checknum = 1; (* controls the number of times certain procedures run *) (* the following constants are used for string comparisons by the functions lcequal and idequal. if your computer system does not do these packed array comparisons, the functions must be re- written and the constants replaced by a series of assignment statements to declared arrays. *) spraw = 'RAW'; (* special fin request that pulls only sequence *) spall = 'ALL'; (* special request that pulls whole entry *) lcloc = 'LOC'; (* short for locus, line code(lc) for genb entry beginning point. this line contains id of entry *) lcid = 'ID '; (* begining point of embl entry, also holds id *) lc3spc = ' '; (* three space line code always identifies sequence lines of an embl type entry *) lcxx = 'XX '; (* marks the end of a string of one or more embl lines having the same line code *) lct = ' T'; (* first letter of title, part of reference section of genb entry. if you pull 'ref', you pull ' t' *) lca = ' A'; (* first letter of author, part of reference section of a genb entry. if you pull 'ref', you pull ' a' *) lcj = ' J'; (* first letter of journal, part of ref also *) lcori = 'ORI'; (* short for origin, code for line just above sequence in genbank entries *) lcsit = 'SIT'; (* short for sites, line just below sequence in genb *) lcterm = '// '; (* terminus code at the end of every entry *) lcdat = 'DAT'; (* short for date, used for dateline of libraries *) idembl = 'EMBL '; (* fin line indicating that following lines contain embl requests *) idgenb = 'GENB '; (* following lines are for genb requests *) idgenbank = 'GENBANK '; (* ' ' *) iddate = 'DATE '; (* catalogs of undated libraries use this as the date *) idevery = 'EVERY '; (* special id that matches any id it is compared to *) (* end module dbpull.constant *) type (* begin module datetime.type *) (* array for dates *) datetimearray = packed array[1..datetimearraylength] of char; (* end module datetime.type version = 7.40; {of delmod.p 2000 Feb 18} *) (* begin module dbpull.type *) (* a 'u' after the 'lc' or 'id' in a type name indicates that the array is unpacked; a 'p' indicates that the array is packed; an 's' indicates that the array has more than one dimension *) idutype = array[1..idlength] of char; (* holds id for reading-writing before packing *) idptype = packed array[1..idlength] of char; (* holds id for string comparisons *) alpha = idptype; (* holds computer system date *) lnrange = 0..libtotal; (* subrange for handling number of libraries *) catrec = record (* structure of each catalog entry *) idp: idptype; (* holds id of entry *) libnum: lnrange; (* identifies in which of 8 libraries entry is found *) linenumber: integer; (* holds the line in a library where an entry begins *) end; dbcat = file of catrec; (* structure of whole catalog *) lcutype = array[1..lclength] of char; (* holds library line code for reading-writing before packing *) lcptype = packed array[1..lclength] of char; (* holds line code for string comparisons *) lcstype = array[1..lctotal,1..lclength] of char; (* holds fin line code requests *) countype = 0..lclength; (* subrange for handling line codes *) libsused = ( embl, genb ); (* used to indicate whether requested entry is of embl or genbank type *) wctype = ( notwc, xwc, wcx, xwcx ); (* used to indicate whether or not an id request is wildcard and if wc whether the set characters are in front, in back, or in the middle of the variable characters. the 'x' represents the variable part of the id *) (* end module dbpull.type *) var (* begin module dbpull.var *) cat: dbcat; (* any catalog *) lib: text; (* any library *) gcat, ecat: dbcat; (* embl and genbank catalogs from dbcat program *) dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8: text; (* ordinarily, the first three libraries are embl and the rest are genbank. however, program can handle any case of one to eight libraries containing both types of entries *) fin, fout: text; (* requests for extraction from libraries and output file with fulfilled requests *) (* end module dbpull.var *) (* begin module package.primitive *) (* ************************************************************************ *) (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 7.40; {of delmod.p 2000 Feb 18} *) (* begin module copyaline *) procedure copyaline(var fin, fout: text); (* copy a line from file fin to file fout *) begin (* copyaline *) while not eoln(fin) do begin fout^ := fin^; put(fout); get(fin) end; readln(fin); writeln(fout); end; (* copyaline *) (* end module copyaline version = 7.40; {of delmod.p 2000 Feb 18} *) (* begin module copylines *) function copylines(var fin, fout: text; n: integer): integer; (* copy n lines of file fin to file fout. the actual number of lines copied is returned. *) var index: integer; (* the current line number *) begin (* copylines *) index := 0; while (not eof(fin)) and (index < n) do begin copyaline(fin, fout); index := succ(index) end; copylines := index end; (* copylines *) (* end module copylines version = 7.40; {of delmod.p 2000 Feb 18} *) (* ************************************************************************ *) (* end module package.primitive version = 7.40; {of delmod.p 2000 Feb 18} *) (* begin module dbpull.readlcu *) procedure readlcu ( var lib: text; var liblcu: lcutype; var liblcp: lcptype ); (* grabs a library line code and then packs it for string comparisons *) var index: countype; (* for loop counter *) begin (* readlcu *) for index := 1 to lclength do if eoln ( lib ) then liblcu[index] := ' ' else read ( lib, liblcu[index] ); pack ( liblcu, 1, liblcp ) end; (* readlcu *) (* end module dbpull.readlcu *) (* begin module dbpull.lcequal *) function lcequal ( lcp1, lcp2: lcptype ): boolean; (* tests two line codes for equality. if your computer system version of pascal does not do string comparisons simply change lcequal and the program will be fixed throughout *) begin (* lcequal *) lcequal := false; if lcp1 = lcp2 then lcequal := true end; (* lcequal *) (* end module dbpull.lcequal *) (* begin module dbpull.idequal *) function idequal ( idp1, idp2: idptype ): boolean; (* tests two ids for equality. if your computer system version of pascal does not do string comparisons simply change idequal and the program will be fixed throughout *) begin (* idequal *) idequal := false; if idp1 = idp2 then idequal := true end; (* idequal *) (* end module dbpull.idequal *) (* begin module dbpull.datesequal *) function datesequal ( idp1, idp2: datetimearray): boolean; (* tests two dates for equality. if your computer system version of pascal does not do string comparisons simply change idequal and the program will be fixed throughout. *) begin (* idequal *) datesequal := false; if idp1 = idp2 then datesequal := true end; (* idequal *) (* end module dbpull.datesequal *) (* begin module dbpull.getid *) procedure getid ( var fin: text; (* see global *) var finidp: idptype ); (* holds requested library id *) (* finds the next string of non-space characters following the file cursor, adding spaces at the end if the string is too short, and then packs the string into the finidp array *) var index: integer; (* counter for loop *) finidu: idutype; (* holds id for reading in before packing *) begin (* getid *) while fin^ = ' ' do (* advances to first id character *) get ( fin ); index := 0; (* loop initialization value *) (* the following loop grabs all id characters *) while ( fin^ <> ' ' ) and ( index < idlength ) do begin index := succ ( index ); if eoln ( fin ) then finidu[index] := ' ' else read ( fin, finidu[index] ) end; (* the following loop fills out id if id is too short *) while index < idlength do begin index := succ ( index ); finidu[index] := ' ' end; pack ( finidu, 1, finidp ) end; (* getid *) (* end module dbpull.getid *) (* begin module readdatetime *) procedure readdatetime (var thefile: text; var adatetime: datetimearray); (* read the date and time from the file *) var index: integer; (* to the udatetime *) (* the following is an unpacked date time array, to avoid reading into a packed array. reading into a packed array is not transportable *) udatetime: array[1..datetimearraylength] of char; begin for index:=1 to datetimearraylength do read(thefile,udatetime[index]); pack(udatetime, 1, adatetime); if (adatetime[3]='/') and (adatetime[12]=':') then begin writeln(output,' old datetime (only 2 year digits) read: ', adatetime:datetimearraylength); end; end; (* end module readdatetime version = 7.40; {of delmod.p 2000 Feb 18} *) (* begin module dbpull.findaline *) procedure findaline ( var lib: text; (* see global *) line: integer; (* the line requested *) var current: integer ); (* the line where the lib cursor currently lies *) (* get to a line in lib from current line. the variable current = line after findaline is done *) begin (* findaline *) if current >= line then begin (* the line is above where we are or we are in the middle of a line we want to get to the beginning of *) reset ( lib ); current := 1 end; while ( current < line ) and ( not eof ( lib ) ) do begin current := succ ( current ); readln ( lib ) end; if eof ( lib ) then begin writeln ( output, ' DBCAT REFERS TO A LINE (', line:1, ') THAT IS PAST THE END OF A LIBRARY.' ); writeln ( output, ' ARE ALL LIBRARIES PRESENT ?' ); halt end end; (* findaline *) (* end module dbpull.findaline *) (* begin module dbpull.entrycheck *) procedure entrycheck ( var dbl1,dbl2,dbl3,dbl4,dbl5,dbl6,dbl7,dbl8: text; var cat1, cat2: dbcat; (* libs of data bases, cats produced by dbcat *) var c1, c2, c3, c4, c5, c6, c7, c8: integer ); (* 'c' stands for current library line *) (* checks libaries by looking for a few entries referred to in the catalogs *) var index: integer; (* loop counter *) procedure checklc ( var lib: text ); (* data base library *) (* checks to see if a catalog referred entry starts with a proper line code *) var liblcu: lcutype; (* holds line code for reading-writing before packing *) liblcp: lcptype; (* holds a line code for string comparisons *) begin (* checklc *) readlcu ( lib, liblcu, liblcp ); {zzz This check can't be done anymore because the ACCESSION is used and not the locus name! if not ( lcequal ( liblcp, lcid ) or lcequal ( liblcp, lcloc )) then begin writeln ( output, ' DATA BASE LIBARIES ARE EITHER IN THE WRONG' ); writeln ( output, ' ORDER OR THEY ARE NOT EVEN EMBL OR GENBANK' ); writeln ( output, ' FORMAT AT ALL' ); halt end; } end; (* checklc *) procedure getentry ( var dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8: text; var cat: dbcat; (* catalog produced by dbcat *) var c1, c2, c3, c4, c5, c6, c7, c8: integer ); (* 'c' stands for current library line *) (* gets to an entry referred to in a catalog and then calls checklc *) begin (* getentry *) case cat^.libnum of 1: begin findaline ( dbl1, cat^.linenumber, c1 ); checklc ( dbl1 ) end; 2: begin findaline ( dbl2, cat^.linenumber, c2 ); checklc ( dbl2 ) end; 3: begin findaline ( dbl3, cat^.linenumber, c3 ); checklc ( dbl3 ) end; 4: begin findaline ( dbl4, cat^.linenumber, c4 ); checklc ( dbl4 ) end; 5: begin findaline ( dbl5, cat^.linenumber, c5 ); checklc ( dbl5 ) end; 6: begin findaline ( dbl6, cat^.linenumber, c6 ); checklc ( dbl6 ) end; 7: begin findaline ( dbl7, cat^.linenumber, c7 ); checklc ( dbl7 ) end; 8: begin findaline ( dbl8, cat^.linenumber, c8 ); checklc ( dbl8 ) end; end; end; (* getentry *) begin (* entrycheck *) reset ( cat1 ); reset ( cat2 ); c1 := 1; c2 := 1; c3 := 1; c4 := 1; c5 := 1; c6 := 1; c7 := 1; c8 := 1; for index := 1 to checknum do begin if not eof ( cat1 ) then begin get ( cat1 ); if not eof(cat1) then getentry ( dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8, cat1, c1, c2, c3, c4, c5, c6, c7, c8 ); end; if not eof ( cat2 ) then begin get ( cat2 ); if not eof(cat2) then getentry ( dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8, cat2, c1, c2, c3, c4, c5, c6, c7, c8 ); end end; reset ( cat1 ); reset ( cat2 ); reset ( dbl1 ); reset ( dbl2 ); reset ( dbl3 ); reset ( dbl4 ); reset ( dbl5 ); reset ( dbl6 ); reset ( dbl7 ); reset ( dbl8 ); c1 := 1; c2 := 1; c3 := 1; c4 := 1; c5 := 1; c6 := 1; c7 := 1; c8 := 1; (* all libraries start at line one *) end; (* entrycheck *) (* end module dbpull.entrycheck *) (* begin module dbpull.datecheck *) procedure datecheck ( var dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8: text; var cat1, cat2: dbcat ); (* see global for all *) (* checks to see that there are the proper number of libraries and catalogs and that all files have the same date *) var index: integer; (* loop counter *) liblcu: lcutype; (* holds the line code of the first line of a lib *) liblcp: lcptype; (* same as liblcu but used for string comparison to string constant lcdat *) adatetime: datetimearray; (* holds the date of a catalog for comparison to the dates of other cats and libs *) libpresent: boolean; (* 'true' if at least one lib is not empty *) libmatch: boolean; (* 'true' indicates that two dates are equal *) libcount: lnrange; (* holds number of non-empty libraries *) procedure libcheck ( var lib: text; (* global *) var libpresent: boolean; var libmatch: boolean; var libcount: lnrange; (* counts non-empty libs *) adatetime: datetimearray ); (* counts libaries and checks dates for match to catalog dates *) var index: integer; (* loop counter *) dumpchar: char; (* holds unwanted character *) libidp: datetimearray; (* the id, in this case, is the date of a lib *) begin (* libcheck *) if not eof ( lib ) then begin libpresent := true; libcount := libcount + 1; readlcu ( lib, liblcu, liblcp ); if not lcequal ( liblcp, lcdat ) then begin (* no date line in the library *) libidp[1] := 'D'; libidp[2] := 'A'; libidp[3] := 'T'; libidp[4] := 'E'; (* 'date' with 16 spaces after it will match the catalog null date code *) for index := 5 to idlength do libidp[index] := ' '; end else begin read ( lib, dumpchar ); (* dumps the 'e' in 'date' *) repeat get ( lib ) until ( lib^ <> ' ' ); readdatetime ( lib, libidp ) end; if libmatch then libmatch := datesequal ( libidp, adatetime ) end; end; (* libcheck *) begin (* datecheck *) reset ( cat1 ); reset ( cat2 ); reset ( dbl1 ); reset ( dbl2 ); reset ( dbl3 ); reset ( dbl4 ); reset ( dbl5 ); reset ( dbl6 ); reset ( dbl7 ); reset ( dbl8 ); if eof ( cat1 ) or eof ( cat2 ) then begin writeln ( output, ' A DATABASE CATALOG IS MISSING' ); halt end else begin for index := 1 to idlength do adatetime[index] := cat1^.idp[index]; if not datesequal ( adatetime, cat2^.idp ) then begin writeln ( output, ' DATABASE CATALOGS DO NOT MATCH' ); halt end end; libpresent := false; libmatch := true; libcount := 0; libcheck ( dbl1, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl2, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl3, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl4, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl5, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl6, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl7, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl8, libpresent, libmatch, libcount, adatetime ); if not libpresent then begin writeln ( output, ' NO DATABASE LIBRARIES ARE PRESENT' ); halt end else if not libmatch then begin writeln ( output, ' DATABASE LIBRARIES DO NOT MATCH CATALOGS' ); halt end; if ( libcount <> cat1^.libnum ) or ( libcount <> cat2^.libnum ) then begin writeln ( output, ' YOU HAVE A DIFFERENT NUMBER OF LIBRARIES' ); writeln ( output, ' THEN YOUR CATALOGS REFER TO.' ); halt end end; (* datecheck *) (* end module dbpull.datecheck *) (* begin module dbpull.makearray *) procedure makearray ( var fin: text; (* see global *) var finlcs: lcstype; (* holds all requested library line codes for a given id *) var lcnum: integer ); (* holds number of line codes requested *) (* builds a two dimensional array(finlcs) containing all requested library line codes for a given id request *) var index: integer; (* counter for repeat loop *) begin (* makearray *) lcnum := 0; (* no codes counted yet *) repeat (* makes a slot for each 3-letter code *) lcnum := lcnum + 1; while ( fin^ = ' ' ) and ( not eoln ( fin )) do get ( fin ); index := 0; (* loop initialization value *) repeat (* stores line code *) index := index + 1; if ( eoln ( fin )) or ( fin^ = ' ' ) then finlcs[lcnum, index] := ' ' else read ( fin, finlcs[lcnum, index] ) until index = lclength; while ( not eoln ( fin )) and ( fin^ <> ' ' ) do get ( fin ); (* only first few characters of line request used *) while ( fin^ = ' ' ) and ( not eoln ( fin )) do get ( fin ) (* gets rid of extra spaces on end of fin line *) until eoln ( fin ) or ( lcnum = lctotal ); readln ( fin ) end; (* makearray *) (* end module dbpull.makearray *) (* begin module dbpull.wccheck *) procedure wccheck ( finidp: idptype; (* holds a requested id *) var finidnum: integer; (* holds number of characters in finidp *) var wildcard: wctype ); (* holds one of four id request types *) (* decides how many characters are in each finidp and to which of the four wildcard types the id belongs *) var index: integer; (* counter for loop *) done: boolean; (* 'true' indicates that finidp is full of chars *) begin (* wccheck *) index := 1; (* loop initialization *) done := false; (* ' ' *) if finidp[index] = '*' then begin index := succ ( index ); while ( finidp[index] <> ' ' ) and ( not done ) do begin if finidp[index] = '*' then wildcard := xwcx else wildcard := xwc; if index < idlength then index := succ ( index ) else done := true end; end else begin index := succ ( index ); while ( finidp[index] <> ' ' ) and ( not done ) do begin if finidp[index] = '*' then wildcard := wcx else wildcard := notwc; if index < idlength then index := succ ( index ) else done := true end; end; if index = idlength then finidnum := index else finidnum := index - 1; (* the index is one ahead because it has to be initialized at 1 instead of 0(the array definition is 1..idlength) *) end; (* wccheck *) (* end module dbpull.wccheck *) (* begin module dbpull.idhandle *) procedure idhandle ( var cat: dbcat; (* see global *) var unequal: boolean; (* 'true' indicates that finidp <> catalog id *) wildcard: wctype; (* holds one of four id request types(three are wc) *) finidp: datetimearray; (* holds fin id request code *) finidnum: integer ); (* number of chars in finidp *) (* decides whether finidp and cat^.idp are equal *) var index, (* loop counter *) catidnum,(* holds number of non-space characters in catalog id *) placef, (* index for the position of the finidp array that is being used in comparisons *) startf, (* starting point for placef *) placec, (* same function as placef, but used on cat^idp *) startc, (* starting point for placec *) sclimit: integer; (* holds upper limit of startc *) begin (* idhandle *) catidnum := 0; for index := 1 to idlength do if cat^.idp[index] <> ' ' then catidnum := succ ( catidnum ); if finidnum > catidnum then unequal := true else begin unequal := false; case wildcard of notwc: unequal := not ( idequal ( cat^.idp, finidp ) or idequal ( finidp, idevery )); (* 'every' request matches all ids *) xwc: begin placef := 2; (* skips first '*' *) placec := catidnum - ( finidnum - 2 ); (* previous assignment lines up the right hand ends of the two compared ids *) while ( placef <= finidnum ) and ( not unequal ) do begin if finidp[placef] <> cat^.idp[placec] then unequal := true; placef := succ ( placef ); placec := succ ( placec ) end end; wcx: begin placef := 1; placec := 1; while ( not unequal ) and ( placef < finidnum ) do begin if finidp[placef] <> cat^.idp[placec] then unequal := true; placef := succ ( placef ); placec := succ ( placec ) end end; xwcx: begin startf := 2; (* skips the first '*' *) startc := 1; sclimit := catidnum - ( finidnum - 2 ); (* previous assignment stops the matching procedure at the point where the right hand ends are lined up *) unequal := true; (* the first while loop advances the section of the cat^.idp being checked while the second loop does the actual check *) while ( startc <= sclimit ) and unequal do begin placef := startf; placec := startc; unequal := false; while ( not unequal ) and ( placef < finidnum ) do begin if finidp[placef] <> cat^.idp[placec] then unequal := true; placef := succ ( placef ); placec := succ ( placec ) end; startc := succ ( startc ) end; end; end; end; end; (* idhandle *) (* end module dbpull.idhandle *) (* begin module dbpull.alphacheck *) procedure alphacheck ( var cat: dbcat; (* see global *) wildcard: wctype; (* holds one of four id request types (three are wc) *) finidp: idptype ); (* holds a requested library id *) (* checks id to see whether or not it will be found nearer to beginning of catalog than current cursor position. if so, catalog is reset. *) var index: integer; (* counter for repeat loop *) closertoa: boolean; (* 'true' indicates current idp is nearer to beginning of catalog than cursor *) begin (* alphacheck *) if eof ( cat ) then begin reset ( cat ); get ( cat ) (* skips date *) end else begin index := 0; (* loop initialization value *) closertoa := false; (* '' *) repeat index := index + 1; if ( finidp[index] < cat^.idp[index] ) or ( wildcard <> notwc ) or idequal ( finidp, idevery ) (* all wilcard ids reset catalog because there is an indeterminable possibility of a match with a previous id *) then begin closertoa := true; reset ( cat ); get ( cat ) (* skips date *) end; until closertoa or ( index = idlength ); end; end; (* alphacheck *) (* end module dbpull.alphacheck *) (* begin module dbpull.copylib *) procedure copylib ( var lib: text; (* see global *) var c: integer; (* the current line of library *) libtitle: libsused; (* indicates whether reqested entry is of embl or genbank type *) lcnum: integer; (* holds the number of line codes requested for each id *) finlcs: lcstype ); (* holds line codes *) (* copies lines from an entry which correspond to requested line codes *) var index: integer; (* loop counter *) index2: integer; (* '' *) dumpint: integer; (* a place for unwanted integers *) lccount: integer; (* counts the number of line code requests that have been satisfied *) lcsfound: array[1..lctotal] of char; (* indicates which line codes from a request line occur in an entry *) nextlc: boolean; (* 'true' indicates that all lines pertaining to a particular line code have been copied *) finlcp: lcptype; (* holds a requested line code *) liblcp: lcptype; (* holds line code of line being compared to reqested code *) liblcu: lcutype; (* holds lib line code for reading-writing before packing *) liblcp2: lcptype; (* holds linecode for comparison to previous code *) liblcu2: lcutype; (* holds second line code, like liblcp2, but used reading in *) begin (* copylib *) for index := 1 to lclength do finlcp[index] := finlcs[1,index]; if lcequal ( finlcp, spall ) (* first special request code *) then begin repeat readlcu ( lib, liblcu, liblcp ); for index := 1 to lclength do write ( fout, liblcu[index] ); copyaline ( lib, fout ); c := succ ( c ) until lcequal ( liblcp, lcterm ); end else if lcequal ( finlcp, spraw ) then (* second special code *) case libtitle of embl: repeat readlcu ( lib, liblcu, liblcp ); if lcequal ( liblcp, lc3spc ) then copyaline ( lib, fout ) else readln ( lib ); c := succ ( c ); writeln ( fout, '.' ) until lcequal ( liblcp, lcterm ); genb: begin repeat (* finds line, marked 'origin', which is right above sequence *) readlcu ( lib, liblcu, liblcp ); readln ( lib ); c := succ ( c ) until lcequal ( liblcp, lcori ); (* following loop copies lib until a line, marked 'sites', that is found right after sequence *) while ( not lcequal ( liblcp, lcsit )) and ( not lcequal ( liblcp, lcterm )) do begin (* above line required because sometimes there is no sites section after sequence *) read ( lib, dumpint ); (* coordinate value unneeded *) copyaline ( lib, fout ); c := succ ( c ); readlcu ( lib, liblcu, liblcp ) end; writeln ( fout, '.' ) end end else begin (* no special codes requested *) nextlc := false; (* loop initialization *) lccount := 0; (* ' ' *) for index := 1 to lctotal do lcsfound[index] := 'N'; (* 'n' represents an lc yet to be found *) repeat if nextlc then for index := 1 to lclength do liblcp[index] := liblcp2[index] else readlcu ( lib, liblcu, liblcp ); index2 := 0; (* loop initialization *) repeat (* checks fin line codes against current lib lc *) index2 := index2 + 1; index := 0; (* loop initialization *) repeat (* grabs a fin line code request *) index := index + 1; finlcp[index] := finlcs[index2, index] until index = lclength; until ( index2 = lcnum ) or lcequal ( finlcp, liblcp ); if ( not lcequal ( finlcp, liblcp )) or lcequal ( finlcp, lc3spc ) or lcequal ( finlcp, lcterm ) (* ' ' represents an error of extra spaces after fin line code and '// ' is unnecessary because terminus is installed after every embl request, so both these codes are ignored *) then begin nextlc := false; (* we have not advanced to next code *) if not eof ( lib ) then readln ( lib ); c := succ ( c ) end else begin lcsfound[index2] := 'F'; (* liblcp = finlcp so, 'f' represents found *) unpack ( liblcp, liblcu, 1 ); for index := 1 to lclength do write ( fout, liblcu[index] ); copyaline ( lib, fout ); c := succ ( c ); repeat (* this loop copies lines that are 1: not marked by line codes but are related to previous code or 2: have codes equal to previous requested code *) readlcu ( lib, liblcu2, liblcp2 ); if lcequal ( liblcp2, liblcp ) or lcequal ( liblcp2, lc3spc ) or lcequal ( liblcp2, lct ) or lcequal ( liblcp2, lca ) or lcequal ( liblcp2, lcj ) or lcequal ( liblcp2, lcxx ) then begin for index := 1 to lclength do write ( fout, liblcu2[index] ); copyaline ( lib, fout ); c := succ ( c ) end else nextlc := true (* we have advanced to next code *) until nextlc; lccount := succ ( lccount ) end; until lcequal ( liblcp, lcterm ) or ( lccount = lcnum ) or eof ( lib ); writeln ( fout, '// ' ); (* fout library terminus code *) index := 0; repeat index := succ ( index ) until ( index = lcnum ) or ( lcsfound[index] = 'N' ); if lcsfound[index] = 'N' (* at least one lc not found *) then begin writeln ( output, ' REQUESTED ENTRY DID NOT CONTAIN THESE' ); write ( output, ' LINE CODES-- ' ); for index := 1 to lcnum do if lcsfound[index] = 'N' then begin for index2 := 1 to lclength do write ( finlcs[index, index2] ); write ( output, ' ' ); end; writeln ( output ); end; end; writeln ( fout); (* blank between entries *) {zzz} end; (* copylib *) (* end module dbpull.copylib *) (* begin module getdatetime *) procedure getdatetime(var adatetime: datetimearray); (* get the date and time into a single array from the system clock. adatetime contains the date: 1980/06/09 18:49:11 ye mo da ho mi se (year, month, day, hour, minute, second). As of 2000 February 18, the Sun Pascal compiler requires a formatting statement. This statement allows the date to be generated in this standard Delila format in a single call. Information about the formatting statement is available on the manual page for date in Unix. If a computer does not have this method, see the 'oldgetdatetime' routine in delmod.p (http://www.lecb.ncifcrf.gov/~toms/delila/delmod.html) for some conversion code. *) begin date(adatetime,'%Y/%m/%d %H:%M:%S'); end; (* end module getdatetime version = 7.40; {of delmod.p 2000 Feb 18} *) (* begin module writedatetime *) procedure writedatetime(var thefile: text; adatetime: datetimearray); (* expand the date and time out and print in the file *) var index: integer; (* index of datetime *) begin for index:=1 to datetimearraylength do write(thefile,adatetime[index]) end; (* end module writedatetime version = 7.40; {of delmod.p 2000 Feb 18} *) (* begin module dbpull.getlib *) procedure getlib ( var cat: dbcat; (* see global for cat and libs below *) var dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8: text; var c1, c2, c3, c4, c5, c6, c7, c8: integer; (* 'c' stands for current library line *) libtitle: libsused; wildcard: wctype; (* holds one of four id request types (three are wc) *) finlcs: lcstype; (* contains all line codes for a given id request *) finidp: datetimearray; (* holds a requested id *) finidnum: integer; (* holds number of chars in finidp *) lcnum: integer; (* holds the number of line codes for a given id request *) var dated: boolean ); (* 'true' indicates that the computer system date has been added to fout *) (* finds position in library of requested id by looking in catalog and gets to appropriate position in library *) var index: integer; (* repeat loop counter *) finidu: idutype; (* holds id for writing it to output *) adatetime: datetimearray; (* holds the computer system date *) unequal: boolean; (* 'true' indicates that catalog id does not match fin requested id *) finish: boolean; (* 'true' indicates that fin id request has been satisfied *) idnotfound: boolean; (* 'true' indicates that fin id request does not match any ids occuring in a catalog *) lchold: lcptype; (* holds first line code stored in finlcs *) begin (* getlib *) for index := 1 to lclength do lchold[index] := finlcs[1, index]; if lcequal ( lchold, lc3spc ) then begin (* writeln ( output, ' CODE(S) REQUESTING PART OR ALL OF AN ENTRY' ); writeln ( output, ' MUST NORMALLY FOLLOW EACH ID REQUEST' ); writeln (output, 'However, I''ll be nice and make them ALL'); *) (* If no request was found, make the request be ALL *) lchold := spall; finlcs[1, 1] := 'A'; finlcs[1, 2] := 'L'; finlcs[1, 3] := 'L'; end; if not dated then begin if lcequal ( lchold, spraw ) then dated := true (* 'raw' requests are not dated *) else begin getdatetime ( adatetime ); write ( fout, 'DATE ' ); adatetime[20] := ' '; writedatetime ( fout, adatetime ); writeln ( fout ); dated := true end; end; finish := false; (* loop initialization *) idnotfound := true; (* ' ' *) repeat idhandle ( cat, unequal, wildcard, finidp, finidnum ); if unequal and eof ( cat ) then reset ( cat ); if unequal and ( not eof ( cat )) then get ( cat ) else begin (* id"s are equal *) idnotfound := false; case cat^.libnum of 0: begin writeln ( output, ' PROGRAM ERROR IN GETLIB' ); halt (* this should never happen, 0 allowed for purposes of loop initialization *) end; 1: begin findaline ( dbl1, cat^.linenumber, c1 ); copylib ( dbl1, c1, libtitle, lcnum, finlcs ) end; 2: begin findaline ( dbl2, cat^.linenumber, c2 ); copylib ( dbl2, c2, libtitle, lcnum, finlcs ) end; 3: begin findaline ( dbl3, cat^.linenumber, c3 ); copylib ( dbl3, c3, libtitle, lcnum, finlcs ) end; 4: begin findaline ( dbl4, cat^.linenumber, c4 ); copylib ( dbl4, c4, libtitle, lcnum, finlcs ) end; 5: begin findaline ( dbl5, cat^.linenumber, c5 ); copylib ( dbl5, c5, libtitle, lcnum, finlcs ) end; 6: begin findaline ( dbl6, cat^.linenumber, c6 ); copylib ( dbl6, c6, libtitle, lcnum, finlcs ) end; 7: begin findaline ( dbl7, cat^.linenumber, c7 ); copylib ( dbl7, c7, libtitle, lcnum, finlcs ) end; 8: begin findaline ( dbl8, cat^.linenumber, c8 ); copylib ( dbl8, c8, libtitle, lcnum, finlcs ) end; end; if ( wildcard = notwc ) (* wc id might match other cat id"s *) and ( not idequal ( finidp, idevery )) then finish := true; get ( cat ) end; until eof ( cat ) or finish; if idnotfound then begin write ( output, ' ID ' ); unpack ( finidp, finidu, 1 ); for index := 1 to idlength do write ( output, finidu[index] ); write ( output, ' NOT FOUND' ); if libtitle = embl then writeln ( output, ' IN EMBL' ) else writeln ( output, ' IN GENBANK' ); if wildcard = notwc (* wc searches are more general, so they allow for guesses at possible ids *) then halt end end; (* getlib *) (* end module dbpull.getlib *) (* begin module dbpull.order *) procedure order ( var dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8: text; var ecat, gcat: dbcat ); (* see global for all *) (* orders all other procedures *) var finidp: idptype; (* holds requested id *) finidnum: integer; (* holds number of chars(non-space) in finidp *) lcnum: integer; (* represents the total number of different line codes associated with a given request *) finlcs: lcstype; (* 2d array that holds fin line code requests *) wildcard: wctype; (* holds one of four id request types, three of which are wildcard and one just regular. one wc type has the variable length wc section in front of the set request chars, one has it in back, and one has it in front and back *) libtitle: libsused; (* indicates whether lib request is of embl or genbank type *) c1, c2, c3, c4, c5, c6, c7, c8: integer; (* 'c' stands for current library line *) firstline: boolean; (* 'true' indicates that file cursor is on the first line of fin *) dated: boolean; (* 'true' indicates that the computer system date has been added to fout(except in 'raw' special request where no date is required) *) begin (* order *) reset ( fin ); rewrite ( fout ); if eof ( fin ) then begin writeln ( output, ' INPUT FILE FIN IS EMPTY' ); halt end; datecheck ( dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8, ecat, gcat ); entrycheck ( dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8, ecat, gcat, c1, c2, c3, c4, c5, c6, c7, c8 ); dated := false; firstline := true; while not eof ( fin ) do begin getid ( fin, finidp ); makearray ( fin, finlcs, lcnum ); wccheck ( finidp, finidnum, wildcard ); if firstline then if idequal ( finidp, idembl ) (* fin line indicating that following lines are for embl entry type requests *) then libtitle := embl else if idequal ( finidp, idgenbank ) (* following lines genb *) or idequal ( finidp, idgenb ) then libtitle := genb else begin writeln ( output, ' Since the first line of fin does', ' indicate which database to use,'); writeln ( output, ' GENBANK was assumed'); reset(fin); (* go back for the first entry! *) libtitle := genb end else begin if idequal ( finidp, idembl ) (* embl lines follow this line *) then libtitle := embl else if idequal ( finidp, idgenb ) (* following lines genb *) or idequal ( finidp, idgenbank ) then libtitle := genb else if libtitle = embl then begin alphacheck ( ecat, wildcard, finidp ); getlib ( ecat, dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8, c1, c2, c3, c4, c5, c6, c7, c8, libtitle, wildcard, finlcs, finidp, finidnum, lcnum, dated ) end else begin alphacheck ( gcat, wildcard, finidp ); getlib ( gcat, dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8, c1, c2, c3, c4, c5, c6, c7, c8, libtitle, wildcard, finlcs, finidp, finidnum, lcnum, dated ) end; end; firstline := false; end; end; (* order *) (* end module dbpull.order *) begin (* dbpull *) writeln ( output, ' DBPULL ', version:4:2 ); order ( dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8, ecat, gcat ); 1:end. (* dbpull *)