program dbcat ( dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8, ecat, gcat, output ); (* dbcat: database catalog production and sorting program by matthew yarus modified by: Dr. Thomas D. Schneider National Cancer Institute Laboratory of Experimental and Computational Biology Molecular Information Theory Group Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu (use only if first address fails) http://www.lecb.ncifcrf.gov/~toms/ module libraries required: delman, prgmods, delmods *) label 1; (* used to halt program *) const (* begin module version *) version = 2.19; (* of dbcat.p 2004 Sep 8 2004 Sep 8, 2.19: upgrade to GPC 1998 Jul 16, 2.18: upgrade to datetimearray, upgrade to use accession instead of locus name. origin before 1983 july 13 *) (* end module version *) (* begin module describe.dbcat *) (* name dbcat: database catalog production and sorting program. synopsis dbcat (dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8: inout, ecat: out, gcat: out, output: out ) files dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8: text libraries that contain entries of either embl(european molecular biology labratory) or genbank(genetic sequence data bank) types. in both cases the general format is a series of entries, each entry beginning with a twenty letter identification code name for a particular genetic sequence followed by many lines of other relevant information. all lines begin with a two or three letter code identifying the purpose of the line. however, the two entry types have different line codes and contain similar but not identical kinds of information. ecat: catalog of embl type library entries. each catalog entry contains the location of the beginning of the library entry, a number signifying which library the entry is found in, and the special identification code of the entry's genetic sequence. gcat: same as ecat except containing information on genbank entries. output: messages to the user. description this program makes catalogs for use in the program dbpull. in addition to sorting catalog entries in the innate alphanumeric order of the computer it is run on, dbcat marks both catalogs and libraries with the date of the run so that dbpull never uses mis- matched sets of information. documentation delman.describe.dbpull, embl and genbank libraries. see also loocat.p, catal.p, dbpull.p author matthew yarus bugs technical notes dbcat functions on genbank(tm) release 9 (june 1, 1983) *) (* end module describe.dbcat *) (* more constants *) idlength = 20; (* length of identification code for each library or catalog entry *) namelength = idlength; (* length of computer system date *) libtotal = 8; (* number of libraries used in program *) lclength = 3; (* length of code at beginning of each library line *) lcdat = 'DAT'; (* short for date, used for dating cats and libs *) (* LOCK begin module datetime.const *) (* THIS IS LOCKED TO 20 TO KEEP IT THE SAME AS idlength! *) datetimearraylength = 20; (* length of dataarray for dates, It is just long enough to include the 4 digit year - solving the year 2000 problem: 1980/06/09 18:49:11 123456789 123456789 1 2 *) (* LOCK end module datetime.const version = 'delmod 6.88 98 Jul 10 tds/gds' *) type (* begin module dbcat.type *) (* a 'u' after the 'lc' or 'id' of a type name indicates that the array is unpacked; a 'p' indicates that the array is packed; an 's' indicates that the array has more than one dimension *) idutype = array[1..idlength] of char; (* used to read-write library entry ids *) idptype = packed array[1..idlength] of char; (* holds a library entry id for string comparisons *) alpha = idptype; (* holds computer system date *) lnrange = 0..libtotal; (* subrange for handling number of libraries *) catrec = record (* structure of each catalog entry *) idp: idptype; (* holds id of entry *) libnum: lnrange; (* identifies in which of 8 libraries entry is found *) linenumber: integer; (* holds the line in library where an entry begins *) end; dbcats = file of catrec; (* structure of whole catalog *) libsused = ( embl, genb ); (* used to indicate whether requested library entry is embl or genbank type *) lcutype = array[1..lclength] of char; (* holds library line code for reading-writing before packing *) lcptype = packed array[1..lclength] of char; (* holds lib line code for string comparisons *) countype = 0..lclength; (* subrange for handling library line codes *) (* end module dbcat.type *) (* begin module datetime.type *) (* array for dates *) datetimearray = packed array[1..datetimearraylength] of char; (* end module datetime.type version = 7.67; {of delmod.p 2004 Sep 8} *) var (* begin module dbcat.var *) cat: dbcats; (* any catalog *) lib: text; (* any library *) ecatunsort, ecat: dbcats; (* internal unsorted catalog and finished version (of embl libraries) *) gcatunsort, gcat: dbcats; (* same for genbank *) dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8: text; (* ordinarily, the first three libraries are embl and the rest are genbank. however, program can handle any case of one to eight libraries containing both types of entries *) (* end module dbcat.var *) (* begin module package.primitive *) (* ************************************************************************ *) (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 7.67; {of delmod.p 2004 Sep 8} *) (* begin module copyaline *) procedure copyaline(var fin, fout: text); (* copy a line from file fin to file fout *) begin (* copyaline *) while not eoln(fin) do begin fout^ := fin^; put(fout); get(fin) end; readln(fin); writeln(fout); end; (* copyaline *) (* end module copyaline version = 7.67; {of delmod.p 2004 Sep 8} *) (* begin module copylines *) function copylines(var fin, fout: text; n: integer): integer; (* copy n lines of file fin to file fout. the actual number of lines copied is returned. *) var index: integer; (* the current line number *) begin (* copylines *) index := 0; while (not eof(fin)) and (index < n) do begin copyaline(fin, fout); index := succ(index) end; copylines := index end; (* copylines *) (* end module copylines version = 7.67; {of delmod.p 2004 Sep 8} *) (* ************************************************************************ *) (* end module package.primitive version = 7.67; {of delmod.p 2004 Sep 8} *) (* begin module skipblanks *) (* 2003 July 31: tab is considered a blank character *) function isblank(c: char): boolean; (* is the character c blank or tab? *) begin isblank := (c = ' ') or (c = chr(9)) end; procedure skipblanks(var thefile: text); (* skip over blanks until a non-blank, or end of line, is found *) begin while isblank(thefile^) and not eoln(thefile) do get(thefile); end; procedure skipnonblanks(var thefile: text); (* skip over nonblanks until a blank, or end of line, is found *) begin while (not isblank(thefile^)) and not eoln(thefile) do get(thefile); end; procedure skipcolumn(var thefile: text); (* skip over a data column *) begin skipblanks(thefile); skipnonblanks(thefile) end; (* end module skipblanks version = 7.67; {of delmod.p 2004 Sep 8} *) (* begin module dbpull.readlcu *) procedure readlcu ( var lib: text; var liblcu: lcutype; var liblcp: lcptype ); (* grabs a library line code and then packs it for string comparisons *) var index: countype; (* for loop counter *) begin (* readlcu *) for index := 1 to lclength do if eoln ( lib ) then liblcu[index] := ' ' else read ( lib, liblcu[index] ); pack ( liblcu, 1, liblcp ) end; (* readlcu *) (* end module dbpull.readlcu *) (* begin module dbpull.lcequal *) function lcequal ( lcp1, lcp2: lcptype ): boolean; (* tests two line codes for equality. if your computer system version of pascal does not do string comparisons simply change lcequal and the program will be fixed throughout *) begin (* lcequal *) lcequal := false; if lcp1 = lcp2 then lcequal := true end; (* lcequal *) (* end module dbpull.lcequal *) (* begin module dbpull.idequal *) function idequal ( idp1, idp2: idptype ): boolean; (* tests two ids for equality. if your computer system version of pascal does not do string comparisons simply change idequal and the program will be fixed throughout *) begin (* idequal *) idequal := false; if idp1 = idp2 then idequal := true end; (* idequal *) (* end module dbpull.idequal *) (* begin module dbpull.datesequal *) function datesequal ( idp1, idp2: datetimearray): boolean; (* tests two dates for equality. if your computer system version of pascal does not do string comparisons simply change idequal and the program will be fixed throughout. *) begin (* idequal *) datesequal := false; if idp1 = idp2 then datesequal := true end; (* idequal *) (* end module dbpull.datesequal *) (* begin module dbpull.getid *) procedure getid ( var fin: text; (* see global *) var finidp: idptype ); (* holds requested library id *) (* finds the next string of non-space characters following the file cursor, adding spaces at the end if the string is too short, and then packs the string into the finidp array *) var index: integer; (* counter for loop *) finidu: idutype; (* holds id for reading in before packing *) begin (* getid *) while fin^ = ' ' do (* advances to first id character *) get ( fin ); index := 0; (* loop initialization value *) (* the following loop grabs all id characters *) while ( fin^ <> ' ' ) and ( index < idlength ) do begin index := succ ( index ); if eoln ( fin ) then finidu[index] := ' ' else read ( fin, finidu[index] ) ;write(output,finidu[index]); {zzz} end; (* the following loop fills out id if id is too short *) while index < idlength do begin index := succ ( index ); finidu[index] := ' ' end; pack ( finidu, 1, finidp ) ;writeln(output); {zzz} end; (* getid *) (* end module dbpull.getid *) (* begin module dbcat.makecat *) procedure makecat ( var lib: text; (* see global *) var cat: dbcats; (* see global *) libtitle: libsused; (* holds which type of library entry is being handled *) libnumhold: lnrange ); (* holds which library entry is found in *) (* creates embl and genbank catalogue entries in cat corresponding to entries in the library lib. each catalogue entry (catrec) contains the entry name, the library it is in and the line it is on in the library *) var index: countype; (* loop counter *) linenum: integer; (* holds the current library line *) entry: boolean; (* 'true' indicates that current line is the start of an entry *) ch: char; (* a character in the library *) trigger: array[1..lclength] of char; (* the string of characters to look for as the start of an entry *) done: boolean; (* done searching for ACCESSION *) recordinglocation: integer; (* the place to record the start of an entry *) begin (* makecat *) linenum := 1; (* lib starts on line one *) if lclength>3 then begin writeln(output,'ERROR IN MAKECAT: LCLENGTH TOO BIG'); halt end; case libtitle of genb: begin trigger[1] := 'L'; trigger[2] := 'O'; trigger[3] := 'C'; end; embl: begin trigger[1] := 'I'; trigger[2] := 'D'; trigger[3] := ' '; end end; while not eof ( lib ) do begin entry := true; (* we try to disprove this *) index:= 0; (* loop initialization *) (* check for id line code one character at a time, but do not even start if the line has no characters *) if eoln(lib) then entry := false else repeat index := succ ( index ); read ( lib, ch ); if eoln(lib) then entry := false else entry := (trigger[index]=ch) until (not entry) or ( index = lclength ); if entry then begin (* did pass test *) if libtitle = genb then begin { get ( lib ); get ( lib ); (* dumps 'us' of 'locus' *) } (* the locus name is irrelevant since genbank uses accession numbers now. So move to the ACCESSION line *) recordinglocation := linenum; readln(lib); linenum := succ(linenum); done := false; while not done do begin if lib^='A' then begin get(lib); if lib^='C' then begin get(lib); if lib^='C' then begin done := true end; end; end; if not done then begin readln(lib); linenum := succ(linenum); end; end; skipnonblanks(lib); {zzz} end; getid ( lib, cat^.idp ); { cat^.linenumber := linenum; } cat^.linenumber := recordinglocation; cat^.libnum := libnumhold; put ( cat ); end; readln ( lib ); linenum := succ ( linenum ) (* a line has been completed *) end; end; (* makecat *) (* end module dbcat.makecat version = 2.06; (@ of dbcat 1985 may 30 *) (* begin module dbcat.mergesort.upper *) procedure mergesort(var fin, fout: dbcats); (* merge sort fin to produce fout. a quicksort is used to sort sublists from fin. these are merged with an internal file (or fout) until all data is sorted. *) const maxdata = 3000; (* maximum number of data items that can be quick sorted *) type position = 0..maxdata; dataarray = array[1..maxdata] of catrec; (* for internal storage *) var tofout: boolean; (* direction to send data *) internal: dbcats; (* internal file *) store: dataarray; (* for internal storage *) size: integer; (* amount of store used *) procedure readin(var f: dbcats; var s: dataarray; var size: integer); (* read from f into s as much data as possible *) begin (* readin *) size := 0; while (not eof(f)) and (size < maxdata) do begin size := succ(size); s[size] := f^; get(f) end (*; if debugging then write(output, 'datain(', size:1,')')*) end; (* readin *) procedure copy(var f, t: dbcats); (* copy the rest of file f into file t *) begin (* copy *) while not eof(f) do begin t^ := f^; get(f); put(t) end end; (* copy *) function datalessthan(a, b: catrec): boolean; (* compare data in a and b for order *) begin (* datalessthan *) datalessthan := a.idp < b.idp (* can your system handle this ? *) end; (* datalessthan *) function lessthan(a, b: position): boolean; (* compare data at positions a and b in array store for order *) begin (* lessthan *) lessthan := datalessthan(store[a], store[b]) end; (* lessthan *) procedure swap(a, b: position); (* swap the data in store at a and b *) var hold: catrec; begin (* swap *) hold := store[a]; store[a] := store[b]; store[b] := hold end; (* swap *) (* end module dbcat.mergesort.upper *) (* begin module quicksort *) procedure quicksort(left, right: integer); (* quick sort a list between positions left and right, into ascending order. a position is simply a scalar of the form 0..max. the array to be sorted is dimensioned 1..max. (the difference in the ranges is important to the correct operation of the sort...) two external routines are used: function lessthan(a, b: integer): boolean is a generalized test for value-at-a < value-at-b. procedure swap(a, b: integer) switches the items at positions a and b. since these routines are external, the procedure is general. this procedure taken from the book 'algorithms + data structures = programs' by niklaus wirth, prentice-hall, inc., englewood cliffs, n.j.(1976), pp. 76-82 *) var lower, upper: integer; (* the positions looked at currently *) center: integer; (* the rough center of the region being sorted *) begin lower := left; center := (left + right) div 2; upper := right; repeat while lessthan(lower, center) do lower := succ(lower); while lessthan(center, upper) do upper := pred(upper); if lower <= upper then begin (* keep track of the center through the map: *) if lower = center then center:=upper else if upper = center then center:=lower; swap(lower, upper); lower := succ(lower); upper := pred(upper) end until lower > upper; if left < upper then quicksort(left, upper); if lower < right then quicksort(lower, right) end; (* end module quicksort version = 4.86; (@ of prgmod.p 2004 Sep 8 *) (* begin module dbcat.mergesort.lower *) procedure merge(a: dataarray; asize: integer; (* size of a *) var b, c: dbcats); (* merge all items in a with those in b (note different types) and place them into c *) var aindex: integer; (* index to a *) acurrent: catrec; (* the value of a pointed to by aindex *) begin (* merge *) reset(b); (* start reading from b *) rewrite(c); (* write the merged result to c *) aindex := 1; acurrent := a[aindex]; (* if debugging then writeln(output, 'merge, asize=', asize:1); *) while (aindex <= asize) and (not eof(b)) do begin if datalessthan(acurrent, b^) then begin (* if debugging then writeln(output, '1. aindex=', aindex:1);*) c^ := acurrent; put(c); aindex := succ(aindex); if aindex <= size then acurrent := a[aindex] end else begin (* if debugging then writeln(output, '2.'); *) c^ := b^; put(c); get(b) end end; (* finish copy of remaining data to c *) if not eof(b) then copy(b, c) else begin while aindex <= asize do begin c^ := a[aindex]; put(c); aindex := succ(aindex) end end end; (* merge *) procedure pass(var i, o: dbcats); (* make one pass through the data *) begin (* pass *) (* if debugging then write(output,'r'); *) readin(fin, store, size); (* if debugging then write(output,'q'); *) quicksort(1, size); (* store is sorted *) (* if debugging then write(output,'m'); *) merge(store, size, i, o) (* merge array store with file i to produce file o *) end; (* pass *) begin (* mergesort *) reset(fin); rewrite(fout); (* besure that any previous list is gone *) tofout := false; (* start by storing to internal *) while not eof(fin) do begin if tofout then pass(internal, fout) else pass(fout, internal); tofout := not tofout end; (* if debugging then writeln(output, 'now copy to fout');*) (* be sure that fout contains the data *) if tofout then begin reset(internal); rewrite(fout); copy(internal, fout) end end; (* mergesort *) (* end module dbcat.mergesort.lower *) (* begin module getdatetime *) procedure getdatetime(var adatetime: datetimearray); (* get the date and time into a single array from the system clock. adatetime contains the date: 1980/06/09 18:49:11 ye mo da ho mi se (year, month, day, hour, minute, second). As of 2000 February 18, the Sun Pascal compiler requires a formatting statement. This statement allows the date to be generated in this standard Delila format in a single call. Information about the formatting statement is available on the manual page for date in Unix. If a computer does not have this method, see the 'oldgetdatetime' routine in delmod.p (http://www.lecb.ncifcrf.gov/~toms/delila/delmod.html) for some conversion code. GPC Functions: function GetUnixTime (var MicroSecond : Integer) : UnixTimeType; http://agnes.dida.physik.uni-essen.de/~gnu-pascal/gpc_109.html#SEC109 7.10.8 Date And Time Routines procedure GetTimeStamp (var t : TimeStamp); function Date (t : TimeStamp) : packed array [1 .. DateLength] of Char; function Time (t : TimeStamp) : packed array [1 .. TimeLength] of Char; DateLength and TimeLength are implementation dependent constants. GetTimeStamp (t) fills the record `t' with values. If they are valid, the Boolean flags are set to True. TimeStamp is a predefined type in the Extended Pascal standard. It may be extended in an implementation, and is indeed extended in GPC. For the full definition of `TimeStamp', see section 8.255 TimeStamp. *) var t: TimeStamp; (* begin module pluckdigit *) function pluckdigit(number, logplace:integer): char; (* return the digit at the place value ('logplace') position of number. example: pluckdigit(13625, 3) = 3 pluckdigit(13625, 4) = 1 This routine was taken from module numberdigit in prgmod.p, but is modified so as not to give the sign. Instead it gives zeros above the digits. 'myabsolute' replaced 'absolute', which is apparently a keyword for GPC. The name is kept for to keep the code looking similar to its origin. *) var place: integer; (* the exponent of logplace *) count: integer; (* used to make place *) myabsolute: integer; (* the absolute value of number *) acharacter: char; (* the character to be returned *) procedure digit; (* extract a digit at the place position *) var tenplace: integer; (* ten times place *) z: integer; (* an intermediate value *) d: integer; (* the digit extracted *) begin (* digit *) tenplace:=10*place; z:=myabsolute-((myabsolute div tenplace)*tenplace); if place = 1 then d:=z else d:= z div place; case d of 0: acharacter:='0'; 1: acharacter:='1'; 2: acharacter:='2'; 3: acharacter:='3'; 4: acharacter:='4'; 5: acharacter:='5'; 6: acharacter:='6'; 7: acharacter:='7'; 8: acharacter:='8'; 9: acharacter:='9'; end end; (* digit *) begin (* pluckdigit *) place:=1; for count:=1 to logplace do place:=10*place; if number=0 then begin acharacter:='0' end else begin myabsolute:=number; if myabsolute >= place then digit else acharacter := '0' end; pluckdigit:=acharacter end; (* pluckdigit *) (* end module pluckdigit *) begin (* according to: http://agnes.dida.physik.uni-essen.de/~gnu-pascal/gpc_109.html#SEC109 *) GetTimeStamp(t); (* Predefined time stamp: http://agnes.dida.physik.uni-essen.de/~gnu-pascal/gpc_389.html#SEC389 TimeStamp = {@@packed} record DateValid, TimeValid : Boolean; Year : Integer; Month : 1 .. 12; Day : 1 .. 31; DayOfWeek : 0 .. 6; { 0 means Sunday } Hour : 0 .. 23; Minute : 0 .. 59; Second : 0 .. 61; { to allow for leap seconds } MicroSecond : 0 .. 999999 end; *) with t do begin if TimeValid then begin { writeln(output,'valid time'); writeln(output,'year =',year:4); writeln(output,'month =',month:2); writeln(output,'day =',day:2); writeln(output,'hour =',hour:2); writeln(output,'minute =',minute:2); writeln(output,'second =',second:2); } adatetime := 'year/mm/dd hh:mm:ss'; adatetime[ 1] := pluckdigit(Year,3); adatetime[ 2] := pluckdigit(Year,2); adatetime[ 3] := pluckdigit(Year,1); adatetime[ 4] := pluckdigit(Year,0); adatetime[ 6] := pluckdigit(Month,1); adatetime[ 7] := pluckdigit(Month,0); adatetime[ 9] := pluckdigit(Day,1); adatetime[10] := pluckdigit(Day,0); adatetime[12] := pluckdigit(Hour,1); adatetime[13] := pluckdigit(Hour,0); adatetime[15] := pluckdigit(Minute,1); adatetime[16] := pluckdigit(Minute,0); adatetime[18] := pluckdigit(Second,1); adatetime[19] := pluckdigit(Second,0); end else begin writeln(output,'getdatetime: invalid time!'); halt; end end; { Sun compiler method: date(adatetime,'%Y/%m/%d %H:%M:%S'); } end; (* end module getdatetime version = 7.67; {of delmod.p 2004 Sep 8} *) (* begin module readdatetime *) procedure readdatetime (var thefile: text; var adatetime: datetimearray); (* read the date and time from the file. It must have this format: 123456789 123456789 1 1980/06/09 18:49:11 *) (* 2000 Oct 11: upgraded so that the p2c compiler does not object to writing out the adatetime; added checks for the date. *) var index: integer; (* to the udatetime *) (* the following is an unpacked date time array, to avoid reading into a packed array. reading into a packed array is not transportable *) udatetime: array[1..datetimearraylength] of char; begin for index:=1 to datetimearraylength do read(thefile,udatetime[index]); pack(udatetime, 1, adatetime); if (adatetime[3]='/') and (adatetime[12]=':') then begin writeln(output,' old datetime (only 2 year digits) read: '); for index:=1 to datetimearraylength do write(thefile,adatetime[index]); writeln(output); end; (* check the adatetime format. Note that further checks for the other positions in the array could be done to be sure that they are numbers. But this should be pretty good. *) if (adatetime[ 5]<>'/') or (adatetime[ 8]<>'/') or (adatetime[14]<>':') or (adatetime[17]<>':') then begin writeln(output,'readdatetime: bad date time read:'); for index:=1 to datetimearraylength do write(thefile,adatetime[index]); writeln(output); halt end; end; (* end module readdatetime version = 7.67; {of delmod.p 2004 Sep 8} *) (* begin module writedatetime *) procedure writedatetime(var thefile: text; adatetime: datetimearray); (* expand the date and time out and print in the file *) var index: integer; (* index of datetime *) begin for index:=1 to datetimearraylength do write(thefile,adatetime[index]) end; (* end module writedatetime version = 7.67; {of delmod.p 2004 Sep 8} *) (* begin module dbcat.getlibdate *) procedure getlibdate ( var dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8: text; (* global *) var libpresent: boolean; (* 'true' if at least one non- empty lib is present *) var libmatch: boolean; (* 'true' if two libs have matching dates *) var libcount: lnrange; (* holds the number of non- empty libraries *) var adatetime: datetimearray ); (* holds the date of a lib for comparison to all the other dates *) (* counts non-empty libs and checks to see that their dates match *) procedure libcheck ( var lib: text; (* global variable *) var libpresent: boolean; var libmatch: boolean; var libcount: lnrange; var adatetime: datetimearray ); (* counts and checks date of a single library *) var index: integer; (* loop counter *) dumpchar: char; (* holds unwanted character *) liblcu: lcutype; (* holds the line code of the first line of a lib *) liblcp: lcptype; (* same as liblcu but used for string comparison to lcdat *) libidp: datetimearray; (* in this special case, this holds the lib date, not an entry identification code *) begin (* libcheck *) reset ( lib ); if not eof ( lib ) then begin libcount := libcount + 1; readlcu ( lib, liblcu, liblcp ); if not lcequal ( liblcp, lcdat ) then begin (* undated library is treated as null date case *) libidp[1] := 'D'; libidp[2] := 'A'; libidp[3] := 'T'; libidp[4] := 'E'; (* 'date' with 16 spaces after it is the null date code *) for index := 5 to idlength do libidp[index] := ' ' end else begin read ( lib, dumpchar ); (* dumps 'e' of 'date' *) repeat get ( lib ) until ( lib^ <> ' ' ); readdatetime ( lib, libidp ) end; if not libpresent then for index := 1 to idlength do adatetime[index] := libidp[index]; libpresent := true; if libmatch (* if not libmatch, libcheck will no longer test for equality when it is called *) then libmatch := idequal ( libidp, adatetime ) end; end; (* libcheck *) begin (* getlibdate *) libpresent := false; libmatch := true; libcount := 0; libcheck ( dbl1, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl2, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl3, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl4, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl5, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl6, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl7, libpresent, libmatch, libcount, adatetime ); libcheck ( dbl8, libpresent, libmatch, libcount, adatetime ); end; (* getlibdate *) (* end module dbcat.getlibdate *) (* begin module dbcat.datecat *) procedure datecat ( var catin: dbcats; (* catalog without date *) var cat: dbcats; (* intermediate file *) var catout: dbcats; (* dated catalog *) adatetime: datetimearray; (* holds computer system date or date of libraries if they all match *) libcount: lnrange ); (* holds number of non-empty libraries *) (* puts the date and library count in the first record of the catalog and then recopies the rest of the cat *) var index: integer; (* loop counter *) begin (* datecat *) reset ( catin ); rewrite ( cat ); (* the following loop adds date even if catalog is empty because the other catalog has entries(double checked by catentry) *) for index := 1 to idlength do cat^.idp[index] := adatetime[index]; cat^.libnum := libcount; put ( cat ); while not eof ( catin ) do begin cat^ := catin^; get ( catin ); put ( cat ) end; reset ( cat ); rewrite ( catout ); repeat catout^ := cat^; put ( catout ); get ( cat ) until eof ( cat ); end; (* datecat *) (* end module dbcat.datecat *) (* begin module dbcat.datelib *) procedure datelib ( var libin: text; (* undated library *) var lib: text; (* intermediate file *) var libout: text; (* dated library *) adatetime: datetimearray ); (* holds computer system date *) (* adds date line to front of library and then recopies the rest of the lib *) var index: integer; (* loop counter *) liblcu: lcutype; (* holds the line code of the first line of a lib *) liblcp: lcptype; (* same as liblcu but used for string comparison to string constant lcdat *) dateskip: boolean; (* 'true' means that old libin dateline has been passed over *) begin (* datelib *) reset ( libin ); rewrite ( lib ); if not eof ( libin ) then begin dateskip := false; repeat readlcu ( libin, liblcu, liblcp ); if lcequal ( liblcp, lcdat ) then readln ( libin ) else dateskip := true until dateskip; write ( lib, 'DATE ' ); writedatetime ( lib, adatetime ); writeln ( lib ); for index := 1 to lclength do write ( lib, liblcu[index] ); (* cursor is past code *) repeat (* copies the rest of the first non-date line *) copyaline ( libin, lib ) until eof ( libin ); reset ( lib ); rewrite ( libout ); repeat (* copies the rest of the library *) copyaline ( lib, libout ) until eof ( lib ); end; end; (* datelib *) (* end module dbcat.datelib *) (* begin module dbcat.catlibdate *) procedure catlibdate ( var dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8: text; (* global *) var cat1, cat2: dbcats ); (* global *) (* structures whole dating process *) var libpresent: boolean; (* 'true' if at least one non-empty lib is found *) libmatch: boolean; (* 'true' if two lib dates matched *) libcount: lnrange; (* holds the number of non-empty libaries *) adatetime: datetimearray; (* holds computer system date or date of libraries(if they all match) *) begin (* catlibdate *) getlibdate ( dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8, libpresent, libmatch, libcount, adatetime ); if not libpresent then begin writeln ( output, ' NO DATABASE LIBRARIES ARE PRESENT' ); halt end; if libmatch then begin datecat ( ecat, cat, ecat, adatetime, libcount ); datecat ( gcat, cat, gcat, adatetime, libcount ) end else begin getdatetime ( adatetime ); adatetime[20] := ' '; datecat ( ecat, cat, ecat, adatetime, libcount ); datecat ( gcat, cat, gcat, adatetime, libcount ); datelib ( dbl1, lib, dbl1, adatetime ); datelib ( dbl2, lib, dbl2, adatetime ); datelib ( dbl3, lib, dbl3, adatetime ); datelib ( dbl4, lib, dbl4, adatetime ); datelib ( dbl5, lib, dbl5, adatetime ); datelib ( dbl6, lib, dbl6, adatetime ); datelib ( dbl7, lib, dbl7, adatetime ); datelib ( dbl8, lib, dbl8, adatetime ); end; end; (* catlibdate *) (* end module dbcat.catlibdate *) (* begin module dbcat.order *) procedure order ( var dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8: text; var ecatunsort, ecat: dbcats; var gcatunsort, gcat: dbcats ); (* see global for all *) (* orders other procedures *) procedure call ( var lib: text; var cat1, cat2: dbcats; libnumhold: lnrange ); (* calls makecat procedure for each library. both embl and genb entries are catalogued. libnumhold indicates in which of 8 libs entry is found *) var libtitle: libsused; (* indicates whether procedure will search for embl or genbank type entries *) begin (* call *) libtitle := embl; makecat ( lib, cat1, libtitle, libnumhold ); reset ( lib ); libtitle := genb; makecat ( lib, cat2, libtitle, libnumhold ); end; (* call *) procedure catentry ( var cat1, cat2: dbcats ); (* catalogs produced by repeated uses of call *) (* if no entries are found in any libraries, the program halts *) begin (* catentry *) reset ( cat1 ); reset ( cat2 ); if eof ( cat1 ) and eof ( cat2 ) then begin writeln ( output, ' YOUR DATA BASE LIBRARIES CONTAIN NO ENTRIES' ); writeln ( output, ' OF EITHER EMBL OR GENBANK FORMAT' ); halt end; end; (* catentry *) begin (* order *) reset ( dbl1 ); reset ( dbl2 ); reset ( dbl3 ); reset ( dbl4 ); reset ( dbl5 ); reset ( dbl6 ); reset ( dbl7 ); reset ( dbl8 ); rewrite ( ecatunsort ); rewrite ( gcatunsort ); (* in the following each lib is searched for its embl type entries, is reset, and then searched for its genbank entries *) call ( dbl1, ecatunsort, gcatunsort, 1 ); call ( dbl2, ecatunsort, gcatunsort, 2 ); call ( dbl3, ecatunsort, gcatunsort, 3 ); call ( dbl4, ecatunsort, gcatunsort, 4 ); call ( dbl5, ecatunsort, gcatunsort, 5 ); call ( dbl6, ecatunsort, gcatunsort, 6 ); call ( dbl7, ecatunsort, gcatunsort, 7 ); call ( dbl8, ecatunsort, gcatunsort, 8 ); catentry ( ecatunsort, gcatunsort ); (* catalogs contain every lib entry, but perhaps not in alphanumeric order *) mergesort ( ecatunsort, ecat ); mergesort ( gcatunsort, gcat ); (* next section adds computer system date to catalogs and libraries to make checking for mismatched information quick and simple *) catlibdate ( dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8, ecat, gcat ) end; (* order *) (* end module dbcat.order *) begin (* dbcat *) writeln ( output, ' DBCAT ', version:4:2 ); order ( dbl1, dbl2, dbl3, dbl4, dbl5, dbl6, dbl7, dbl8, ecatunsort, ecat, gcatunsort, gcat ); 1: end. (* dbcat *)