program unnumber(infile, unnumberp, outfile, output);
(* unnumber: remove numbers from a PDF

  Dr. Thomas D. Schneider
  National Institutes of Health
  National Cancer Institute
  Center for Cancer Research Nanobiology Program
  Molecular Information Theory Group
  Frederick, Maryland  21702-1201
  toms@ncifcrf.gov
  permanent email: toms@alum.mit.edu (use only if first address fails)
  http://www.ccrnp.ncifcrf.gov/~toms/
*)

label 1; (* end of program *)

const
(* begin module version *)
version = 1.01; (* of unnumber.p 2006 Dec 14
2006 Dec 14, 1.01: start changing
2006 Dec 14, 1.00: origin - successfully copies *)
updateversion = 1.00; (* defines lowest acceptable current parameter file *)
(* end module version *)

(* begin module describe.unnumber *)
(*
name
   unnumber: remove numbers from a PDF

synopsis
   unnumber(infile: in, unnumberp: in, outfile: out, output: out)

files
   infile:  PDF from Nucleic Acids Research converted to text using
      pdftotext

   outfile:  the text file with numbers removed.

   unnumberp:  parameters to control the program.  The file must contain
      the following parameters, one per line:

      parameterversion: The version number of the program.  This allows
         the user to be warned if an old parameter file is used.

   output: messages to the user

description

   PDF numbers differ between proofs making comparison difficult.
   Remove them.

   A line of infile is dropped from outfile (and reported on output)
   only when it consists solely of digits and its value equals the next
   expected line number.  Expected line numbers start at 10 and go up by
   5 each time one is found.  Every other line is copied to outfile
   unchanged.

examples

documentation

see also

author

   Thomas Dana Schneider

bugs

   Unfortunately some numbers have to be rearranged by hand.

technical notes

*)
(* end module describe.unnumber *)

(* LOCK begin module string.const *)
maxstring = 20000; (* the maximum string *)
(* LOCK end module string.const *)

type
(* begin module string.type *)
stringptr = ^string; (* pointer to a string *)
string = record (* a string of characters *)
   letters: array[1..maxstring] of char; (* the letters in the string *)
   length: integer; (* the number of characters in the string *)
   current: integer; (* the letter we are working on *)
   next: stringptr; (* the next string in a series *)
end;
(* end module string.type version = 5.31; (@ of prgmod.p 2006 Oct 10 *)

var
   infile,         (* the text to be filtered *)
   unnumberp,      (* the parameter file *)
   outfile: text;  (* the filtered text *)

(* begin module halt *)
procedure halt;
(* stop the program.  the procedure performs a goto to the end of the
   program.  you must have a label:
      label 1;
   declared, and also the end of the program must have this label:
      1: end.
   examples are in the module libraries.
   this is the only goto in the delila system. *)
begin
   writeln(output,' program halt.');
   goto 1
end;
(* end module halt version = 5.31; (@ of prgmod.p 2006 Oct 10 *)

(* begin module clearstring *)
(* These modules clear strings in various ways *)
(* ---- *)
procedure emptystring(var ribbon: string);
(* empty the contents of the string but do NOT remove the pointer.
This is useful for clearing one string within a linked list of them.
Every letter is set to a blank; length and current are set to zero. *)
var
   index: integer; (* to the ribbon *)
begin (* emptystring *)
   with ribbon do begin
      for index := 1 to maxstring do letters[index] := ' ';
      length := 0;
      current := 0
   end
end; (* emptystring *)

(* ---- *)
procedure clearstring(var ribbon: string);
(* empty the string and remove the pointer *)
begin (* clearstring *)
   emptystring(ribbon);
   ribbon.next := nil
end; (* clearstring *)

(* ---- *)
procedure initializestring(var ribbon: string);
(* start the string with a nil pointer.  This routine should be called
before doing linked list work.  This allows the standard string routines
to clear the string without killing the pointer.

This is now deprecated, do not use it since clearstring still clears the
next pointer. *)
begin (* initializestring *)
   writeln(output,'remove initializestring routine!');
   writeln(output,'replace it with clearstring routine!');
   halt; (* to force deprecation *)

   (* halt jumps to the end of the program, so the statements below are
      never reached; they record what the routine used to do. *)
   clearstring(ribbon);
   ribbon.next := nil
end; (* initializestring *)
(* end module clearstring version = 5.31; (@ of prgmod.p 2006 Oct 10 *)

(* begin module writestring *)
procedure writestring(var tofile: text; var s: string);
(* write the string s to file tofile, no writeln.
Only the first s.length letters are written. *)
var
   i: integer; (* index to s *)
begin (* writestring *)
   for i := 1 to s.length do write(tofile, s.letters[i])
end; (* writestring *)
(* end module writestring version = 5.31; (@ of prgmod.p 2006 Oct 10 *)

(* begin module interact.getstring *)
procedure getstring(var afile: text; var buffer: string;
                    var gotten: boolean);
(* get a line (as a string) from a file not using string calls.
this lets one obtain lines from a file without interactive prompts.

The buffer is always cleared first.  At end of file gotten is false and
the buffer stays empty.  Otherwise the rest of the current line is read
into the buffer, the end of line is consumed and gotten is true.  A line
longer than maxstring characters stops the program through halt. *)
var
   index: integer; (* of buffer *)
begin (* getstring *)
   clearstring(buffer);
   if eof(afile) then gotten := false
   else begin
      index := 0;
      while (not eoln(afile)) and (index < maxstring) do begin
         index := succ(index);
         read(afile, buffer.letters[index])
      end;

      (* the loop only stops before the end of the line when the buffer
         is full, so being short of eoln here means the line is too long *)
      if not eoln(afile) then begin
         writeln(output, ' getstring: a line exceeds maximum string size (',
                         maxstring:1,')');
         halt
      end;

      buffer.length := index;
      buffer.current := 1;
      readln(afile);
      gotten := true
   end
end; (* getstring *)
(* end module interact.getstring version = 5.31; (@ of prgmod.p 2006 Oct 10 *)

(* begin module makenumber *)
procedure makenumber(name: string; var number: integer;
                     var found: boolean);
(* make a integer number from the name.  If a number was not detected,
found is false.

All digit characters in name are used, in order; every other character
is skipped.  If no digit is present number is 0 and found is false.

A digit string too large for an integer does not overflow: number
saturates at maxint and stays there. *)
var
   l: integer;     (* position in the string *)
   digit: integer; (* numerical value of the digit at position l *)
begin
   found := false;
   number := 0;
   for l := 1 to name.length do begin
      if name.letters[l] in ['0'..'9'] then begin
         found := true;
         (* Pascal guarantees that the digits are ordered and contiguous *)
         digit := ord(name.letters[l]) - ord('0');

         (* 10 times number plus digit fits in an integer exactly when
            number is no larger than (maxint - digit) div 10.  Once
            number has reached maxint the test keeps it there. *)
         if number > (maxint - digit) div 10
         then number := maxint
         else number := 10 * number + digit
      end
   end
end;
(* end module makenumber version = 5.31; (@ of prgmod.p 2006 Oct 10 *)

(* begin module isonlynumber *)
function isonlynumber(l: string): boolean;
(* does the string only contain digits?
An empty string gives false. *)
var
   alldigits: boolean; (* true if all characters so far are digits *)
   p: integer; (* position in the string l *)
begin
   if l.length > 0 then begin
      alldigits := true; (* optimistic! *)
      p := 1;
      while (p <= l.length) and alldigits do begin
         if not (l.letters[p] in ['0'..'9']) then alldigits := false;
         p := succ(p)
      end;
      isonlynumber := alldigits
   end
   else isonlynumber := false
end;
(* end module isonlynumber *)

(* begin module unnumber.themain *)
procedure themain(var infile, unnumberp, outfile: text);
(* the main procedure of the program.

Read and check the parameter version from unnumberp, then copy infile to
outfile line by line.  A line made only of digits whose value equals the
expected line number is reported on output instead of being copied, and
the expected line number is advanced by the increment. *)
var
   parameterversion: real; (* parameter version number *)

   aline: string; (* a line from the infile *)
   linenumber: integer; (* the next expected line number of the PDF.
      These start at 10 and increase by 5. *)
   increment: integer; (* the increment of linenumber to use *)
   gotten: boolean; (* was aline gotten? *)
   numberfound: boolean; (* was a number found in makenumber? *)
   readnumber: integer; (* the number read by makenumber *)
begin
   writeln(output,'unnumber ',version:4:2);

   reset(unnumberp);
   (* reading a real from an empty file is a run time error, so give a
      clear message and stop cleanly instead *)
   if eof(unnumberp) then begin
      writeln(output, 'The parameter file unnumberp is empty!');
      halt
   end;
   readln(unnumberp, parameterversion);
   if round(100*parameterversion) < round(100*updateversion) then begin
      writeln(output, 'You have an old parameter file!');
      halt
   end;

   reset(infile);
   rewrite(outfile);

   linenumber := 10;
   increment := 5;

   clearstring(aline);
   gotten := true;
   while gotten do begin
      getstring(infile, aline, gotten);
      if gotten then begin
         (* decide what to do with this string
            Does it contain a pure number, left justified? *)
         if isonlynumber(aline) then begin
            makenumber(aline, readnumber, numberfound);
            if not numberfound then begin
               (* cannot happen: isonlynumber saw at least one digit *)
               writeln(output,'program error - not numberfound');
               halt
            end
            else begin
               if readnumber = linenumber then begin
                  (* It's a line number!
                     Print to output instead of outfile! *)
                  write(output,'line number: ');
                  writestring(output,aline);
                  writeln(output);
                  linenumber := linenumber + increment
               end
               else begin
                  (* ok, it's not a line number.  Let it pass. *)
                  writestring(outfile, aline);
                  writeln(outfile)
               end
            end
         end
         else begin
            writestring(outfile, aline);
            writeln(outfile)
         end
      end
   end;

   writeln(output,'unnumber is done')
end;
(* end module unnumber.themain *)

begin
   themain(infile, unnumberp, outfile);
1: end.