program cnsr(input, output);
(* cnsr: removes text from a file

   Dr. Thomas D. Schneider
   National Institutes of Health
   National Cancer Institute
   Center for Cancer Research Nanobiology Program
   Molecular Information Theory Group
   Frederick, Maryland  21702-1201
   toms@ncifcrf.gov
   permanent email: toms@alum.mit.edu (use only if first address fails)
   http://www.ccrnp.ncifcrf.gov/~toms/
*)

label 1; (* end of program *)

const
(* begin module version *)
   version = 1.01; (* of cnsr.p 2007 May 22
2007 May 22, 1.01: functional but messy
2007 May 22, 1.00: origin from version 1.47 censor.p 1996 January 28 *)
(* end module version *)

(* begin module describe.cnsr *)
(*
name
   cnsr: removes text from a file

synopsis
   cnsr(input: in, output: out)

files
   input:  input file with marked text
   output: output file with marked text removed

description
   The cnsr program allows one to manipulate a text file to remove
   marked text automatically.  Any text surrounded by [[ and ]] will not
   be copied to the output.  This includes the double brackets themselves.

   A marked region may span several lines.  A line that ends while a
   region is still open is not terminated in the output, so the text
   before the [[ is joined to the text after the matching ]].

   The program originated from the censor program.  The censor program
   requires that the [[ and ]] be inside Pascal comments, which restricts
   its use to files that have those.  That is an unnecessary restriction
   since Pascal code doesn't contain [[ and ]] anyway.  However, for
   stability of the many scripts that use censor, the cnsr program was
   written as a replacement but censor is still around.

examples

documentation

see also
   Original program for censoring Pascal programs: censor.p

author
   Thomas Dana Schneider

bugs

technical notes
   Curly braces are deliberately not used inside these comments:
   in ISO Pascal either closing delimiter ends a comment, whichever
   delimiter opened it.
*)
(* end module describe.cnsr *)

(* begin module cnsr.const *)
   maxstring = 1500; (* the maximum string *)
(* end module cnsr.const *)

   fillermax = 10; (* the size of the filler array for a string *)

type

(* begin module interact.type *)
   string = record (* a string of characters *)
      letters: array[1..maxstring] of char; (* the letters in the string *)
      length: integer; (* the number of characters in the string *)
      current: integer; (* the letter we are working on *)
   end;
(* end module interact.type version = 4.09; (@ of prgmod.p 1990 May 18 *)

(* begin module filler.type *)
   (* the following is an array used to fill a string.  it is convenient
   to have it much shorter than the maxstring, so that it is easy to fill
   the string using procedure fillstring.  the user must declare the value
   of constant fillermax. *)
   filler = packed array[1..fillermax] of char;
(* end module filler.type version = 4.09; (@ of prgmod.p 1990 May 18 *)

(* begin module trigger.type *)
   trigger = record (* an object to be searched for *)
      seek: string;   (* the characters looked for *)
      state: integer; (* how close to triggering we are *)
      skip: boolean;  (* trigger not found- skip the line *)
      found: boolean  (* the trigger was found *)
   end;
(* end module trigger.type version = 4.09; (@ of prgmod.p 1990 May 18 *)

(* begin module halt *)
procedure halt;
(* stop the program.  the procedure performs a goto to the end of the
   program.  you must have a label:
      label 1;
   declared, and also the end of the program must have this label:
      1: end.
   examples are in the module libraries.
   this is the only goto in the delila system. *)
begin
   writeln(output,' program halt.');
   goto 1
end;
(* end module halt version = 4.09; (@ of prgmod.p 1990 May 18 *)

(* begin module interact.clearstring *)
procedure clearstring(var ribbon: string);
(* empty the string: blank every letter and zero length and current *)
var
   index: integer; (* to the ribbon *)
begin (* clearstring *)
   with ribbon do begin
      for index := 1 to maxstring do letters[index] := ' ';
      length := 0;
      current := 0
   end
end; (* clearstring *)
(* end module interact.clearstring version = 4.09; (@ of prgmod.p 1990 May 18 *)

(* begin module interact.getstring *)
procedure getstring(var afile: text; var buffer: string;
                    var gotten: boolean);
(* get one line from afile into buffer, not using string calls.
   this lets one obtain lines from a file without interactive prompts.
   gotten is false if the file was already at its end, otherwise true.
   a line longer than maxstring characters halts the program. *)
var
   index: integer; (* of buffer *)
begin (* getstring *)
   clearstring(buffer);
   if eof(afile) then gotten := false
   else begin
      index := 0;
      while (not eoln(afile)) and (index < maxstring) do begin
         index := succ(index);
         read(afile, buffer.letters[index])
      end;

      (* the loop stopped before the end of line: the line is too long *)
      if not eoln(afile) then begin
         writeln(output, ' getstring: a line exceeds maximum string size (',
                 maxstring:1,')');
         halt
      end;

      buffer.length := index;
      buffer.current := 1;
      readln(afile);
      gotten := true
   end
end; (* getstring *)
(* end module interact.getstring version = 4.09; (@ of prgmod.p 1990 May 18 *)

(* begin module interact.writestring *)
procedure writestring(var tofile: text; var s: string);
(* write the string s to file tofile, no writeln *)
var
   i: integer; (* index to s *)
begin (* writestring *)
   with s do
      for i := 1 to length do write(tofile, letters[i])
end; (* writestring *)
(* end module interact.writestring version = 4.09; (@ of prgmod.p 1990 May 18 *)

(* begin module filler.fillstring *)
procedure fillstring(var s: string; a: filler);
(* this procedure makes it reasonably easy to fill the string s with
   characters.  one calls the procedure as: *)
(*                         1         2         3         4         5 *)
(*                12345678901234567890123456789012345678901234567890 *)
(* fillstring(s, 'this-is-the-string                                ');
   the two comments make it easy to line the characters up.  also, for
   this example, it was assumed that the length of filler as defined by
   the constant fillermax was 50.

   trailing blanks of a are not copied.  an all-blank filler halts the
   program. *)
var
   length: integer; (* of the string without trailing blanks *)
   index: integer;  (* of s *)
begin (* fillstring *)
   clearstring(s);

   (* find the last non-blank character of the filler *)
   length := fillermax;
   while (length > 1) and (a[length] = ' ') do length := pred(length);

   if (length = 1) and (a[length] = ' ') then begin
      writeln(output, 'fillstring: the string is empty');
      halt
   end;

   for index := 1 to length do s.letters[index] := a[index];
   s.length := length;
   s.current := 1
end; (* fillstring *)
(* end module filler.fillstring version = 4.09; (@ of prgmod.p 1990 May 18 *)

(* begin module filler.filltrigger *)
procedure filltrigger(var t: trigger; a: filler);
(* fill the trigger t with the characters to seek.  the state, skip and
   found fields are not touched: call resettrigger before scanning. *)
begin (* filltrigger *)
   fillstring(t.seek,a)
end; (* filltrigger *)
(* end module filler.filltrigger version = 4.09; (@ of prgmod.p 1990 May 18 *)

(* begin module trigger.proc *)
(* this module allows one to scan a series of characters, as from
   an array or a file, and to "trigger" or detect a simple string in the
   series.  the advantage of the trigger is that several triggers can
   "observe" a stream of characters at once, each looking for a different
   thing.  some other modules required: interact.const, interact.type *)

procedure resettrigger(var t: trigger);
(* reset the trigger to ground state *)
begin (* resettrigger *)
   with t do begin
      state := 0;
      skip := false;
      found := false
   end
end; (* resettrigger *)

procedure testfortrigger(ch: char; var t: trigger);
(* look at the character ch.  if it is part of the trigger (at the
   current trigger state), then the trigger state goes higher.  if it is
   not part of the trigger then the trigger state is reset, skip is true
   and one should skip onward to find the trigger.  if the trigger is
   found, found is true.

   once a complete match has been reported, the next character starts a
   fresh match from the ground state.  without this the state would run
   past seek.length and ch would be compared against the blank padding of
   seek.letters, so a blank following a match kept the trigger "partly
   matched" and a trigger starting right after those blanks was missed.

   note: on a mismatch the offending character is not re-tested as a
   possible first character of the trigger. *)
begin (* testfortrigger *)
   with t do begin
      (* restart after a completed match; never index past the pattern *)
      if state >= seek.length then state := 0;

      state := succ(state);

      if seek.letters[state] = ch then begin
         skip := false;
         found := (state = seek.length)
      end
      else begin (* reset trigger *)
         state := 0;
         skip := true;
         found := false
      end
   end
end; (* testfortrigger *)
(* end module trigger.proc version = 4.09; (@ of prgmod.p 1990 May 18 *)

(* begin module cnsr.tocharacter *)
function tocharacter(n: integer): char;
(* convert the integer n to a character by offsetting from '0'.
   only n in 0..9 gives a digit character. *)
begin
   tocharacter := chr(n + ord('0'))
end;
(* end module cnsr.tocharacter *)

(* begin module cnsr.writeedit *)
procedure writeedit(var tofile: text; state: integer; s, e: string);
(* write the string s to file tofile, editing out the marked portions.

   tofile: the file written to
   state:  the state of the scanner at the end of this line
   s:      the text of the line
   e:      the edit mask for s, one character per character of s

   a character of s is written only where the mask e holds '0';
   every other mask value suppresses it.  (an older version of this
   routine also printed mask values '1' and '4' and treated states 1
   and 4 as printing states; that scheme is no longer used.)

   the line is terminated with writeln only when state = 0, that is when
   the line does not end inside a marked region.  this holds whether or
   not any character of the line was written. *)
var
   i: integer; (* index to s *)
begin (* writeedit *)
   for i := 1 to s.length do
      if e.letters[i] = '0' then write(tofile, s.letters[i]);

   (* if we are at the end of the line, and the state is to print,
      put a carriage return *)
   if state = 0 then writeln(tofile)
end; (* writeedit *)
(* end module cnsr.writeedit *)

(* begin module cnsr.themain *)
procedure themain(var infile, outfile: text);
(* the main procedure of the program: copy infile to outfile line by
   line, leaving out everything from '[[' through ']]' inclusive. *)
var
   buffer: string;     (* buffer of a line from infile *)
   c: char;            (* a character from the buffer *)
   debugging: boolean; (* set to true if debugging *)
   idline: string;     (* identifier line for the states: for each
                          character of buffer, the state digit that
                          decides whether it is printed *)
   gotten: boolean;    (* if true, a line was obtained *)
   state: integer;     (* state of the program.
      state = 0; scan and copy text
                 when '[[' is found, move to state 1   t1b
      state = 1; scan and delete text
                 when ']]' is found, move to state 0   t2b
      the state carries over from one line to the next. *)
   readpoint: integer; (* the point we are 'reading' in the buffer *)
   t1b,t2b: trigger;   (* triggers for each state *)

   procedure fillback(back: integer);
   (* fill the identifier line back several spots up to the current spot
      with the current state.  this is how the first character of a
      trigger, seen before the state changed, gets the new state. *)
   var
      spot: integer; (* a point on the buffer *)
   begin
      if debugging then begin
         writestring(outfile,idline);
         writeln(outfile,'| fillback before');
      end;

      for spot := readpoint - back to readpoint
      do idline.letters[spot] := tocharacter(state);

      if debugging then begin
         writestring(outfile,idline);
         writeln(outfile,'| fillback after');
      end;
   end;

begin
   debugging := false; (* set to true if debugging *)

   state := 0;
   (*               123456789- *)
   filltrigger(t1b,'[[        ');
   filltrigger(t2b,']]        ');

   while not eof(infile) do begin
      getstring(infile,buffer,gotten);
      if gotten then begin

         if debugging then begin
            writestring(outfile,buffer);
            writeln(outfile);
         end;

         (* triggers start afresh on every line, so a trigger
            cannot be split across a line boundary *)
         clearstring(idline);
         resettrigger(t1b);
         resettrigger(t2b);
         idline.length := buffer.length;

         for readpoint := 1 to buffer.length do begin
            (* mark the character with the state in force before it is
               examined; the closing ']]' is therefore still deleted *)
            idline.letters[readpoint] := tocharacter(state);
            c := buffer.letters[readpoint];
            testfortrigger(c,t1b);
            testfortrigger(c,t2b);

            case state of
               0: begin
                  if t1b.found then begin
                     state := 1;
                     fillback(1); (* also delete the first '[' *)
                  end
                  else if t2b.found then begin
                     (* a stray ']]' outside a region is copied *)
                     state := 0;
                     fillback(0);
                  end;
               end;
               1: begin
                  if t1b.found then begin
                     (* a nested '[[' does not nest: stay deleting *)
                     state := 1;
                     fillback(1);
                  end
                  else if t2b.found then begin
                     state := 0;
                  end;
               end;
            end;
         end;

         if debugging then begin
            writestring(outfile,idline);
            write(outfile,'|');
            write(outfile,' state = ', state:1);
            writeln(outfile);
         end;

         writeedit(outfile,state,buffer,idline);
      end;
   end;
end;
(* end module cnsr.themain *)

begin
   themain(input,output);
1: end.