program untex(input,output);
(* untex: remove tex and latex constructs

  Dr. Thomas D. Schneider
  National Cancer Institute
  Laboratory of Experimental and Computational Biology
  Frederick, Maryland  21702-1201
  toms@ncifcrf.gov
  permanent email: toms@alum.mit.edu (use only if first address fails)
  http://www.lecb.ncifcrf.gov/~toms/

  module libraries required: delman, prgmods *)

label 1; (* end of program *)

const
(* begin module version *)
   version = 1.41; (* of untex.p 2002 Oct 14
2002 Oct 14: 1.41: update documentation
2002 Oct 14: 1.40: finish \command removal: recursion!
2002 Oct 14: 1.39: '\command' becomes nothing.  removecommand created
2001 Nov 16: 1.38: '\command{stuff}' becomes just 'stuff'
2001 Jul 13: 1.37: tilde (~) becomes space
2001 May  4: 1.36: underscore (_) removed
1999 April 5: 1.35: '\xxx{stuff}' is converted to ' stuff'
                    so that \emph{italics} are ok
origin 1988 march 1 from untitle *)
(* end module version *)

(* begin module describe.untex *)
(*
name
   untex: remove tex and latex constructs

synopsis
   untex(input: in, output: out)

files
   input:  a tex or latex file

   output:  the file with:
      '\xxx' command words converted to spaces,
      '{$_}' deleted
      '~' converted to spaces
      comments (%) removed
      \% is turned into % to preserve the percent
      \& is turned into &
      '\xxx{stuff}' is converted to ' stuff'
         so that \emph{italics} are ok
      '\xxx{\another{thing}}' is converted to ' thing' by recursion.
      the character that ends a command name is treated as ordinary
         text, so '\alpha\beta' removes both commands and
         '\LaTeX.' keeps the period.
      multiple spaces are compressed to single spaces.
      each input line produces one output line.

   Removal of free floating '.' ',' '(' ')' is NOT done: that rule is
   disabled in the code because it deleted leading parentheses.
   Compression of runs of blank lines is also currently disabled:
   procedure out can store up lines, but nothing asks it to.

description
   This reduces the number of words counted by wc to something close
   to correct.

author
   Thomas D. Schneider

bugs
   citations and comments on lines by themselves leave a blank line.
   An argument is taken to end at the first closing brace, so text
   after a nested group is handled by the main loop.  The only visible
   effect is that a ']' following a nested group is not removed.

*)
(* end module describe.untex *)

(* begin module halt *)
procedure halt;
(* stop the program.  the procedure performs a goto to the end of the
   program.  you must have a label:
      label 1;
   declared, and also the end of the program must have this label:
      1: end.
   examples are in the module libraries.
   this is the only goto in the delila system. *)
begin
   writeln(output,' program halt.');
   goto 1
end;
(* end module halt version = 'delmod 6.16 84 mar 12 tds/gds'; *)

(* begin module themain *)
procedure themain(var fin, fout: text);
(* the main procedure of the program: copy fin to fout, stripping
   tex and latex constructs as listed in module describe.untex *)
var
   blanks: integer; (* number of blanks stored up so far *)
   ch: char;        (* a character read from fin by the main loop *)
   lines: integer;  (* count of blank lines stored up so far *)

   procedure out(c: char);
   (* output the character c, reducing the blanks before c to one.
      a blank is only counted; it is written when the next visible
      character arrives.  '\' means a carriage return is to be stored;
      stored lines are written (at most two) before the next visible
      character.  nothing in this program currently passes '\'. *)
   begin
      if c = ' ' then blanks := blanks + 1
      else if c = '\' then lines := lines + 1 (* store up blank lines *)
      else begin
         (* output any lines that were stored *)
         if lines > 0 then begin
            if lines > 1 then writeln(fout); (* paragraph representation *)
            writeln(fout);
            lines := 0;  (* reset stored lines once characters are written *)
            blanks := 0  (* wipe out the previous blanks *)
         end;

         (* any number of stored blanks becomes exactly one *)
         if blanks > 0 then write(fout,' ');

         write(fout,c);
         blanks := 0
      end
   end; (* out *)

   procedure skipcomment;
   (* discard the rest of the current input line.  the end of line
      itself is left for the main loop to consume. *)
   var
      junk: char; (* characters being thrown away *)
   begin
      while not eoln(fin) do read(fin,junk)
   end; (* skipcomment *)

   procedure removecommand; forward;

   procedure process(c: char);
   (* treat c as one character of ordinary text.  both the main loop
      and removecommand use this, so text inside a command argument is
      filtered by exactly the same rules as text outside one. *)
   begin
      (* bracketed text and equations *)
      if c in ['{','$','_','}'] then begin
         (* drop it!! *)
      end
      else if c = '~' then out(' ')

      (* removal of free floating commas, periods and parens when
         blanks>0 is deliberately not done here: it deletes leading
         parens. *)

      (* commands *)
      else if c = '\' then removecommand (* delete the whole command *)

      (* comments.  note: \% never gets here, removecommand keeps it *)
      else if c = '%' then skipcomment

      (* other text *)
      else out(c)
   end; (* process *)

   procedure removecommand;
   (* remove a whole command; the \ has just been read.
      This should handle the form:

         \newcommand{\runningtitle}{A title goes here}

      further test cases:

         \markright{Bush {\em et al.}, \runningtitle}
         %Running title: \runningtitle
         x\emph{y}z         gives   x yz
         \alpha\beta        gives   nothing but a blank
         \foo% remark       gives   nothing but a blank
         a line ending in \ keeps its line break

      all state is local so that a recursive call cannot disturb the
      loops of the caller. *)
   var
      c: char;        (* character being examined *)
      named: boolean; (* the command is a name (letters), not one symbol *)
      done: boolean;  (* the closing brace of the argument was read *)
   begin
      if eoln(fin) then begin
         (* a lone \ at the end of a line.  reading now would swallow
            the end of line and join the next line onto this one
            (or read past the end of the file). *)
         out(' ')
      end
      else begin
         read(fin,c); (* look at the next character *)
         if (c = '%') or (c = '&') then out(c) (* keep the percent!!! *)
         else begin
            out(' '); (* the command is replaced by a space *)

            (* '/' is accepted in a name, as it always has been here *)
            named := c in ['a'..'z','A'..'Z','/'];
            while (not eoln(fin)) and (c in ['a'..'z','A'..'Z','/'])
               do read(fin,c);

            if c in ['{','['] then begin
               (* an argument: keep its text so that \emph{italics}
                  survive.  the space stored above separates it from
                  the preceding text. *)
               done := false;
               while (not eoln(fin)) and (not done) do begin
                  read(fin,c);
                  if c = '}' then done := true
                  else if c <> ']' then process(c) (* may RECURSE!! *)
               end
            end
            else if named and not (c in ['a'..'z','A'..'Z','/']) then begin
               (* c ended the name but is not part of the command.
                  hand it back as ordinary text instead of losing it. *)
               process(c)
            end
            (* otherwise the name ran to the end of the line, or the
               command was a single symbol such as \, which is dropped *)
         end
      end
   end; (* removecommand *)

begin (* themain *)
   blanks := 0;
   lines := 0;

   while not eof(fin) do begin
      while not eoln(fin) do begin
         read(fin,ch);
         process(ch)
      end;
      readln(fin);
      (* one output line per input line.  storing the line with
         out('\') instead was tried and misses some times. *)
      writeln(fout)
   end;
   writeln(fout)
end;
(* end module untex.themain *)

begin
   themain(input, output);
1: end.