program decom(input, output);
(* decom: remove comment starts from within a comment

  Dr. Thomas D. Schneider
  National Cancer Institute
  Laboratory of Experimental and Computational Biology
  Frederick, Maryland  21702-1201
  toms@ncifcrf.gov
  permanent email: toms@alum.mit.edu
  http://www.lecb.ncifcrf.gov/~toms/
*)

const
(* begin module version *)
version = 1.07; (* of decom.p 2000 Oct 24
2000 Oct 24, 1.06: upgrade documentation
1996 Feb 19, 1.05: previous version
rebuilt to handle all cases 1996 Feb 19
origin 1988 jan 6 *)
(* end module version *)

(* begin module describe.decom *)
(*
name
   decom: remove comments from within a comment

synopsis
   decom(input: in; output: out)

files
   input: a program having comments within comments.

   output: the same program with internal comments neutralized.

description
   In Pascal there are two kinds of comments, brace and two-character.
   On occasion one will have one of these inside the other (eg, this was
   originally generated by the module.p program).  Some compilers cannot
   handle this situation.  This program destroys the comments inside
   other comments by replacing their parts with other characters.  It is
   smart enough not to destroy comments inside quote strings.

see also
   {Program mentioned in the description above: }
   module.p
   {Other comment manipulation programs: }
   nocom.p
   codecomments.p
   {The gpc Gnu Pascal Compiler}
   http://agnes.dida.physik.uni-essen.de/~gnu-pascal/home.html
   {can handle embedded comments by using the --no-mixed-comments flag.  See:}
   ftp://ftp.ncifcrf.gov/pub/delila/gpcc

author
   Thomas Dana Schneider

bugs

technical notes
   The program defines several characters that are used to replace
   comment characters.  If the ones provided do not work on your system,
   you can use different ones.  The most convenient one is one which
   rarely occurs in a program and yet is easily found by a search in
   your editor.  In Unix for the vi editor the pound sign '#' is a
   reasonable choice.

   A character that completes a two-character comment delimiter is never
   reused as the first character of another one, so a left parenthesis,
   a star and a right parenthesis in a row are read as a comment start
   followed by a right parenthesis.  Two-character delimiters are not
   recognized across a line boundary.

*)
(* end module describe.decom *)

const
   debug = false; (* turn on to see states changing *)

   zap = '#'; (* general replacement character *)
   zap2begin = zap; (* character to replace brace comment begin *)
   zap2end = zap; (* character to replace brace comment end *)
   zap3begin = zap; (* character to replace two-char comment begin *)
   zap3end = zap; (* character to replace two-char comment end *)

var
   c: char; (* the current character just read *)
   o: char; (* the output character *)
   p: char; (* the character previous to c on the same line.  It is a
               blank at the start of a line and also when the previous
               character already completed a two-character delimiter *)
   paired: boolean; (* true when c completed a two-character delimiter
                       together with p *)
   state: integer; (* state of the program.  The program moves between
      four states depending on the characters it sees:
      0: outside comments and quotes
      1: inside quotes
      2: inside brace comment
      3: inside two-character comment *)

begin
   state := 0; (* start outside the comments *)
   p := ' '; (* previous character is neutral *)

   (* Copy the input to the output one line at a time.  Every character
      is written exactly once, so the line layout never changes; only
      delimiter characters found inside the other kind of comment are
      replaced by the zap characters. *)
   while not eof(input) do begin
      while not eoln(input) do begin
         read(input,c);

         o := c; (* presume we will output this, subject to below *)
         paired := false;

         case state of
            0: begin (* outside comments and quotes *)
               if c = '''' then state := 1;
               if c = '{' then state := 2;
               if (p = '(') and (c = '*') then begin
                  state := 3;
                  paired := true
               end;
               if debug then o := '0';
            end;
            1: begin (* inside quotes *)
               if c = '''' then state := 0;
               if debug then o := '1';
            end;
            2: begin (* inside brace comment *)
               (* only the star of an embedded two-character delimiter
                  is replaced; that is enough to neutralize it *)
               if (p = '(') and (c = '*') then begin
                  o := zap2begin;
                  paired := true
               end;
               if (p = '*') and (c = ')') then begin
                  o := zap2end;
                  paired := true
               end;
               if c = '}' then state := 0;
               if debug then o := '2';
            end;
            3: begin (* inside two-character type comment *)
               if c = '{' then o := zap3begin;
               if c = '}' then o := zap3end;
               if (p = '*') and (c = ')') then begin
                  state := 0;
                  paired := true
               end;
               if debug then o := '3';
            end;
         end;

         write(output,o);

         (* Remember c for the next character.  When c has just been
            used as the second half of a delimiter it is forgotten, so
            that the same star cannot both open and close a comment. *)
         if paired
         then p := ' '
         else p := c;
      end;

      readln(input);
      writeln(output);
      p := ' '; (* ignore last character on previous line *)
   end;
end.