program htmlink(input, htmlinkp, list, output); (* htmlink: insert html links to program references in a file Dr. Thomas D. Schneider National Cancer Institute Laboratory of Experimental and Computational Biology Molecular Information Theory Group Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu (use only if first address fails) http://www.lecb.ncifcrf.gov/~toms/ *) label 1; (* end of program *) const (* begin module version *) version = 1.52; (* of htmlink.p 2004 Sep 15 2004 Sep 15: 1.52: document upgrade 2004 Jul 29: 1.48: update url for gpc 2004 Jul 29: 1.47: use strong 1.46; 2000 Oct 24: implement ftp:// 1.40; 2000 Jul 16: implement links for images: .gif and .jpg 1.39; 2000 Jul 15: "see also" only recognized at start of line!! 1.36; 2000 Jun 20: make {} comment marks go away on the html page. 1.35; 2000 Jan 10: fix bug in final pointer at page bottom 1.28; 2000 Jan 3: upgrade to recognize http links, clean up and do {}. 1.27; 1998 Mar 30: previous changes origin 1995 March 10 *) (* end module version *) (* begin module describe.htmlink *) (* name htmlink: insert html links to program references in a file synopsis htmlink(input: in, htmlinkp: in, list: out, output: out) files input: a delila module or other file containing references to programs in the form *.p and references to other files between the key words "see also" and "author". Comments: the material contained within curlie brackets, "{" and "}", will not be processed into hyper links. To create one of these characters, put a backslash "\" in front of it. The key words "see also" and "author" MUST be at the beginning of the line. This allows one to say "see also" in the middle of other descriptions. This is not a harsh restriction, since everything is indented as a standard anyway! htmlinkp: A parameter file that defines: first line: the ARCHIVE where files other than programs should be linked to. second line: font size for the output. list: a list of the files being linked to. output: the same file with the *.p converted to references to *.html, and the other references converted to hypertext links to ARCHIVE/reference. To prevent weird links from being made, this only occurs between the lines 'see also' and 'author', when these key words are at the start of the line. New as of 2000 Jan 3: names that begin with http:// are presented as straight html links. New as of 2000 Oct 24: names that begin with ftp:// are presented as straight html links. description This program allows one to convert the delila manual pages to hypertext linked objects. The program copies the input to the output, and only modifies the region between key words 'see also' and 'author' when these are left justified on the line. This defines the standard place that Delila documentation points to other files. Pascal programs ending in ".p" are made into the form: [A HREF ="doodle.html"]doodle.p[/A] (where the program is 'doodle.p and I have replaced "less than" with "[" and "greater than" with "]" to prevent the browser from interpreting this as a link.) so that they will point to other programs in the same directory, while other files are made into the form: [A HREF ="ARCHIVE/xyplop"]xyplop[/A] where ARCHIVE is a string read in from the htmlinkp parameter file and the file is "xyplop". The program is used by the shell script "dth" (delila to html). examples Below is the material in the "see also" section. After processing, you will see that the text looks the same except that things outside the curlie braces are hyperlinked. {\{ left curlie} { \{ left curlie} { { left curlie} {\} right curlie} { \} right curlie} { \} right curlie} { \ escape} { \\ escape} { \\\ escape} {Some programs that use this processing:} alist.p delila.p {A general hyperlink is demonstrated by my home page which is at} http://www.lecb.ncifcrf.gov/~toms/ {The mechanism even allows images!} http://www.lecb.ncifcrf.gov/~toms/icons/tinygumball.gif {An ftp hyperlink to the a script for running the Gnu Pascal Compiler:} ftp://ftp.ncifcrf.gov/pub/delila/gpcc {(The GNU Pascal Compiler is at} http://www.gnu-pascal.de/gpc/h-index.html{)} documentation see also {Example parameter file:} htmlinkp {A program that extracts the manual page of Delila programs:} makman.p {A general script for storing Delila programs in an archive:} dth {Here is the processing of the examples above.} {\{ left curlie} { \{ left curlie} { { left curlie} {\} right curlie} { \} right curlie} { \} right curlie} { \ escape} { \\ escape} { \\\ escape} {Some programs that use this processing:} alist.p delila.p {A general hyperlink is demonstrated by my home page which is at} http://www.lecb.ncifcrf.gov/~toms/ {The mechanism even allows images!} http://www.lecb.ncifcrf.gov/~toms/icons/tinygumball.gif {An ftp hyperlink to the a script for running the Gnu Pascal Compiler:} ftp://ftp.ncifcrf.gov/pub/delila/gpcc {(The GNU Pascal Compiler is at} http://www.gnu-pascal.de/gpc/h-index.html{)} author Thomas Dana Schneider bugs The triggers "see also" and "author" and their positions on the line should be generalized to allow the program to process any text. This would not be hard since it would only require new parameters being read in. technical notes Note: this program surrounds the file with [PRE] and [/PRE]. *) (* end module describe.htmlink *) (* more constants *) maxline = 100; (* longest line allowed *) fontsize = 2; (* size of fonts *) (* begin module interact.const *) maxstring = 150; (* the maximum string *) (* end module interact.const version = 4.15; (@ of prgmod.p 1994 November 12 *) (* begin module filler.const *) fillermax = 50; (* the size of the filler array for a string *) (* end module filler.const version = 4.15; (@ of prgmod.p 1994 November 12 *) type (* begin module interact.type *) string = record (* a string of characters *) letters: array[1..maxstring] of char; (* the letters in the string *) length: integer; (* the number of characters in the string *) current: integer; (* the letter we are working on *) end; (* end module interact.type version = 4.15; (@ of prgmod.p 1994 November 12 *) (* begin module trigger.type *) trigger = record (* an object to be searched for *) seek: string; (* the characters looked for *) state: integer; (* how close to triggering we are *) skip: boolean; (* trigger not found- skip the line *) found: boolean (* the trigger was found *) end; (* end module trigger.type version = 4.15; (@ of prgmod.p 1994 November 12 *) (* begin module filler.type *) (* the following is an array used to fill a string. it is convenient to have it much shorter than the maxstring, so that it is easy to fill the string using procedure fillstring. the user must declare the value of constant fillermax. *) filler = packed array[1..fillermax] of char; (* end module filler.type version = 4.15; (@ of prgmod.p 1994 November 12 *) var (* begin module const.htmlink *) htmlinkp: text; (* parameters to control the program *) list: text; (* list of files linked to *) (* end module const.htmlink *) (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 4.15; (@ of prgmod.p 1994 November 12 *) (* begin module interact.clearstring *) procedure clearstring(var ribbon: string); (* empty the string *) var index: integer; (* to the ribbon *) begin (* clearstring *) with ribbon do begin for index := 1 to maxstring do letters[index] := ' '; length := 0; current := 0; end end; (* clearstring *) (* end module interact.clearstring version = 4.15; (@ of prgmod.p 1994 November 12 *) (* begin module filler.fillstring *) procedure fillstring(var s: string; a: filler); (* this procedure makes it reasonably easy to fill the string s with characters. one calls the procedure as: *) (* 1 2 3 4 5 *) (* 12345678901234567890123456789012345678901234567890 *) (* fillstring(s, 'this-is-the-string '); the two comments make it easy to line the characters up. also, for this example, it was assumed that the length of filler as defined by the constant fillermax was 50. *) var length: integer; (* of the string without trailing blanks *) index: integer; (* of s *) begin (* fillstring *) clearstring(s); length := fillermax; while (length > 1) and (a[length] = ' ') do length := pred(length); if (length = 1) and (a[length] = ' ') then begin writeln(output, 'fillstring: the string is empty'); halt end; for index := 1 to length do s.letters[index] := a[index]; s.length := length; s.current := 1 end; (* fillstring *) (* end module filler.fillstring version = 4.15; (@ of prgmod.p 1994 November 12 *) (* begin module filler.filltrigger *) procedure filltrigger(var t: trigger; a: filler); (* fill the trigger t *) begin (* filltrigger *) fillstring(t.seek,a) end; (* fillstring *) (* end module filler.filltrigger version = 4.15; (@ of prgmod.p 1994 November 12 *) (* begin module trigger.proc *) (* this module allows one to scan a series of characters, as from an array or a file, and to "trigger" or detect a simple string in the series. the advantage of the trigger is that several triggers can "observe" a stream of characters at once, each looking for a different thing. some other modules required: interact.const, interact.type *) procedure resettrigger(var t: trigger); (* reset the trigger to ground state *) begin (* resettrigger *) with t do begin state := 0; skip := false; found := false end end; (* resettrigger *) procedure testfortrigger(ch: char; var t: trigger); (* look at the character ch. if it is part of the trigger (at the current trigger state), then the trigger state goes higher. if it is not part of the trigger then the trigger state is reset, skip is true and one should skip onward to find the trigger. if the trigger is found, found is true. *) begin (* testfortrigger *) with t do begin state := succ(state); (* if debugging then begin writestring(list,seek); writeln(list,'testfortrigger seek.letters[',state:1,']:', seek.letters[state],' ch:',ch); end;*) if seek.letters[state] = ch then begin skip := false; if state = seek.length then found := true else found := false end else begin (* reset trigger *) state := 0; skip := true; found := false end end end; (* testfortrigger *) (* end module trigger.proc version = 4.15; (@ of prgmod.p 1994 November 12 *) (* begin module interact.getstring *) procedure getstring(var afile: text; var buffer: string; var gotten: boolean); (* get a string from a file not using string calls. this lets one obtain lines from a file without interactive prompts *) var index: integer; (* of buffer *) begin (* getstring *) clearstring(buffer); if eof(afile) then gotten := false else begin index := 0; while (not eoln(afile)) and (index < maxstring) do begin index := succ(index); read(afile, buffer.letters[index]) end; if not eoln(afile) then begin writeln(output, ' getstring: a line exceeds maximum string size (', maxstring:1,')'); halt end; buffer.length := index; buffer.current := 1; readln(afile); gotten := true end end; (* getstring *) (* end module interact.getstring version = 4.15; (@ of prgmod.p 1994 November 12 *) (* begin module interact.writestring *) procedure writestring(var tofile: text; var s: string); (* write the string s to file tofile, no writeln *) var i: integer; (* index to s *) begin (* writestring *) with s do for i := 1 to length do write(tofile, letters[i]) end; (* writestring *) (* end module interact.writestring version = 4.15; (@ of prgmod.p 1994 November 12 *) (* begin module htmlink.writealine *) procedure writealine(var fout: text; aline: string; start, stop: integer; escape: char); (* write the aline string to fout from start to stop. Do not show escape symbols. *) var c: char; (* a character in aline *) i: integer; (* index to aline *) p: char; (* the character before c *) begin c := ' '; for i := start to stop do begin p := c; c := aline.letters[i]; if p = escape then write(fout,c) else if (c <> escape) and (p <> escape) then write(fout,c); { if (c <> escape) and (p <> escape) then write(fout,c); } end; end; (* end module htmlink.writealine *) (* begin module htmlink.themain *) procedure themain(var htmlinkp, list: text); (* the main procedure of the program *) const escape = '\'; (* the character that prevents processing of { or } *) var ARCHIVE: string; (* the location of other files *) aline: string; (* a line of text *) c: char; (* a character being processed *) comment: boolean; (* true when inside a comment *) insidecomment: boolean; (* previous stuff put us inside a comment, so if we see a \{ or \} these should be SHOWN. *) cprevious: char; (* the character before c *) done: boolean; (* done parsing a link? *) l: integer; (* current position in the buffer, and its length *) fontsize: integer; (* html font size *) gotten: boolean; (* was htmlinkp readable? *) follow: integer; (* index to b that follows behind i *) i: integer; (* index to b *) image: boolean; (* true when the link should be an image *) linkprocess: boolean; (* true only when we want to process a link *) seealsocount: integer; (* count of 'see also's to prevent more than the first in the file from being used *) potentiate: boolean; (* becomes true when we want to process a link. We don't want to process on the same line that "see also" is because then "see also" becomes part of the html rather than the copied material. So we hold off (by using this variable) one line before turning linkproces on. *) pureurl: boolean; (* is this a pure url and not an archive link? *) seealso: trigger; (* trigger for "see also" *) author: trigger; (* trigger for "author" *) http: trigger; (* trigger for "http" *) ftp: trigger; (* trigger for "ftp" *) gif: trigger; (* trigger for ".gif" *) jpg: trigger; (* trigger for ".jpg" *) begin (* set up the triggers *) (* 1 2 3 4 5 *) (* 12345678901234567890123456789012345678901234567890 *) filltrigger(seealso, 'see also '); filltrigger(author , 'author '); filltrigger(http , 'http:// '); filltrigger(ftp , 'ftp:// '); filltrigger(gif , '.gif '); filltrigger(jpg , '.jpg '); reset(htmlinkp); getstring(htmlinkp,ARCHIVE,gotten); { if not gotten then begin writeln(output,'ARCHIVE pointer is empty because htmlinkp is empty'); halt end; } readln(htmlinkp, fontsize); rewrite(list); writeln(output,''); writeln(output,''); writeln(output,''); writeln(output,'
');

   clearstring(aline);
   linkprocess := false;
   potentiate := false;
   comment := false;
   seealsocount := 0;
   while not eof(input) do begin

      (* read in a line of text *)
      getstring(input,aline,gotten);
      if not gotten then begin
         writeln(output,'htmlink: program error?');
         halt
      end;

      resettrigger(seealso);
      resettrigger(author);
      for l := 1 to aline.length do begin
         c := aline.letters[l];

         testfortrigger(c, seealso);
         if l = 8 then begin (* "see also" is 8 characters long *)
            if seealso.found then begin
               seealsocount := succ(seealsocount);
               if seealsocount = 1 then potentiate := true;
            end;
         end;

         testfortrigger(c, author);
         if l = 6 then begin (* "author" is 8 characters long *)
            if author.found then begin
               linkprocess := false;
               potentiate := false;
            end;
         end;
      end;

      if linkprocess then begin (* process the link section *)

         (* parse out the named objects on one line *)
         follow := 1;

         pureurl := false;
         c := ' ';
         while follow <= aline.length do begin

{
writeln(output,'l=',follow:1);
}
            cprevious := c;
            c := aline.letters[follow];
            insidecomment := (comment = true);
            if c = '{' then comment := true;
            if (cprevious <> escape) and (c = '}') then comment := false;
            if c = ' ' then write(output,' ');
            if c = ' ' then write(list,' ');
            if insidecomment and (cprevious = escape) then begin
{
            if cprevious = escape then begin
}
               if c = '{' then write(output,'{');
               if c = '{' then write(list,'{');
               if c = '}' then write(output,'}');
               if c = '}' then write(list,'}');
            end;

            if (c <> ' ') and (c <> ',')
            and (c <> '{') and (c <> '}')
            then begin
               (* parse *)
               resettrigger(http);
               resettrigger(ftp);
               resettrigger(gif);
               resettrigger(jpg);
               image := false;
               l := follow;
{
writeln(output); write(output,'parsed string: "');
}
               done := false;
               while not done do begin
                  testfortrigger(aline.letters[l], http);
                  if http.found then pureurl := true;

                  testfortrigger(aline.letters[l], ftp);
                  if ftp.found then pureurl := true;

                  testfortrigger(aline.letters[l], gif);
                  if gif.found then image := true;

                  testfortrigger(aline.letters[l], jpg);
                  if jpg.found then image := true;

{
write(output,aline.letters[l]);
}

                  if (l = aline.length)
                  then done := true
                  else begin
                     l := succ(l);
                     if (aline.letters[l] = ' ') or
                        (aline.letters[l] = ',') or
                        (aline.letters[l] = '}') or
                        (aline.letters[l] = '{')
                     then begin
                         done := true;
                         l := pred(l); (* don't include that *)
                     end
                  end
               end;
{
writeln(output,'"');
}

               if comment then begin
                  writealine(output, aline, follow, l, escape);
                  writealine(list,   aline, follow, l, escape);
               end
               else begin
                  (* if we just found *.p, write it out as HTML *)
                  if (aline.letters[l-1] = '.') and (aline.letters[l] = 'p')
                  then begin
                     write(output,''); (* replaces .p with .html *)
                     for i := follow to l do write(output,aline.letters[i]);
                     write(output,'');
                     for i := follow to l do write(list,aline.letters[i]);
                  end

                  else begin

                     if image then begin
                        write(output,'');

                     if not image then begin
                        (* complete the link by giving it again: *)
                        for i := follow to l
                        do write(output,aline.letters[i]);
                        write(output,'');
                     end;

                     for i := follow to l
                     do write(list,aline.letters[i]);

                  end;
               end;

               follow := l; (* step parsing forward across the word *)
            end;
            if c = ',' then write(output,',');
            follow := succ(follow);

         end;

         writeln(output);
         writeln(list);
      end
      else begin (* just copy the line *)
         writestring(output,aline);
         writeln(output);
      end;

      (* this line had a "see also" so process the next line *)
      if potentiate then linkprocess := true;

   end;

   writeln(output,'
'); writeln(output,'
'); writeln(output,'{created by htmlink ', version:4:2 ,'}'); end; (* end module htmlink.themain *) begin themain(htmlinkp, list); 1: end.