/* Output from p2c 1.21alpha-07.Dec.93, the Pascal-to-C translator */ /* From input file "dbclean.p" */ #include /* dbclean: remove non-entry material Tom Schneider NCI/FCRDC Bldg 469. Room 144 P.O. Box B Frederick, MD 21702-1201 (301) 846-5581 (-5532 for messages) toms@ncifcrf.gov http://www-lmmb.ncifcrf.gov/~toms/ National Cancer Institute Laboratory of Mathematical Biology */ /* end of program */ /* begin module version */ #define version 1.05 /* of dbclean.p 1996 Oct 8 origin 1996 August 24 */ /* end module version */ /* begin module describe.dbclean */ /* name dbclean: remove non-entry material synopsis dbclean(dbin: in, dbout: out) files dbin: a file containing genbank flat-file entries and any other junk. dbout: just the genbank entires without the junk. output: messages to the user description The retrieve@ncbi.nlm.nih.gov will return genbank entries with other junk. This program removes the junk. examples documentation see also author Thomas Dana Schneider bugs technical notes */ /* end module describe.dbclean */ /* begin module interact.const */ #define maxstring 150 /* the maximum string */ /* end module interact.const version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module my.filler.const */ #define fillermax 20 /* the size of the filler array for a string */ /* end module my.filler.const */ /* from filler.const version = 4.13; (@ of prgmod.p 1994 sep 5 */ /* begin module interact.type */ typedef struct string { /* a string of characters */ Char letters[maxstring]; /* the letters in the string */ long length; /* the number of characters in the string */ long current; /* the letter we are working on */ } string; /* end module interact.type version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module filler.type */ /* the following is an array used to fill a string. it is convenient to have it much shorter than the maxstring, so that it is easy to fill the string using procedure fillstring. the user must declare the value of constant fillermax. */ typedef Char filler[fillermax]; /* end module filler.type version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module trigger.type */ typedef struct trigger { /* an object to be searched for */ string seek; /* the characters looked for */ long state; /* how close to triggering we are */ boolean skip; /* trigger not found- skip the line */ /* the trigger was found */ boolean found; } trigger; /* end module trigger.type version = 4.16; (@ of prgmod.p 1996 August 12 */ Static _TEXT dbin, dbout; /* files used by this program */ Static jmp_buf _JL1; /* begin module halt */ Static Void halt() { /* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. */ printf(" program halt.\n"); longjmp(_JL1, 1); } /* end module halt version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module interact.clearstring */ Static Void clearstring(ribbon) string *ribbon; { /* empty the string */ long index; /* to the ribbon */ for (index = 0; index < maxstring; index++) ribbon->letters[index] = ' '; ribbon->length = 0; ribbon->current = 0; } /* clearstring */ /* end module interact.clearstring version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module interact.writestring */ Static Void writestring(tofile, s) _TEXT *tofile; string *s; { /* write the string s to file tofile, no writeln */ long i; /* index to s */ long FORLIM; FORLIM = s->length; for (i = 0; i < FORLIM; i++) putc(s->letters[i], tofile->f); } /* writestring */ /* end module interact.writestring version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module filler.fillstring */ Static Void fillstring(s, a) string *s; Char *a; { /* this procedure makes it reasonably easy to fill the string s with characters. one calls the procedure as: */ /* 1 2 3 4 5 */ /* 12345678901234567890123456789012345678901234567890 */ /* fillstring(s, 'this-is-the-string '); the two comments make it easy to line the characters up. also, for this example, it was assumed that the length of filler as defined by the constant fillermax was 50. */ long length = fillermax; /* of the string without trailing blanks */ long index; /* of s */ clearstring(s); while (length > 1 && a[length-1] == ' ') length--; if (length == 1 && a[length-1] == ' ') { printf("fillstring: the string is empty\n"); halt(); } for (index = 0; index < length; index++) s->letters[index] = a[index]; s->length = length; s->current = 1; } /* fillstring */ /* end module filler.fillstring version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module filler.filltrigger */ Static Void filltrigger(t, a) trigger *t; Char *a; { /* fill the trigger t */ fillstring(&t->seek, a); } /* fillstring */ /* end module filler.filltrigger version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module trigger.proc */ /* this module allows one to scan a series of characters, as from an array or a file, and to "trigger" or detect a simple string in the series. the advantage of the trigger is that several triggers can "observe" a stream of characters at once, each looking for a different thing. some other modules required: interact.const, interact.type */ Static Void resettrigger(t) trigger *t; { /* reset the trigger to ground state */ t->state = 0; t->skip = false; t->found = false; } /* resettrigger */ Static Void testfortrigger(ch, t) Char ch; trigger *t; { /* look at the character ch. if it is part of the trigger (at the current trigger state), then the trigger state goes higher. if it is not part of the trigger then the trigger state is reset, skip is true and one should skip onward to find the trigger. if the trigger is found, found is true. */ t->state++; /* writestring(output,seek); writeln(output,'testfortrigger seek.letters[',state:1,']:"', seek.letters[state],'" ch:"',ch,'"'); writeln(output,'ord(ch) = ',ord(ch):1); writeln(output,'ord(seek.letters[state]) = ',ord(seek.letters[state]):1); */ /* if debugging then begin writestring(list,seek); writeln(list,'testfortrigger seek.letters[',state:1,']:', seek.letters[state],' ch:',ch); end;*/ if (t->seek.letters[t->state - 1] == ch) { t->skip = false; if (t->state == t->seek.length) t->found = true; else t->found = false; return; } t->state = 0; t->skip = true; t->found = false; /* reset trigger */ } /* testfortrigger */ /* end module trigger.proc version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module copyaline */ Static Void copyaline(fin, fout) _TEXT *fin, *fout; { /* copy a line from file fin to file fout */ while (!P_eoln(fin->f)) { putc(P_peek(fin->f), fout->f); getc(fin->f); } fscanf(fin->f, "%*[^\n]"); getc(fin->f); putc('\n', fout->f); } /* copyaline */ /* end module copyaline version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module copyline */ Static Void copyline(fin, fout) _TEXT *fin, *fout; { /* copy a line from file fin to file fout but DO NOT CARRIAGE RETURN on the fout. Carriage return on the fin. */ while (!P_eoln(fin->f)) { putc(P_peek(fin->f), fout->f); getc(fin->f); } fscanf(fin->f, "%*[^\n]"); getc(fin->f); } /* copyline */ /* end module copyline version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module skipblanks */ Static Void skipblanks(thefile) _TEXT *thefile; { /* skip over blanks until a non-blank, or end of line, is found */ while ((P_peek(thefile->f) == ' ') & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipnonblanks(thefile) _TEXT *thefile; { /* skip over nonblanks until a blank, or end of line, is found */ while ((P_peek(thefile->f) != ' ') & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipcolumn(thefile) _TEXT *thefile; { /* skip over a data column */ skipblanks(thefile); skipnonblanks(thefile); } /* end module skipblanks version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module grabtoken */ Static Void grabtoken(thefile, thestring) _TEXT *thefile; string *thestring; { /* skip any blanks and then grab the next token from the file */ Char c; /* a character in thefile */ boolean done = false; /* done finding the name */ skipblanks(thefile); thestring->length = 0; while (!done) { if (P_eoln(thefile->f)) { done = true; break; } c = getc(thefile->f); if (c == '\n') c = ' '; if (c == ' ') done = true; else { thestring->length++; thestring->letters[thestring->length - 1] = c; } } } /* end module grabtoken version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module dbclean.themain */ Static Void themain(dbin, dbout) _TEXT *dbin, *dbout; { /* the main procedure of the program */ Char c; /* a character in db */ boolean done; /* done copying locus? */ long linenumber = 1; /* count of the lines in db */ trigger locus; /* trigger to find the LOCUS pattern */ trigger locusend; /* trigger to find the end of the LOCUS pattern */ long locuscount = 0; /* count of locus */ printf("dbclean %4.2f\n", version); if (*dbin->name != '\0') { if (dbin->f != NULL) dbin->f = freopen(dbin->name, "r", dbin->f); else dbin->f = fopen(dbin->name, "r"); } else rewind(dbin->f); if (dbin->f == NULL) _EscIO2(FileNotFound, dbin->name); RESETBUF(dbin->f, Char); if (*dbout->name != '\0') { if (dbout->f != NULL) dbout->f = freopen(dbout->name, "w", dbout->f); else dbout->f = fopen(dbout->name, "w"); } else { if (dbout->f != NULL) rewind(dbout->f); else dbout->f = tmpfile(); } if (dbout->f == NULL) _EscIO2(FileNotFound, dbout->name); SETUPBUF(dbout->f, Char); /* 1 2 3 4 5 */ /* 12345678901234567890123456789012345678901234567890 */ filltrigger(&locus, "LOCUS "); filltrigger(&locusend, "// "); resettrigger(&locus); resettrigger(&locusend); /* we are already at the first line */ while (!BUFEOF(dbin->f)) { c = getc(dbin->f); if (c == '\n') c = ' '; testfortrigger(c, &locus); if (locus.found) { locuscount++; fprintf(dbout->f, "LOCUS"); /* copy the locus */ done = false; while (!done) { while (!P_eoln(dbin->f)) { c = getc(dbin->f); if (c == '\n') c = ' '; putc(c, dbout->f); testfortrigger(c, &locusend); if (locusend.found) { putc('\n', dbout->f); done = true; } /* no need to copy the locusend, but add blank: */ } resettrigger(&locus); resettrigger(&locusend); fscanf(dbin->f, "%*[^\n]"); getc(dbin->f); putc('\n', dbout->f); if (BUFEOF(dbin->f)) { if (done) break; printf("BAD DATABASE: end of dbin before end of an entry\n"); halt(); } } } if (BUFEOF(dbin->f)) break; if (P_eoln(dbin->f)) { fscanf(dbin->f, "%*[^\n]"); getc(dbin->f); } } if (locuscount != 1) printf("%ld loci\n", locuscount); else printf("%ld locus\n", locuscount); } /* end module dbclean.themain */ main(argc, argv) int argc; Char *argv[]; { PASCAL_MAIN(argc, argv); if (setjmp(_JL1)) goto _L1; dbout.f = NULL; strcpy(dbout.name, "dbout"); dbin.f = NULL; strcpy(dbin.name, "dbin"); themain(&dbin, &dbout); _L1: if (dbin.f != NULL) fclose(dbin.f); if (dbout.f != NULL) fclose(dbout.f); exit(EXIT_SUCCESS); } /* End. */