/* Output from p2c 1.21alpha-07.Dec.93, the Pascal-to-C translator */ /* From input file "sortbibtex.p" */ #include /* sortbibtex: sort a bibtex database Dr. Thomas D. Schneider National Institutes of Health National Cancer Institute Center for Cancer Research Nanobiology Program Molecular Information Theory Group Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu (use only if first address fails) http://www.ccrnp.ncifcrf.gov/~toms/ */ /* end of program */ /* begin module version */ #define version 2.29 /* of sortbibtex.p 2007 Jun 14 2007 Jun 14, 2.29: remove extra blanks between entries 2007 Jun 14, 2.28: remove duplicates 2005 May 11, 2.27: Make compatable with the Gnu Pascal Compiler (GPC) Mostly this is that GPC cannot read into packed arrays, so I switched to unpacked arrays. Hopefully there is enough memory to do the sort! 1997 Apr 15, 2.14: upgrade to sort on year forwards or backwards origin 1990 May 2 */ #define updateversion 2.25 /* defines lowest acceptable current parameter file */ /* end module version */ /* begin module describe.sortbibtex */ /* name sortbibtex: sort a bibtex database synopsis sortbibtex(fin: in, sortbibtexp: in, fout: out, output: out) files fin: a bibtex database Each entry is identified by the '@' symbol; all other lines are ignored. This means that if you have blanks on lines between entries, the entries will be fused together. To avoid this, pass the database through the rembla program first. sortbibtexp: parameters to control the program. The file must contain the following parameters, one per line: The version number of the program. This allows the user to be warned if an old parameter file is used. sortcontrol: the first character on the line controls the sorting. k(ey): sort on key y(ear): sort on year r(everse year): sort on year in reverse order numbered: if the first character is 'n', number the entries in increasing year order. removeduplicates: if the first character is 'r', remove duplicate entries. If two entries are not the same data will be lost. fout: bibtex database sorted by the key output: messages to the user, including errors in the structure of the database and duplicate entries. description Sort a BibTeX database by the citation keys. If you want to collect unverified or incomplete references in a raw database as BiBTeX format, you can replace the "@" of an entry with a "#". Sortbibtex will drop these entries, giving a functional database. examples example of sortbibtexp: 2.14 version of sortbibtex that this parameter file is designed for. y sortcontrol: k(ey), y(ear), r(everse year) n numbered: n means number documentation see also rembla.p author Thomas Dana Schneider bugs Entries are defined by blank lines. Use rembla to make sure that there are no extra spaces on the ends of lines. technical notes */ /* end module describe.sortbibtex */ /* const */ #define mapmax 200000L /* largest number of entries that can be handled by the program. It determines the size of the map array. */ #define linewidth 80 /* maximum width of lines in the file (one should never exceed 80 characters. This is for safety of transportation of files on tape and over the net. It also avoids confusion with wrapped lines. */ typedef long position; /* somewhere on the map. note: position 0 is not used, but it allows the quicksort to function properly */ /*GPC cannot read into packed arrays entryline = packed record (* a line of an entry *) */ typedef struct entryline { /* a line of an entry */ Char string[linewidth]; /* a character string */ long stringlength; /* length of the string */ struct entryline *next; /* pointer to the next line */ } entryline; /* pointer to an entry */ /*GPC cannot read into packed arrays entry = packed record (* entry of a bibtex database *) */ typedef struct entry_ { /* entry of a bibtex database */ Char key[linewidth]; /* the citation key to sort on */ long yearkey; /* the year taken from the key (for sorting) */ long yearentry; /* the year taken from the entry (for sorting) */ entryline *line; /* the lines of the entry */ long number; /* the entry number */ } entry_; Static _TEXT fin, sortbibtexp, fout; /* files used by this program */ /* the entire set of entries read in */ /*GPC cannot read into packed arrays map: packed array[1..mapmax] of entryptr; */ Static entry_ *map[mapmax]; Static Char sortcontrol; /* k(ey), y(ear), r(everse year) */ Static jmp_buf _JL1; /* begin module halt */ Static Void halt() { /* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. */ printf(" program halt.\n"); longjmp(_JL1, 1); } /* end module halt version = 'prgmod 4.05 89 Aug 28 tds'; */ Static Void writekey(f, e) _TEXT *f; entry_ *e; { /* write the key of the entry e */ long k = 1; /* position in a key */ while (e->key[k-1] != ' ') { putc(e->key[k-1], f->f); k++; } } Static boolean lessthan(alow, blow) position alow, blow; { /* Is the entry at alow less than (before, alphabetically or by year) the entry at blow? */ boolean Result; Char a, b; /* characters in the two keys */ boolean done; /* are we done yet? */ long k; /* position in a key */ Char sortit; /* intermediate sort control */ if (sortcontrol == 'k') sortit = sortcontrol; else { if (map[alow-1]->yearentry == map[blow-1]->yearentry || map[alow-1]->yearentry == 0 || map[blow-1]->yearentry == 0) sortit = 'k'; else sortit = sortcontrol; if (map[alow-1]->yearentry == 0 && map[blow-1]->yearentry != 0) sortit = 'a'; if (map[blow-1]->yearentry == 0 && map[alow-1]->yearentry != 0) sortit = 'b'; } /* if the years are the same, or one is 0 (no year) then sort on the key */ /* writeln('sortcontrol=',sortcontrol); writeln('sortit=',sortit); */ /* writeln(output,'-----------'); write(output,'sortit=',sortit); if alow = blow then write(output,' EQUAL') else write(output,' -----'); write(output,' alow=',alow:2); write(output,' blow=',blow:2); write(output,' "'); writekey(output,map[alow]); write(output,' " vs "'); writekey(output,map[blow]); writeln(output,'"'); */ switch (sortit) { case 'a': Result = true; break; case 'b': Result = false; break; case 'y': if (map[alow-1]->yearentry < map[blow-1]->yearentry) Result = true; else Result = false; break; case 'r': if (map[alow-1]->yearentry < map[blow-1]->yearentry) Result = false; else Result = true; break; case 'k': done = false; k = 0; /* writeln(output,'lessthan:'); write(output,'"'); writekey(output,map[alow]); write(output,'" vs "'); writekey(output,map[blow]); writeln(output,'"'); */ while (!done) { k++; a = map[alow-1]->key[k-1]; b = map[blow-1]->key[k-1]; /* writeln(output,'a=',a,' b=',b); */ if (a == ' ' && b == ' ') { /* writeln(output,'identical entry keys: '); writekey(output,map[alow]); */ done = true; Result = false; } if (done) break; if (a == ' ' || b == ' ') { done = true; if (a == ' ') Result = true; else Result = false; continue; } if (a < b) { /* writekey(output,map[alow]); write(output,' < '); writekey(output,map[blow]); writeln(output); */ Result = true; done = true; } if (a > b) { /* writekey(output,map[alow]); write(output,' > '); writekey(output,map[blow]); writeln(output); */ Result = false; done = true; } } break; } return Result; } Static Void swap_(a, b) position a, b; { /* switch positions a and b */ entry_ *hold; hold = map[a-1]; map[a-1] = map[b-1]; map[b-1] = hold; } /* begin module quicksort */ Static Void quicksort(left, right) position left, right; { /* quick sort a list between positions left and right, into ascending order. a position is simply a scalar of the form 0..max. the array to be sorted is dimensioned 1..max. (the difference in the ranges is important to the correct operation of the sort...) two external routines are used: function lessthan(a, b: position): boolean is a generalized test for value-at-a < value-at-b. procedure swap(a, b: position) switches the items at positions a and b. since these routines are external, the procedure is general. this procedure taken from the book 'algorithms + data structures = programs' by niklaus wirth, prentice-hall, inc., englewood cliffs, n.j.(1976), pp. 76-82 */ position lower = left; position upper; /* the positions looked at currently */ position center; /* the rough center of the region being sorted */ center = (left + right) / 2; upper = right; do { while (lessthan(lower, center)) lower++; while (lessthan(center, upper)) upper--; if (lower <= upper) { /* keep track of the center through the map: */ if (lower == center) center = upper; else if (upper == center) center = lower; swap_(lower, upper); lower++; upper--; } } while (lower <= upper); if (left < upper) quicksort(left, upper); if (lower < right) quicksort(lower, right); } /* end module quicksort version = 'prgmod 4.05 89 Aug 28 tds'; */ Static Void readline(f, l, linenumber) _TEXT *f; entryline **l; long linenumber; { /* read a bibtex line from f into l. The line number is linenumber */ Char c; entryline *WITH; /* a character read in. GPC cannot read directly into the string */ *l = (entryline *)Malloc(sizeof(entryline)); WITH = *l; WITH->stringlength = 0; while (!P_eoln(f->f) && WITH->stringlength < linewidth) { WITH->stringlength++; /* read(f,l^.string[stringlength]); */ c = getc(f->f); if (c == '\n') c = ' '; (*l)->string[WITH->stringlength - 1] = c; if ((WITH->stringlength == linewidth) & (!P_eoln(f->f))) printf("line %ld is longer than %ld characters. Make it two lines.\n", linenumber, (long)linewidth); } fscanf(f->f, "%*[^\n]"); getc(f->f); WITH->next = NULL; } Static Void readentry(f, linenumber, e) _TEXT *f; long *linenumber; entry_ **e; { /* read a bibtex entry from f into e, keep track of the current line number in linenumber. If the end of file is found, e is nil. */ Char c; /* a character in the key of the entry */ entryline *l; /* pointer to a line of text */ long i; /* index to a line of an entry */ long numberlength = 0; /* the length of the number */ long numbers[3]; long p = 1; /* position on a line */ long pkey; /* position on a line that the key starts */ entry_ *WITH; entryline *WITH1; _TEXT TEMP; P_addset(P_expset(numbers, 0L), '0'); P_addset(numbers, '1'); P_addset(numbers, '2'); P_addset(numbers, '3'); P_addset(numbers, '4'); P_addset(numbers, '5'); P_addset(numbers, '6'); P_addset(numbers, '7'); P_addset(numbers, '8'); P_addset(numbers, '9'); *e = (entry_ *)Malloc(sizeof(entry_)); WITH = *e; /* read the entry in */ /* locate the start of the entry */ while ((!BUFEOF(f->f)) & (P_peek(f->f) != '@')) { fscanf(f->f, "%*[^\n]"); getc(f->f); (*linenumber)++; } if (BUFEOF(f->f)) { Free(*e); *e = NULL; /* there was junk at the end of the file, clear out this entry it is meaningless */ return; } /* read the first line in */ readline(f, &WITH->line, *linenumber); (*linenumber)++; /* find the citation key on the line */ while (WITH->line->string[p-1] != '{') { if (p == linewidth) { printf("line %ld is missing the \"{\"\n", *linenumber); halt(); } p++; } p++; pkey = p; /* read in the key */ WITH->yearkey = 0; while (WITH->line->string[p-1] != ',') { if (p == linewidth) { printf("line %ld is missing the \",\"\n", *linenumber); halt(); } c = WITH->line->string[p-1]; WITH->key[p - pkey] = c; /* pull out year as a number */ if (P_inset(c, numbers)) { WITH->yearkey = WITH->yearkey * 10 + c - '0'; numberlength++; } else { /* if the yearkey is not THE LAST 4 characters, kill it: */ WITH->yearkey = 0; } p++; } if (numberlength < 4) WITH->yearkey = 0; /* finish the key with a blank character */ WITH->key[p - pkey] = ' '; /* write(output,'line ',linenumber:1,' "'); writekey(output,e); writeln(output,'"'); */ /* read the rest of the entry */ l = WITH->line; while ((!BUFEOF(f->f)) & (!P_eoln(f->f))) { (*linenumber)++; readline(f, &l->next, *linenumber); if (!BUFEOF(f->f)) l = l->next; } /* redo year from rest of entry */ l = WITH->line; WITH->yearentry = 0; /* look for 'year' */ while (l != NULL) { WITH1 = l; i = 1; /* skip blanks */ while (i < WITH1->stringlength && WITH1->string[i-1] == ' ') i++; if (WITH1->string[i-1] == 'y') { if (WITH1->string[i] == 'e') { if (WITH1->string[i+1] == 'a') { if (WITH1->string[i+2] == 'r') { /* skip blanks */ i += 4; while (i < WITH1->stringlength && WITH1->string[i-1] == ' ') i++; if (WITH1->string[i-1] == '=') { i++; while (i < WITH1->stringlength && WITH1->string[i-1] == ' ') i++; if (WITH1->string[i-1] == '"') i++; numberlength = 0; while (P_inset(WITH1->string[i-1], numbers) && numberlength <= 4) { c = WITH1->string[i-1]; WITH->yearentry = WITH->yearentry * 10 + c - '0'; numberlength++; i++; } if (numberlength != 4) { printf("bad year found in:\n"); TEMP.f = stdout; *TEMP.name = '\0'; writekey(&TEMP, *e); putchar('\n'); } } } } } } l = l->next; } /* writeln(output,yearentry:1, ' ',yearkey:1); */ if (WITH->yearentry == WITH->yearkey || WITH->yearkey == 0) return; printf("year in entry not equal to year in key in:\n"); TEMP.f = stdout; *TEMP.name = '\0'; writekey(&TEMP, *e); putchar('\n'); } Static Void writeentry(f, e) _TEXT *f; entry_ *e; { /* write the entry e to f */ entryline *l; /* pointer to a line of text */ long p; /* position on a line */ entryline *WITH; long FORLIM; l = e->line; while (l != NULL) { WITH = l; FORLIM = WITH->stringlength; for (p = 0; p < FORLIM; p++) putc(WITH->string[p], f->f); putc('\n', f->f); l = l->next; } } Static Void showentries(afile, entries) _TEXT *afile; long entries; { /* show all entries */ position e; /* index to the entries */ entry_ *WITH; for (e = 0; e <= entries - 1; e++) { WITH = map[e]; if (WITH->yearentry != 0) fprintf(afile->f, "%4ld ", WITH->yearentry); else fprintf(afile->f, "%5c", ' '); writekey(afile, map[e]); putc('\n', afile->f); } } /* Local variables for themain: */ struct LOC_themain { _TEXT *fout; long entries; /* counter of the entries read in */ position e; /* index to the entries */ } ; /* if the first character is 'r', remove duplicate entries. If two entries are not the same data will be lost. */ Local Void giveentry(LINK) struct LOC_themain *LINK; { /* give the entry with or without blanks between */ writeentry(LINK->fout, map[LINK->e-1]); if (LINK->e != LINK->entries) putc('\n', LINK->fout->f); /* space between entries */ } /* begin module sortbibtex.themain */ Static Void themain(fin, fout_) _TEXT *fin, *fout_; { /* the main procedure of the program */ struct LOC_themain V; long linenumber = 0; /* line number in the file */ Char numbered; /* if 'n' number the entries */ double parameterversion; /* parameter version number */ Char removeduplicates; _TEXT TEMP; position FORLIM; /* else writeentry(fout,map[e]); end else writeentry(fout,map[e]); end else writeentry(fout,map[e]); if e <> entries then writeln(fout) (* space between entries *) */ V.fout = fout_; printf("sortbibtex %4.2f\n", version); if (*sortbibtexp.name != '\0') { if (sortbibtexp.f != NULL) sortbibtexp.f = freopen(sortbibtexp.name, "r", sortbibtexp.f); else sortbibtexp.f = fopen(sortbibtexp.name, "r"); } else rewind(sortbibtexp.f); if (sortbibtexp.f == NULL) _EscIO2(FileNotFound, sortbibtexp.name); RESETBUF(sortbibtexp.f, Char); fscanf(sortbibtexp.f, "%lg%*[^\n]", ¶meterversion); getc(sortbibtexp.f); if (parameterversion < updateversion) { printf("You have an old parameter file!\n"); halt(); } fscanf(sortbibtexp.f, "%c%*[^\n]", &sortcontrol); getc(sortbibtexp.f); if (sortcontrol == '\n') sortcontrol = ' '; if (sortcontrol != 'r' && sortcontrol != 'y' && sortcontrol != 'k') { printf("sortcontrol must be one of k, y, r\n"); halt(); } fscanf(sortbibtexp.f, "%c%*[^\n]", &numbered); getc(sortbibtexp.f); if (numbered == '\n') numbered = ' '; fscanf(sortbibtexp.f, "%c%*[^\n]", &removeduplicates); getc(sortbibtexp.f); if (removeduplicates == '\n') removeduplicates = ' '; if (*fin->name != '\0') { if (fin->f != NULL) fin->f = freopen(fin->name, "r", fin->f); else fin->f = fopen(fin->name, "r"); } else rewind(fin->f); if (fin->f == NULL) _EscIO2(FileNotFound, fin->name); RESETBUF(fin->f, Char); if (*V.fout->name != '\0') { if (V.fout->f != NULL) V.fout->f = freopen(V.fout->name, "w", V.fout->f); else V.fout->f = fopen(V.fout->name, "w"); } else { if (V.fout->f != NULL) rewind(V.fout->f); else V.fout->f = tmpfile(); } if (V.fout->f == NULL) _EscIO2(FileNotFound, V.fout->name); SETUPBUF(V.fout->f, Char); V.entries = 0; while (!BUFEOF(fin->f)) { V.entries++; readentry(fin, &linenumber, &map[V.entries-1]); if (map[V.entries-1] == NULL) V.entries--; } printf("%ld entries read\n", V.entries); quicksort(1L, V.entries); TEMP.f = stdout; *TEMP.name = '\0'; showentries(&TEMP, V.entries); FORLIM = V.entries; /* for e := 2 to entries do begin if not(lessthan(e, e-1)) and not(lessthan(e-1, e)) then begin (* must be identical! *) write(output,'duplicate entry: '); writekey(output,map[e]); writeln(output); end end; */ /* put the entire set of entries into fout */ for (V.e = 1; V.e <= FORLIM; V.e++) { if (numbered == 'n') { if (sortcontrol == 'r') fprintf(V.fout->f, "%% %ld\n", V.entries - V.e + 1); else fprintf(V.fout->f, "%% %ld\n", V.e); } if (V.e > 1) { if (removeduplicates == 'r') { if ((!lessthan(V.e, V.e - 1)) & (!lessthan(V.e - 1, V.e))) { /* must be identical! */ printf("duplicate entry: "); TEMP.f = stdout; *TEMP.name = '\0'; writekey(&TEMP, map[V.e-1]); putchar('\n'); } else giveentry(&V); } else giveentry(&V); } else giveentry(&V); } } /* end module sortbibtex.themain */ main(argc, argv) int argc; Char *argv[]; { PASCAL_MAIN(argc, argv); if (setjmp(_JL1)) goto _L1; fout.f = NULL; strcpy(fout.name, "fout"); sortbibtexp.f = NULL; strcpy(sortbibtexp.name, "sortbibtexp"); fin.f = NULL; strcpy(fin.name, "fin"); themain(&fin, &fout); _L1: if (fin.f != NULL) fclose(fin.f); if (sortbibtexp.f != NULL) fclose(sortbibtexp.f); if (fout.f != NULL) fclose(fout.f); exit(EXIT_SUCCESS); } /* End. */