/* Output from p2c 1.21alpha-07.Dec.93, the Pascal-to-C translator */ /* From input file "malin.p" */ #include /* malin: make delila instructions from nth alignment of malign Dr. Thomas D. Schneider National Institutes of Health National Cancer Institute Center for Cancer Research Nanobiology Program Molecular Information Theory Group Frederick, Maryland 21702-1201 toms@ncifcrf.gov permanent email: toms@alum.mit.edu (use only if first address fails) http://www.ccrnp.ncifcrf.gov/~toms/ */ /* end of program */ /* begin module version */ #define version 1.14 /* of malin.p 2009 Apr 11 2009 Apr 11, 1.14: put version next to malin in distribution output file 2005 Sep 28, 1.13: malinp example 2001 May 7, 1.12: clean up output 1.12 2001 May 7: clean up output 1.10 2000 Dec 14: handle quotes and {} comments in inst. 1.09 1999 June 15: allow: "get from 5 -0 to piece end -5;" format 1.08 1997 April 10: program generates distribution file. 1.07 1997 April 10: ability to change zero base, version control. origin 1995 October 6 */ #define updateversion 1.07 /* defines lowest acceptable current parameter file */ /* end module version */ /* begin module describe.malin */ /* name malin: make delila instructions from nth alignment of malign synopsis malin(optinst: in, optalign: in, inst: in, malinp: in, cinst: out, distribution: out, output: out) files optinst: output of malign program containing absolute alignments optalign: output of malign program containing relative alignments inst: Delila instructions Allowed forms: get from 5 -5 to 5 +5; get from 5 -5 to same +5; get from 5 -5 to piece end -5; malinp: parameters to control the program first line: The version number of the program. This allows the user to be warned if an old parameter file is used. second line: one integer that defines which alignment to use to create the cinst. third line: one integer that defines how much to add to move the location of the zero base in the new instructions. cinst: Delila instructions of inst converted to the alignment of optinst chosen in malinp distribution: The distribution of the realignment. Lines that begin with "*" are comments. Otherwise, one integer per line, which is the separation in bases between the initial and final alignments. output: output program without private text description This program allows one to select one of the alignments created by malign and to make the corresponding Delila instructions. Because it copies the inst file it keeps the organism and chromosome information (along with all comments) so it is better than the "bestinst" file created by malign! examples documentation see also {example parameter file:} malinp {related programs:} malign.p, malopt.p author Thomas Dana Schneider bugs WARNING: This program does not use a book and so the coordinate shift of the third parameter will not work on coordinates that jump. technical notes NOTE: THIS PROGRAM WILL NOT HANDLE COMMENTS WITHIN THE DELILA INSTRUCTION! It must be of the form: get from 193 -20 to 193 +21; comments are allowed outside these statements. */ /* end module describe.malin */ /* begin module malin.const */ #define maxstring 1500 /* the maximum string */ /* end module malin.const */ #define fillermax 10 /* the size of the filler array for a string */ /* begin module interact.type */ typedef struct string { /* a string of characters */ Char letters[maxstring]; /* the letters in the string */ long length; /* the number of characters in the string */ long current; /* the letter we are working on */ } string; /* end module interact.type version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module filler.type */ /* the following is an array used to fill a string. it is convenient to have it much shorter than the maxstring, so that it is easy to fill the string using procedure fillstring. the user must declare the value of constant fillermax. */ typedef Char filler[fillermax]; /* end module filler.type version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module trigger.type */ typedef struct trigger { /* an object to be searched for */ string seek; /* the characters looked for */ long state; /* how close to triggering we are */ boolean skip; /* trigger not found- skip the line */ /* the trigger was found */ boolean found; } trigger; /* end module trigger.type version = 4.16; (@ of prgmod.p 1996 August 12 */ Static _TEXT optinst, optalign, inst, malinp, cinst, distribution; /* files of the program */ Static jmp_buf _JL1; /* begin module halt */ Static Void halt() { /* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. */ printf(" program halt.\n"); longjmp(_JL1, 1); } /* end module halt version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module interact.clearstring */ Static Void clearstring(ribbon) string *ribbon; { /* empty the string */ long index; /* to the ribbon */ for (index = 0; index < maxstring; index++) ribbon->letters[index] = ' '; ribbon->length = 0; ribbon->current = 0; } /* clearstring */ /* end module interact.clearstring version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module interact.getstring */ Static Void getstring(afile, buffer, gotten) _TEXT *afile; string *buffer; boolean *gotten; { /* get a string from a file not using string calls. this lets one obtain lines from a file without interactive prompts */ long index = 0; /* of buffer */ clearstring(buffer); if (BUFEOF(afile->f)) { *gotten = false; return; } while (!P_eoln(afile->f) && index < maxstring) { index++; buffer->letters[index-1] = getc(afile->f); if (buffer->letters[index-1] == '\n') buffer->letters[index-1] = ' '; } if (!P_eoln(afile->f)) { printf(" getstring: a line exceeds maximum string size (%ld)\n", (long)maxstring); halt(); } buffer->length = index; buffer->current = 1; fscanf(afile->f, "%*[^\n]"); getc(afile->f); *gotten = true; } /* getstring */ /* end module interact.getstring version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module interact.writestring */ Static Void writestring(tofile, s) _TEXT *tofile; string *s; { /* write the string s to file tofile, no writeln */ long i; /* index to s */ long FORLIM; FORLIM = s->length; for (i = 0; i < FORLIM; i++) putc(s->letters[i], tofile->f); } /* writestring */ /* end module interact.writestring version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module filler.fillstring */ Static Void fillstring(s, a) string *s; Char *a; { /* this procedure makes it reasonably easy to fill the string s with characters. one calls the procedure as: */ /* 1 2 3 4 5 */ /* 12345678901234567890123456789012345678901234567890 */ /* fillstring(s, 'this-is-the-string '); the two comments make it easy to line the characters up. also, for this example, it was assumed that the length of filler as defined by the constant fillermax was 50. */ long length = fillermax; /* of the string without trailing blanks */ long index; /* of s */ clearstring(s); while (length > 1 && a[length-1] == ' ') length--; if (length == 1 && a[length-1] == ' ') { printf("fillstring: the string is empty\n"); halt(); } for (index = 0; index < length; index++) s->letters[index] = a[index]; s->length = length; s->current = 1; } /* fillstring */ /* end module filler.fillstring version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module filler.filltrigger */ Static Void filltrigger(t, a) trigger *t; Char *a; { /* fill the trigger t */ fillstring(&t->seek, a); } /* fillstring */ /* end module filler.filltrigger version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module trigger.proc */ /* this module allows one to scan a series of characters, as from an array or a file, and to "trigger" or detect a simple string in the series. the advantage of the trigger is that several triggers can "observe" a stream of characters at once, each looking for a different thing. some other modules required: interact.const, interact.type */ Static Void resettrigger(t) trigger *t; { /* reset the trigger to ground state */ t->state = 0; t->skip = false; t->found = false; } /* resettrigger */ Static Void testfortrigger(ch, t) Char ch; trigger *t; { /* look at the character ch. if it is part of the trigger (at the current trigger state), then the trigger state goes higher. if it is not part of the trigger then the trigger state is reset, skip is true and one should skip onward to find the trigger. if the trigger is found, found is true. */ t->state++; /* if debugging then begin writestring(list,seek); writeln(list,'testfortrigger seek.letters[',state:1,']:', seek.letters[state],' ch:',ch); end;*/ if (t->seek.letters[t->state - 1] == ch) { t->skip = false; if (t->state == t->seek.length) t->found = true; else t->found = false; return; } t->state = 0; t->skip = true; t->found = false; /* reset trigger */ } /* testfortrigger */ /* end module trigger.proc version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module skipblanks */ Static Void skipblanks(thefile) _TEXT *thefile; { /* skip over blanks until a non-blank, or end of line, is found */ while ((P_peek(thefile->f) == ' ') & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipnonblanks(thefile) _TEXT *thefile; { /* skip over nonblanks until a blank, or end of line, is found */ while ((P_peek(thefile->f) != ' ') & (!P_eoln(thefile->f))) getc(thefile->f); } Static Void skipcolumn(thefile) _TEXT *thefile; { /* skip over a data column */ skipblanks(thefile); skipnonblanks(thefile); } Local Char sign(i) long i; { if (i >= 0) return '+'; else return '-'; } /* end module skipblanks version = 4.16; (@ of prgmod.p 1996 August 12 */ /* begin module malin.themain */ Static Void themain(optinst, optalign, inst, malinp, cinst, distribution) _TEXT *optinst, *optalign, *inst, *malinp, *cinst, *distribution; { /* The main procedure of the program. NOTE: '@' represents '*' in comments below. */ long a = 1; /* index for reading through alignments */ long alignments; /* number of alignments in optinst */ long analignment; /* an alignment to use from optinst */ Char c; /* a character from the inst */ long class_; /* index to the classes of alignments */ boolean debugging = false; /* set to true if debugging */ long fromvalue, fromrange; /* the from coordinate and the range from the from */ double H; /* uncertainty, bits */ long occurences; /* number of times an alignment appeared */ double parameterversion; /* parameter version number */ long s; /* index for reading through alignment sequence numbers */ long sequences; /* number of sequences */ long shift; /* how much a sequence is shifted */ long state = 0; /* state of the program. state = 0; scan and copy, outside comments when '(@' is found, move to state 1 t0a when 'get' is found, move to state 2 t0b when '{' is found, move to state 5 t0c when '"' is found, move to state 6 t0d when "'" is found, move to state 7 t0e state = 1; scan and copy, copy program comments when '@)' is found, move to state 0 t1a state = 2; scan and copy, find from when 'from' is found, move to state 3 t2a state = 3; read from value, relative from value, scan for 'to' when 'to' is found, move to state 4 t3a state = 4; read to values, relative to value move to state 0 state = 5; scan and copy {} comment when '}' is found, move to state 0 t5a state = 6; scan and copy " string when '"' is found, move to state 0 state = 7; scan and copy ' string when "'" is found, move to state 0 */ boolean shutup = false; /* stop copying for a while */ trigger t0a, t0b, t1a; /*t1b,*/ trigger t2a; /*t2b,*/ trigger t3a, t0c, t0d, t0e; /* new as of 2000 Dec 14 */ trigger t5a; /* triggers for each state */ long tovalue, torange; /* the to coordinate and the range from the to */ long theclass; /* the current class according to optinst */ long zerobase; /* the new zero coordinate */ printf("malin%5.2f\n", version); /* set to true if debugging */ filltrigger(&t0a, "(* "); filltrigger(&t0b, "get "); filltrigger(&t0c, "{ "); filltrigger(&t0d, "\" "); filltrigger(&t0e, "' "); filltrigger(&t1a, "*) "); filltrigger(&t2a, "from "); filltrigger(&t3a, "to "); filltrigger(&t5a, "} "); /* read parameters */ if (*malinp->name != '\0') { if (malinp->f != NULL) malinp->f = freopen(malinp->name, "r", malinp->f); else malinp->f = fopen(malinp->name, "r"); } else rewind(malinp->f); if (malinp->f == NULL) _EscIO2(FileNotFound, malinp->name); RESETBUF(malinp->f, Char); fscanf(malinp->f, "%lg%*[^\n]", ¶meterversion); getc(malinp->f); if (parameterversion < updateversion) { printf("You have an old parameter file!\n"); halt(); } fscanf(malinp->f, "%ld%*[^\n]", &analignment); getc(malinp->f); fscanf(malinp->f, "%ld%*[^\n]", &zerobase); getc(malinp->f); if (*optinst->name != '\0') { if (optinst->f != NULL) optinst->f = freopen(optinst->name, "r", optinst->f); else optinst->f = fopen(optinst->name, "r"); } else rewind(optinst->f); if (optinst->f == NULL) _EscIO2(FileNotFound, optinst->name); RESETBUF(optinst->f, Char); fscanf(optinst->f, "%ld%ld%*[^\n]", &sequences, &alignments); getc(optinst->f); /* read through optinst to get to the desired alignment */ while (a < analignment) { if (BUFEOF(optinst->f)) { printf("alignment %ld does not exist\n", analignment); halt(); } a++; fscanf(optinst->f, "%ld%lg%*[^\n]", &occurences, &H); getc(optinst->f); for (s = 1; s <= sequences; s++) fscanf(optinst->f, "%ld", &fromvalue); fscanf(optinst->f, "%*[^\n]"); getc(optinst->f); } fscanf(optinst->f, "%ld%lg%*[^\n]", &occurences, &H); getc(optinst->f); if (*inst->name != '\0') { if (inst->f != NULL) inst->f = freopen(inst->name, "r", inst->f); else inst->f = fopen(inst->name, "r"); } else rewind(inst->f); if (inst->f == NULL) _EscIO2(FileNotFound, inst->name); RESETBUF(inst->f, Char); if (*cinst->name != '\0') { if (cinst->f != NULL) cinst->f = freopen(cinst->name, "w", cinst->f); else cinst->f = fopen(cinst->name, "w"); } else { if (cinst->f != NULL) rewind(cinst->f); else cinst->f = tmpfile(); } if (cinst->f == NULL) _EscIO2(FileNotFound, cinst->name); SETUPBUF(cinst->f, Char); if (*distribution->name != '\0') { if (distribution->f != NULL) distribution->f = freopen(distribution->name, "w", distribution->f); else distribution->f = fopen(distribution->name, "w"); } else { if (distribution->f != NULL) rewind(distribution->f); else distribution->f = tmpfile(); } if (distribution->f == NULL) _EscIO2(FileNotFound, distribution->name); SETUPBUF(distribution->f, Char); fprintf(distribution->f, "* malin %5.2f alignment distribution\n", version); fprintf(distribution->f, "* alignment class: %ld\n", analignment); /* the method of reading optalign comes from malopt.p */ if (*optalign->name != '\0') { if (optalign->f != NULL) optalign->f = freopen(optalign->name, "r", optalign->f); else optalign->f = fopen(optalign->name, "r"); } else rewind(optalign->f); if (optalign->f == NULL) _EscIO2(FileNotFound, optalign->name); RESETBUF(optalign->f, Char); /*zzz move to start for reading optalign*/ while (P_peek(optalign->f) == '*') { fscanf(optalign->f, "%*[^\n]"); getc(optalign->f); } fscanf(optalign->f, "%*[^\n]"); getc(optalign->f); /* skip blank line */ fscanf(optalign->f, "%*[^\n]"); getc(optalign->f); /* skip sequences, it should match the other file */ fscanf(optalign->f, "%*[^\n]"); getc(optalign->f); /* skip max R line */ /* get distribution */ for (class_ = 1; class_ <= analignment; class_++) { if (BUFEOF(optalign->f)) { printf("premature end of optalign\n"); halt(); } fscanf(optalign->f, "%*[^\n]"); getc(optalign->f); /* skip blank line */ fscanf(optalign->f, "%ld%*[^\n]", &theclass); getc(optalign->f); if (theclass != class_) { printf("classes not being read correctly\n"); halt(); } for (s = 1; s <= sequences; s++) { fscanf(optalign->f, "%ld", &shift); if (class_ == analignment) { if (zerobase >= 0) fprintf(distribution->f, "%ld\n", shift + zerobase); else { fprintf(distribution->f, "%ld\n", -(shift + zerobase)); /*zzz That's how to handle the negative???*/ } } } } /* * relative aligned bases for the set of optimal alignments 23 sequences, 18 optimal alignments in 1001 runs 11.9820040 = H for original alignment 1) 874 occurrences, H = 7.2347730, relative aligned bases: -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 0 3 3 1 1 1 1 1 1 1 2) 72 occurrences, H = 7.4625137, relative aligned bases: 1 1 1 1 2 2 2 2 2 2 2 2 2 2 5 5 -5 3 3 3 3 3 3 */ while (!BUFEOF(inst->f)) { if (!P_eoln(inst->f)) { resettrigger(&t0a); resettrigger(&t0b); resettrigger(&t0c); resettrigger(&t0d); resettrigger(&t0e); resettrigger(&t1a); resettrigger(&t2a); resettrigger(&t3a); resettrigger(&t5a); while (!P_eoln(inst->f)) { c = getc(inst->f); if (c == '\n') c = ' '; if (!shutup) putc(c, cinst->f); if (debugging) /*zzz*/ printf("%c[%ld]", c, state); testfortrigger(c, &t0a); testfortrigger(c, &t0b); testfortrigger(c, &t0c); testfortrigger(c, &t0d); testfortrigger(c, &t0e); testfortrigger(c, &t1a); testfortrigger(c, &t2a); testfortrigger(c, &t3a); testfortrigger(c, &t5a); switch (state) { case 0: if (t0a.found) state = 1; else if (t0b.found) state = 2; else if (t0c.found) state = 5; else if (t0d.found) state = 6; else if (t0e.found) { state = 7; } break; case 1: if (t1a.found) state = 0; break; case 2: if (t2a.found) { fscanf(inst->f, "%ld%ld", &fromvalue, &fromrange); /* grab replacement fromvalue! */ fscanf(optinst->f, "%ld", &fromvalue); /*zzz here is where the book would have to be read so that the conversion of fromvalue uses book coordinates*/ /* modify fromvalue here */ fromvalue += zerobase; fprintf(cinst->f, " %ld %c%ld", fromvalue, sign(fromrange), labs(fromrange)); state = 3; } break; case 3: if (t3a.found) state = 4; break; case 4: skipblanks(inst); if (P_peek(inst->f) == 's') { /* it must be a "same" instruction - use the fromvalue! */ tovalue = fromvalue; skipnonblanks(inst); fscanf(inst->f, "%ld", &torange); /* writeln(output,'same ', tovalue:1); */ fprintf(cinst->f, "same %c%ld", sign(torange), labs(torange)); } else if (P_peek(inst->f) == 'p') { /* it must be a "piece" instruction - use the fromvalue! */ tovalue = fromvalue; skipnonblanks(inst); /* torange := -fromrange; */ torange = 100; /*zzz fix this later*/ shutup = true; /* writeln(output,'same ', tovalue:1); */ fprintf(cinst->f, "same %c%ld", sign(torange), labs(torange)); } else { fscanf(inst->f, "%ld%ld", &tovalue, &torange); fprintf(cinst->f, "%ld %c%ld", fromvalue, sign(torange), labs(torange)); } state = 0; break; case 5: /*zzz*/ if (t5a.found) state = 0; break; case 6: if (t0d.found) state = 0; break; case 7: if (t0e.found) state = 0; break; } } continue; } fscanf(inst->f, "%*[^\n]"); getc(inst->f); if (debugging) /*zzz*/ putchar('\n'); if (shutup) { fprintf(cinst->f, ";\n"); shutup = false; } else putc('\n', cinst->f); } fprintf(cinst->f, "\n(* malin%5.2f *)\n", version); fprintf(cinst->f, "(* alignment: %ld, occurences: %ld, H: %10.5f bits *)\n", analignment, occurences, H); printf("alignment: %ld, occurences: %ld, H: %10.5f bits\n", analignment, occurences, H); } /* end module malin.themain */ main(argc, argv) int argc; Char *argv[]; { PASCAL_MAIN(argc, argv); if (setjmp(_JL1)) goto _L1; distribution.f = NULL; strcpy(distribution.name, "distribution"); cinst.f = NULL; strcpy(cinst.name, "cinst"); malinp.f = NULL; strcpy(malinp.name, "malinp"); inst.f = NULL; strcpy(inst.name, "inst"); optalign.f = NULL; strcpy(optalign.name, "optalign"); optinst.f = NULL; strcpy(optinst.name, "optinst"); themain(&optinst, &optalign, &inst, &malinp, &cinst, &distribution); _L1: if (optinst.f != NULL) fclose(optinst.f); if (optalign.f != NULL) fclose(optalign.f); if (inst.f != NULL) fclose(inst.f); if (malinp.f != NULL) fclose(malinp.f); if (cinst.f != NULL) fclose(cinst.f); if (distribution.f != NULL) fclose(distribution.f); exit(EXIT_SUCCESS); } /* End. */