// crm114_.c - Controllable Regex Mutilator, version v1.0 // Copyright 2001-2004 William S. Yerazunis, all rights reserved. // // This software is licensed to the public under the Free Software // Foundation's GNU GPL, version 2. You may obtain a copy of the // GPL by visiting the Free Software Foundations web site at // www.fsf.org, and a copy is included in this distribution. // // Other licenses may be negotiated; contact the // author for details. // // include some standard files #include "crm114_sysincludes.h" // include any local crm114 configuration file #include "crm114_config.h" // include the crm114 data structures file #include "crm114_structs.h" // and include the routine declarations file #include "crm114.h" // the command line argc, argv extern int prog_argc; extern char **prog_argv; // the auxilliary input buffer (for WINDOW input) extern char *newinputbuf; // the globals used when we need a big buffer - allocated once, used // wherever needed. These are sized to the same size as the data window. extern char *inbuf; extern char *outbuf; extern char *tempbuf; // // Here it is, the core of CRM114 - the execution engine toplevel, // which, given a CSL and a CDW, executes the CSL against the CDW // int crm_invoke (CSL_CELL *csl, CSL_CELL *cdw) { long i, j, k; long status; long done; long slen; // timer1, timer2, and tstmt are for time profiling. // TMS_STRUCT timer1, timer2; long tstmt; tstmt = 0; i = j = k = 0; status = 0; // Sanity check - don't try to execute a file before compilation if (csl->mct == NULL) { untrappableerror ( "Can't execute a file without compiling first.\n", "This means that CRM114 is somehow broken."); }; // empty out the alius stack (nothing FAILed yet.) // for (i = 0; i < MAX_BRACKETDEPTH; i++) csl->aliusstk[i] = 1; // if there was a command-line-specified BREAK, set it. // if (cmdline_break > 0) { if (cmdline_break <= csl->nstmts) { csl->mct[cmdline_break]->stmt_break = 1; }; }; if (user_trace > 0) fprintf (stderr, "Starting to execute %s at line %ld\n", csl->filename, csl->cstmt); invoke_top: // initialize timers ? if (profile_execution) { tstmt = csl->cstmt; times ( (void *) &timer1); }; if (csl->cstmt >= csl->nstmts) { // OK, we're at the end of the program. When this happens, // we know we can exit this invocation of the invoker if (user_trace > 0 ) fprintf (stderr, "Finished the program %s.\n", csl->filename); done = 1; status = 0; goto invoke_done; }; slen = (csl->mct[csl->cstmt+1]->fchar) - (csl->mct[csl->cstmt ]->fchar); if (user_trace > 0) { fprintf (stderr, "\nParsing line %ld :\n", csl->cstmt); fprintf (stderr, " --> "); for (i = 0; i < slen; i++) fprintf (stderr, "%c", csl->filetext[csl->mct[csl->cstmt]->fchar+i]); }; // Invoke the common declensional parser on the statement only if it's // an executable statement. // switch ( csl->mct[csl->cstmt]->stmt_type ) { // // Do the processing that all statements need (well, _almost_ all.) // case CRM_LABEL: case CRM_NOOP: case CRM_BOGUS: break; default: i = crm_statement_parse ( &(csl->filetext[csl->mct[csl->cstmt]->fchar]), slen, apb); // and run the standard flag parser against the flags found (if any) // { char flagz[MAX_PATTERN]; long fl; fl = MAX_PATTERN; crm_get_pgm_arg (flagz, fl, apb->a1start, apb->a1len); fl = crm_nexpandvar (flagz, apb->a1len, MAX_PATTERN); // fprintf (stderr, "flagz --%s-- len %d\n", flagz, strlen(flagz)); apb->sflags = crm_flagparse (flagz, fl); }; break; }; // and maybe drop into the debugger? // cycle_counter++; if (debug_countdown > 0) debug_countdown--; if (debug_countdown == 0 || csl->mct[csl->cstmt]->stmt_break == 1 ) { i = crm_debugger (); if (i == -1) exit ( EXIT_SUCCESS ); if (i == 1) goto invoke_top; }; if (user_trace > 0) { fprintf (stderr, "\nExecuting line %ld :\n", csl->cstmt); }; // so, we're not off the end of the program (yet), which means look // at the statement type and see if it's somethign we know how to // do, otherwise we make a nasty little noise and continue onward. // Dispatch is done on a big SWITCH statement switch ( csl->mct[csl->cstmt]->stmt_type ) { case CRM_NOOP: case CRM_LABEL: { if (user_trace) fprintf (stderr, "Statement %ld is non-executable, continuing.\n", csl->cstmt); } break; case CRM_OPENBRACKET: { // the nest_level+1 is because the statements in front are at +1 depth csl->aliusstk [ csl->mct [ csl->cstmt] -> nest_level+1 ] = 1; if (user_trace) fprintf (stderr, "Statement %ld is an openbracket. depth now %d.\n", csl->cstmt, 1 + csl->mct [ csl->cstmt]->nest_level); } break; case CRM_CLOSEBRACKET: { if (user_trace) fprintf (stderr, "Statement %ld is a closebracket. depth now %d.\n", csl->cstmt, csl->mct[ csl->cstmt]->nest_level); } break; case CRM_BOGUS: { char bogusbuffer[1024]; char bogusstmt [1024]; sprintf (bogusbuffer, "Statement %ld is bogus!!! Here's the text: \n", csl->cstmt); memmove (bogusstmt, &csl->filetext[csl->mct[csl->cstmt]->start], csl->mct[csl->cstmt+1]->start - csl->mct[csl->cstmt]->start); bogusstmt [csl->mct[csl->cstmt+1]->start - csl->mct[csl->cstmt]->start] = '\000'; fatalerror (bogusbuffer, bogusstmt); } break; case CRM_EXIT: { int retval; long retlen; char retstr [MAX_PATTERN]; crm_get_pgm_arg (retstr, MAX_VARNAME, apb->s1start, apb->s1len); retlen = apb->s1len; retlen = crm_nexpandvar (retstr, retlen, MAX_VARNAME); retval = 0; if (retlen > 0) sscanf (retstr, "%d", &retval); if (user_trace) fprintf (stderr, "Exiting at statement %ld with value %d\n", csl->cstmt, retval); //if (profile_execution) // crm_output_profile (csl); // exit (retval); status = retval; done = 1; goto invoke_exit; } break; case CRM_RETURN: { if (user_trace) fprintf (stderr, "Returning to caller at statement %ld\n", csl->cstmt); return (0); } break; case CRM_GOTO: { char target[MAX_VARNAME]; long tarlen; // look up the variable name in the vht. If it's not there, or // not in our file, call a fatal error. crm_get_pgm_arg ( target, MAX_VARNAME, apb->s1start, apb->s1len); tarlen = apb->s1len; if (internal_trace) fprintf (stderr, "\n untranslated label %s , ", target); // do indirection if needed. tarlen = crm_nexpandvar (target, tarlen, MAX_VARNAME); if (internal_trace) fprintf (stderr, " translates to %s .", target); k = crm_lookupvarline (vht, target, 0, tarlen); if (k > 0) { if (user_trace) fprintf (stderr, "GOTO from line %ld to line %ld\n", csl->cstmt, k); csl->cstmt = k; // and going here didn't fail... csl->aliusstk [ csl->mct[csl->cstmt]->nest_level ] = 1; } else { // this is recoverable if we have a trap... so we continue // execution right to the BREAK. fatalerror (" Can't GOTO the nonexistent label: ", target); }; } break; case CRM_FAIL: { // If we get a FAIL, then we should branch to the statement // pointed to by the fail_index entry for that line. // // note we cheat - we branch to "fail_index - 1" // and let the increment happen. if (user_trace) fprintf (stderr, "Executing hard-FAIL at line %ld\n", csl->cstmt); csl->cstmt = csl->mct[csl->cstmt]->fail_index - 1; // and mark that we "failed", so an ALIUS will take this as a // failing statement block csl->aliusstk [ csl->mct[csl->cstmt]->nest_level ] = -1; }; break; case CRM_LIAF: { // If we get a LIAF, then we should branch to the statement // pointed to by the liaf_index entry for that line. // // (note the "liaf-index - 1" cheat - we branch to // liaf_index -1 and let the incrment happen) if (user_trace) fprintf (stderr, "Executing hard-LIAF at line %ld\n", csl->cstmt); csl->cstmt = csl->mct[csl->cstmt]->liaf_index - 1 ; }; break; case CRM_ALIUS: { // ALIUS looks at the finish state of the last bracket - if it // was a FAIL-to, then ALIUS is a no-op. If it was NOT a fail-to, // then ALIUS itself is a FAIL if (user_trace) fprintf (stderr, "Executing ALIUS at line %ld\n", csl->cstmt); if (csl->aliusstk [csl->mct[csl->cstmt]->nest_level + 1] == 1) { if (user_trace) fprintf (stderr, "prior group exit OK, ALIUS fails forward. \n"); csl->cstmt = csl->mct[csl->cstmt]->fail_index - 1; }; } break; case CRM_TRAP: { // TRAP is a placeholder statement that holds the regex that // the faulting statement must match. The background support // code is in crm_trigger_fault that will look at the error string // and see if it matches the regex. // // If we get to a TRAP statement itself, we should treat it as // a skip to end of block (that's a SKIP, not a FAIL) if (user_trace) { fprintf (stderr, "Executing a TRAP statement..."); fprintf (stderr, " this is a NOOP unless you have a live FAULT\n"); } csl->cstmt = csl->mct[csl->cstmt]->fail_index - 1; } break; case CRM_FAULT: { char *reason; char rbuf [MAX_PATTERN]; long rlen; long fresult; // FAULT forces the triggering of the TRAP; it's a super-FAIL // statement that can skip downward a large number of blocks. // if (user_trace) fprintf (stderr, "Forcing a FAULT at line %ld\n", csl->cstmt); crm_get_pgm_arg ( rbuf, MAX_PATTERN, apb->s1start, apb->s1len ); rlen = crm_nexpandvar (rbuf, apb->s1len, MAX_PATTERN); // We malloc the reason - better free() it when we take the trap. // in crm_trigger_fault // reason = malloc (rlen + 5); if (!reason) untrappableerror( "Couldn't malloc 'reason' in CRM_FAULT - out of memory.\n", "Don't you just HATE it when the error fixup routine gets" "an error?!?!"); strncpy (reason, rbuf, rlen+1); fresult = crm_trigger_fault (reason); if (fresult != 0) fatalerror("Your program has no TRAP for the user defined fault:", reason); } break; case CRM_ACCEPT: { char varname [MAX_VARNAME]; long varidx; // Accept: take the current window, and output it to // standard output. // // if (user_trace) fprintf (stderr, "Executing an ACCEPT \n"); // // varname[0] = '\0'; strcpy (varname, ":_dw:"); varidx = crm_vht_lookup (vht, varname, strlen (varname)); if (varidx == 0 || vht[varidx] == NULL) { fatalerror ("This is very strange... there is no data window!", "As such, death is our only option."); } else { fwrite (&(vht[varidx]->valtxt[vht[varidx]->vstart]), vht[varidx]->vlen, 1, stdout); fflush (stdout); } // WE USED TO DO CHARACTER I/O. OUCH!!! // for (i = 0; i < cdw->nchars ; i++) // fprintf (stdout, "%c", cdw->filetext[i]); } break; case CRM_MATCH: { crm_expr_match (csl, apb); } break; case CRM_OUTPUT: { crm_expr_output (csl, apb); } break; case CRM_WINDOW: { long i; i = crm_expr_window (csl, apb); if (i == 1) goto invoke_bailout; } break; case CRM_ALTER: { crm_expr_alter (csl, apb); } break; case CRM_EVAL: { crm_expr_eval (csl, apb); } break; case CRM_HASH: { // here's where we surgiclly alter a variable to a hash. // We have to watch out in case a variable is not in the // cdw (it might be in tdw; that's legal as well. syntax // is to replace the contents of the variable in the // varlist with hash of the evaluated string. char varname[MAX_VARNAME]; long varlen; long vns, vnl; char newstr [MAX_VARNAME]; long newstrlen; unsigned long hval; // hash value if (user_trace) fprintf (stderr, "Executing a HASHing\n"); // get the variable name crm_get_pgm_arg (varname, MAX_VARNAME, apb->p1start, apb->p1len); varlen = apb->p1len; varlen = crm_nexpandvar (varname, varlen, MAX_VARNAME); crm_nextword (varname, varlen, 0, &vns, &vnl); // If we didn't get a variable name, we replace the data window! if (vnl == 0) { strcpy (varname, ":_dw:"); vnl = strlen (varname); } // get the to-be-hashed pattern, and expand it. crm_get_pgm_arg (tempbuf, data_window_size, apb->s1start, apb->s1len); newstrlen = apb->s1len; // // if no var given, hash the full data window. if (newstrlen == 0) { strcpy (tempbuf, ":*:_dw:"); newstrlen = strlen (tempbuf); } newstrlen = crm_nexpandvar (tempbuf, newstrlen, data_window_size); // The pattern is now expanded, we can hash it to obscure meaning. hval = strnhash (tempbuf, newstrlen ); sprintf (newstr, "%08lX", hval); if (internal_trace) { fprintf (stderr, "String: '%s' \n hashed to: %08lX\n", tempbuf, hval); }; // and stuff the new value in. crm_destructive_alter_nvariable (&varname[vns], vnl, newstr, strlen (newstr)); }; break; case CRM_LEARN: { crm_expr_learn (csl, apb); }; break; // we had to split out classify- it was just too big. case CRM_CLASSIFY: crm_expr_classify ( csl, apb); break; case CRM_ISOLATE: crm_expr_isolate (csl, apb); break; case CRM_INPUT: { crm_expr_input (csl, apb); }; break; case CRM_SYSCALL: { crm_expr_syscall (csl, apb); } break; case CRM_INTERSECT: // Calculate the intersection of N variables; the result // replaces the captured value of the first variable. // Captured values not in the data window are ignored. { char temp_vars [MAX_VARNAME]; long tvlen; char out_var [MAX_VARNAME]; long ovstart; long ovlen; long vstart; long vend; long vlen; long istart, iend, ilen, i_index; long mc; long done; if (user_trace) fprintf (stderr, "executing an INTERSECT statement"); // get the output variable (the one we're gonna whack) // crm_get_pgm_arg (out_var, MAX_VARNAME, apb->p1start, apb->p1len); ovstart = 0; ovlen = crm_nexpandvar (out_var, apb->p1len, MAX_VARNAME); // get the list of variable names // // note- since vars never contain wchars, we're OK here. crm_get_pgm_arg (temp_vars, MAX_VARNAME, apb->b1start, apb->b1len); tvlen = crm_nexpandvar (temp_vars, apb->b1len, MAX_VARNAME); if (internal_trace) { fprintf (stderr, " Intersecting vars: ***%s***\n", temp_vars); fprintf (stderr, " with result in ***%s***\n", out_var); }; done = 0; mc = 0; vstart = 0; vend = 0; istart = 0; iend = cdw->nchars; ilen = 0; i_index = -1; while (!done) { while (temp_vars[vstart] < 0x021 && vstart < tvlen ) // was temp_vars[vstart] != '\000') vstart++; vlen = 0; while (temp_vars[vstart+vlen] >= 0x021 && vstart+vlen < tvlen ) vlen++; if (vlen == 0) { done = 1; } else { long vht_index; // // look up the variable vht_index = crm_vht_lookup (vht, &temp_vars[vstart], vlen); if (vht[vht_index] == NULL ) { char varname[MAX_VARNAME]; strncpy (varname, &temp_vars[vstart], vlen); varname[vlen] = '\000'; nonfatalerror ( "can't intersection a nonexistent variable.", varname); goto invoke_bailout; } else { // it was a good var, make sure it's in the data window // if (vht[vht_index] -> valtxt != cdw->filetext) { char varname[MAX_VARNAME]; strncpy (varname, &temp_vars[vstart], vlen); varname[vlen] = '\000'; nonfatalerror ( "can't intersect isolated variable.", varname); goto invoke_bailout; } else { // it's a cdw variable; go for it. if (vht[vht_index] -> vstart > istart) istart = vht[vht_index] -> vstart; if ((vht[vht_index]->vstart + vht[vht_index]->vlen) < iend) iend = vht[vht_index]->vstart + vht[vht_index]->vlen; }; }; }; vstart = vstart + vlen; if (temp_vars[vstart] == '\000') done = 1; }; // // all done with the looping, set the start and length of the // first var. vlen = iend - istart; if (vlen < 0 ) vlen = 0; crm_nextword (out_var, ovlen, 0, &ovstart, &ovlen); crm_set_windowed_nvar (&out_var[ovstart], ovlen, cdw->filetext, istart, vlen, csl->cstmt); } break; case CRM_UNION: // Calculate the union of N variables; the result // replaces the captured value of the first variable. // Captured values not in the data window are ignored. { char temp_vars [MAX_VARNAME]; long tvlen; char out_var[MAX_VARNAME]; long ovstart; long ovlen; long vstart; long vend; long vlen; long istart, iend, ilen, i_index; long mc; long done; if (user_trace) fprintf (stderr, "executing a UNION statement"); // get the output variable (the one we're gonna whack) // crm_get_pgm_arg (out_var, MAX_VARNAME, apb->p1start, apb->p1len); ovstart = 0; ovlen = crm_nexpandvar (out_var, apb->p1len, MAX_VARNAME); // get the list of variable names // // since vars never contain wchars, we don't have to be 8-bit-safe crm_get_pgm_arg (temp_vars, MAX_VARNAME, apb->b1start, apb->b1len); tvlen = crm_nexpandvar (temp_vars, apb->b1len, MAX_VARNAME); if (internal_trace) fprintf (stderr, " Uniting vars: ***%s***\n", temp_vars); done = 0; mc = 0; vstart = 0; vend = 0; istart = cdw->nchars; iend = 0; ilen = 0; i_index = -1; while (!done) { while (temp_vars[vstart] < 0x021 && vstart < tvlen) // was temp_vars[vstart] != '\000') vstart++; vlen = 0; while (temp_vars[vstart+vlen] >= 0x021 && vstart+vlen < tvlen) vlen++; if (vlen == 0) { done = 1; } else { long vht_index; // // look up the variable vht_index = crm_vht_lookup (vht, &temp_vars[vstart], vlen); if (vht[vht_index] == NULL ) { char varname[MAX_VARNAME]; strncpy (varname, &temp_vars[vstart], vlen); varname[vlen] = '\000'; nonfatalerror ( "can't intersect a nonexistent variable.", varname); goto invoke_bailout; } else { // it was a good var, make sure it's in the data window // if (vht[vht_index] -> valtxt != cdw->filetext) { char varname[MAX_VARNAME]; strncpy (varname, &temp_vars[vstart], vlen); varname[vlen] = '\000'; nonfatalerror ( "can't intersect isolated variable.", varname); goto invoke_bailout; } else { // it's a cdw variable; go for it. if (vht[vht_index] -> vstart < istart) istart = vht[vht_index] -> vstart; if ((vht[vht_index]->vstart + vht[vht_index]->vlen) > iend) iend = vht[vht_index]->vstart + vht[vht_index]->vlen; }; }; }; vstart = vstart + vlen; if (temp_vars[vstart] == '\000') done = 1; }; // // all done with the looping, set the start and length of the // output var. vlen = iend - istart; if (vlen < 0 ) vlen = 0; crm_nextword (out_var, ovlen, 0, &ovstart, &ovlen); crm_set_windowed_nvar (&out_var[ovstart], ovlen, cdw->filetext, istart, vlen, csl->cstmt); } break; case CRM_UNIMPLEMENTED: { char bogusbuffer[1024]; char bogusstmt [1024]; sprintf (bogusbuffer, "Statement %ld NOT YET IMPLEMENTED !!!" "Here's the text: \n", csl->cstmt); memmove(bogusstmt, &csl->filetext[csl->mct[csl->cstmt]->start], csl->mct[csl->cstmt+1]->start - csl->mct[csl->cstmt]->start); bogusstmt [csl->mct[csl->cstmt+1]->start - csl->mct[csl->cstmt]->start] = '\000'; fatalerror (bogusbuffer, bogusstmt); goto invoke_bailout; }; break; default: { char bogusbuffer[1024]; char bogusstmt [1024]; sprintf (bogusbuffer, "Statement %ld way, way bizarre !!! Here's the text: \n", csl->cstmt); memmove (bogusstmt, &csl->filetext[csl->mct[csl->cstmt]->start], csl->mct[csl->cstmt+1]->start - csl->mct[csl->cstmt]->start); bogusstmt [csl->mct[csl->cstmt+1]->start - csl->mct[csl->cstmt]->start] = '\000'; fatalerror (bogusbuffer, bogusstmt); goto invoke_bailout; }; } // If we're in some sort of strange abort mode, and we just need to move // on to the next statement, we branch here. invoke_bailout: // grab end-of-statement timers ? if (profile_execution) { times ( (void *) &timer2); csl->mct[tstmt]->stmt_utime += (timer2.tms_utime - timer1.tms_utime); csl->mct[tstmt]->stmt_stime += (timer2.tms_stime - timer1.tms_stime); }; // go on to next statement (unless we're failing, laifing, etc, // in which case we have no business getting to here. csl->cstmt ++; goto invoke_top; invoke_done: // give the debugger one last chance to do things. if (debug_countdown == 0) i=crm_debugger (); if (csl->cstmt < csl->nstmts) goto invoke_top; invoke_exit: // if we asked for an output profile, give it to us. if (profile_execution) crm_output_profile (csl); return (status); }