// crm114_.c - Controllable Regex Mutilator, version v1.0 // Copyright 2001-2004 William S. Yerazunis, all rights reserved. // // This software is licensed to the public under the Free Software // Foundation's GNU GPL, version 2. You may obtain a copy of the // GPL by visiting the Free Software Foundations web site at // www.fsf.org, and a copy is included in this distribution. // // Other licenses may be negotiated; contact the // author for details. // // include some standard files #include "crm114_sysincludes.h" // include any local crm114 configuration file #include "crm114_config.h" // include the crm114 data structures file #include "crm114_structs.h" // and include the routine declarations file #include "crm114.h" // the command line argc, argv extern int prog_argc; extern char **prog_argv; // the auxilliary input buffer (for WINDOW input) extern char *newinputbuf; // the globals used when we need a big buffer - allocated once, used // wherever needed. These are sized to the same size as the data window. extern char *inbuf; extern char *outbuf; extern char *tempbuf; // // // crm_nexpandvar - given a string and it's length, go through it // and if there's a variable expansion called for (by the :*: // operator) expand the variable. // // the inputs are a buffer with the NULL-safe string in it, the // length of this string, and the maximum allocated length of the // buffer. This function returns the new length of the buffer. // It will NOT increase the buffer length past maxlen, so // expansions beyond that will cause a nonfatal error and be // aborted. // // Algorithm: // 1) efficiency check- do we need to do any expansions at all. // 2) Start at buf[0], work up to buf[buflen]-3 // 2a) do \n, \r, \a, \xHH and \oOOO // 3) are we looking at :*:? // 4) no: copy 1 character, increment from and to indexes, go to step 3 // 5) yes: skip from index ahead 3, from there to next : is the varname // 6) copy var value to tbuf, incrementing tobuf index. // 7) set from-index to third colon index + 1 // 8) go to 2 (modulo last two chars need copying) // long crm_nexpandvar (char *buf, long inlen, long maxlen) { long is, id; long vht_index; long q; // a temporary work buffer... char *tbuf; // and another for variable names... char *vname; char *cp; long vlen; // efficiency check - do we even _have_ a :*: in the buffer? // if (inlen == 0) return (0); if (q_expansion_mode >= 2) return (crm_qexpandvar (buf, inlen, maxlen, NULL)); // GROT GROT GROT must fix this for 8-bit safe error messages if (inlen > maxlen) { q = fatalerror ( "You have blown the gaskets while building a string. Orig string was: ", buf); if (q == 0 ) return (inlen); goto bailout; }; // First thing- do the \-escapes // is = 0; id = 0; for (is = 0; is < inlen ; is++) { if (buf[is] != '\\' ) { buf [id] = buf [is]; id++; } else { // we're looking at a '\\' character. // // Check for a few common things: \n, \a, \xNN, \oNNN is++; // switch (buf[is]) { case '0': { // it's a NULL. buf[id] = '\0'; id++; } break; case 'b': { // it's a backspace buf[id] = '\b'; id++; } break; case 't': { // it's a tab buf[id] = '\t'; id++; } break; case 'n': { // it's a newline. stuff in a newline. buf[id] = '\n'; id++; } break; case 'v': { // it's a vtab buf[id] = '\v'; id++; } break; case 'f': { // it's a form feed. buf[id] = '\f'; id++; } break; case 'r': { // it's a carriage return buf[id] = '\r'; id++; } break; case 'a': { // it's a BELL. put that in. buf[id] = '\a'; id++; } break; case 'x': case 'X': { // it's a hex char constant. read it and stuff it. unsigned int value; is++; sscanf (&buf[is], "%2X", &value); buf[id] = value; id++; is++; } break; case 'o': case 'O': { // it's an octal char constant. read it and stuff it. unsigned int value; is++; sscanf (&buf[is], "%o3", &value); buf[id] = value; id++; is++; is++; } break; case '>': case ')': case ']': case '/': case ';': case '{': case '}': case '#': case '\\': { // >, ), ], ;, {, }, #, and / are themselves after a '\', // but need the \ escape to pass thru the parser // without terminating their enclosed args buf[id] = buf[is]; id++; }; break; default: { // if it's "none of the above" characters, then // the '\' character _stays_ as a literal buf[id] = '\\'; id++; buf[id] = buf[is]; id++; }; break; }; }; }; // and update the new inlen inlen = id ; buf[inlen] = '\000'; // needed because slimy old GNU REGEX needs it. // if no *, then no :*: and so no expansions needed cp = memchr (buf, '*', inlen); if (cp == NULL) { return (inlen); }; // OK, we might have a :*: substitution operator, so we actually have // to do some work. // allocate some memory for tbuf and vname; tbuf = (char *) malloc (maxlen); vname = (char *) malloc (maxlen); if (tbuf == NULL || vname == NULL) { q = fatalerror ("Couldn't allocate memory for variable expansion!", "Try making the window set smaller with the -w option"); if (q == 0) return (inlen); }; is = 0; // is is the input position index id = 0; // id is the destination position index for (is = 0; is <= inlen && id < maxlen; is++) { if (is <= inlen - 5 // check only if :*:c:" possible && buf[is] == ':' && buf[is+1] == '*' && buf[is+2] ==':') { // yes, it's a probable variable. // copy everything from the colon to the second colon // into the vname buffer. is = is + 2; vname [0] = buf[is]; vlen = 1; is++; while (is < maxlen && is <= inlen && buf [is] != ':') { vname[vlen] = buf[is]; is++; vlen++; }; // // check for the second colon as well... if (buf[is] == ':') { vname[vlen] = ':'; vlen++; } vname [vlen] = '\000'; // // Now we've got the variable name in vname, we can // go get it's value and copy _that_ into tbuf as well. if (internal_trace) fprintf (stderr, "looking up variable >%s<\n", vname); vht_index = crm_vht_lookup (vht, vname, vlen); if (vht[vht_index] == NULL) { // there was no variable by that name, just put the // name itself there. Note that we retain the :'s // but that the :* prefix goes away. for (q = 0; q < vlen && id < maxlen; q++) { tbuf[id] = vname[q]; id++; } } else { // There really was a variable value by that name. // suck it out, and splice it in! // if this was :_iso:, update iso's length if (strncmp( (char *) &vht[vht_index]->nametxt[vht[vht_index]->nstart], ":_iso:", 6) == 0) { vht[vht_index]->vlen = tdw->nchars; }; for (q = 0; q < vht[vht_index]->vlen && id < maxlen; q++) { tbuf[id] = vht[vht_index]->valtxt [(vht[vht_index]->vstart)+q]; id++; } }; } // Now, handle the case where we were NOT looking at // :*:c: in buf else { tbuf[id] = buf[is]; id++; } } // That's all, folks! Clean up the temporary buffer. We null-terminate // it in case we need to do stupid non-8-bit-clean IO on it. tbuf[id] = '\000'; memmove (buf, tbuf, id); free (tbuf); free (vname); id--; // the actual length is id-1, since id is the next available char if (internal_trace) fprintf (stderr, " Returned length from nexpandvar is %ld\n", id); return (id); bailout: return (inlen); } // crm_qexpandvar - "expanded" expandvar. Like nexpandvar, but moreso. // // nexpandvar just does \ and :*: expansion. qexpandvar also does // :#:, :$:, and a bunch of other stuff. (not recursively though.) // // the inputs are a buffer with the NULL-safe string in it, the // length of this string, and the maximum allocated length of the // buffer. This function returns the new length of the buffer. // It will NOT increase the buffer length past maxlen, so // expansions beyond that will cause a nonfatal error and be // aborted. // // Algorithm: // 1) efficiency check- do we need to do any expansions at all. // 2) Start at buf[0], work up to buf[buflen]-3 // 2a) do \n, \r, \a, \xHH and \oOOO // 3) are we looking at ::? // 4) no: copy 1 character, increment from and to indexes, go to step 3 // 5) yes: skip from index ahead 3, from there to next : is the varname // 6) copy var value to tbuf, incrementing tobuf index. // 7) set from-index to third colon index + 1 // 8) go to 2 (modulo last two chars need copying) // long crm_qexpandvar (char *buf, long inlen, long maxlen, long *retstat) { long is, id; long vht_index; long q; // a temporary work buffer... char *tbuf; // and another for variable names... char *vname; char *cp; long vlen; char opchar; // efficiency check - do we even _have_ a :*: in the buffer? // if (inlen == 0) return (0); if (internal_trace) fprintf (stderr, "qexpandvar on =%s= len %ld\n", buf, inlen); // GROT GROT GROT must fix this for 8-bit safe error messages if (inlen > maxlen) { q = fatalerror ( "You have blown the gaskets while building a string. Orig string was: ", buf); if (q == 0 ) return (inlen); goto bailout; }; // First thing- do the \-escapes // is = 0; id = 0; for (is = 0; is < inlen ; is++) { if (buf[is] != '\\' ) { buf [id] = buf [is]; id++; } else { // we're looking at a '\\'. // // Check for a few common things: \n, \a, \xNN, \oNNN is++; // switch (buf[is]) { case '0': { // it's a NULL. buf[id] = '\0'; id++; } break; case 'b': { // it's a backspace buf[id] = '\b'; id++; } break; case 't': { // it's a tab buf[id] = '\t'; id++; } break; case 'n': { // it's a newline. stuff in a newline. buf[id] = '\n'; id++; } break; case 'v': { // it's a vtab buf[id] = '\v'; id++; } break; case 'f': { // it's a form feed. buf[id] = '\f'; id++; } break; case 'r': { // it's a carriage return buf[id] = '\r'; id++; } break; case 'a': { // it's a BELL. put that in. buf[id] = '\a'; id++; } break; case 'x': case 'X': { // it's a hex char constant. read it and stuff it. unsigned int value; is++; sscanf (&buf[is], "%2X", &value); buf[id] = value; id++; is++; } break; case 'o': case 'O': { // it's an octal char constant. read it and stuff it. unsigned int value; is++; sscanf (&buf[is], "%o3", &value); buf[id] = value; id++; is++; is++; } break; case '>': case ')': case ']': case '/': case ';': case '{': case '}': case '#': case '\\': { // >, ), ], ;, {, }, #, and / are themselves after a '\', // but need the \ escape to pass thru the parser // without terminating their enclosed args buf[id] = buf[is]; id++; }; break; default: { // if it's "none of the above" characters, then // the '\' character _stays_ as a literal buf[id] = '\\'; id++; buf[id] = buf[is]; id++; }; break; }; }; }; // and update the new inlen inlen = id ; buf[inlen] = '\000'; // needed because slimy old GNU REGEX needs it. if (internal_trace) fprintf (stderr, "backslash expansion yields: =%s= len %ld\n", buf, inlen); // if no :, then no operators possible. cp = memchr (buf, ':', inlen); if (cp == NULL) { return (inlen); }; // OK, we might have a :*: substitution operator, so we actually have // to do some work. // allocate some memory for tbuf and vname; tbuf = (char *) malloc (maxlen); vname = (char *) malloc (maxlen); if (tbuf == NULL || vname == NULL) { q = fatalerror ("Couldn't allocate memory for Q-variable expansion!", "Try making the window set smaller with the -w option"); if (q == 0) return (inlen); }; is = 0; // is is the input position index id = 0; // id is the destination position index // // First time through the loop, for :*: (variable expansion) // for (is = 0; is <= inlen && id < maxlen; is++) { if (is <= inlen - 5 // check only if :*:c:" possible && buf[is] == ':' && ( buf[is+1] == '*' ) && buf[is+2] ==':') { // yes, it's probably an expansion of some sort. opchar = buf[is+1]; // copy everything from the colon to the second colon // ( or the end of the string) into the vname buffer. is = is + 2; vname [0] = buf[is]; vlen = 1; is++; while (is < maxlen && is <= inlen && buf [is] != ':') { vname[vlen] = buf[is]; is++; vlen++; }; // // check for the second colon as well... if (buf[is] == ':') { vname[vlen] = ':'; vlen++; } vname [vlen] = '\000'; // // Now we've got the variable name in vname, we can // go get it's value and copy _that_ into tbuf as well. if (internal_trace) fprintf (stderr, "looking up variable >%s<\n", vname); vht_index = crm_vht_lookup (vht, vname, vlen); if (vht[vht_index] == NULL) { // there was no variable by that name, use the text itself switch (opchar) { case '*': { // // simply copy text till the close colon // for (q = 0; q < vlen && id < maxlen; q++) { tbuf[id] = vname[q]; id++; } } break; } } else { // There really was a variable value by that name. // suck it out, and splice it's text value // if this was :_iso:, update iso's length if (strncmp( (char *) &vht[vht_index]->nametxt[vht[vht_index]->nstart], ":_iso:", 6) == 0) { vht[vht_index]->vlen = tdw->nchars; }; switch (opchar) { case '*': { for (q = 0; q < vht[vht_index]->vlen && id < maxlen; q++) { tbuf[id] = vht[vht_index]->valtxt [(vht[vht_index]->vstart)+q]; id++; } } break; }; }; } // Now, handle the case where we were NOT looking at // :*:c: in buf else { tbuf[id] = buf[is]; id++; } } // // // Second time through the loop - expand :#: (string lengths) // strncpy (buf, tbuf, id); buf[id] = '\000'; inlen = id-1 ; // since id gets one last increment. if (internal_trace) fprintf (stderr, " var-expand yields: =%s= len %ld\n", buf, inlen); id = 0; for (is = 0; is <= inlen && id < maxlen; is++) { if (is <= inlen - 5 // check only if :#:c:" possible && buf[is] == ':' && ( buf[is+1] == '#' ) && buf[is+2] ==':') { // yes, it's probably an expansion of some sort. opchar = buf[is+1]; // copy everything from the colon to the second colon // into the vname buffer. is = is + 2; vname [0] = buf[is]; vlen = 1; is++; while (is < maxlen && is <= inlen && buf [is] != ':') { vname[vlen] = buf[is]; is++; vlen++; }; // // check for the second colon as well... if (buf[is] == ':') { vname[vlen] = ':'; vlen++; } vname [vlen] = '\000'; // // Now we've got the variable name in vname, we can // go get it's value and copy _that_ into tbuf as well. if (internal_trace) fprintf (stderr, "looking up variable >%s<\n", vname); vht_index = crm_vht_lookup (vht, vname, vlen); if (vht[vht_index] == NULL) { // there was no variable by that name, use the text itself switch (opchar) { case '#': { char lentext[MAX_VARNAME]; int m, mm; // the vlen-2 is because we need to get rid of the ':' sprintf (lentext, "%ld", vlen-2); mm = strlen (lentext); for (m = 0; m < mm && id < maxlen; m++) { tbuf[id] = lentext[m]; id++; }; } break; } } else { // There really was a variable value by that name. // suck it out, and splice it's text value // if this was :_iso:, update iso's length if (strncmp( (char *) &vht[vht_index]->nametxt[vht[vht_index]->nstart], ":_iso:", 6) == 0) { vht[vht_index]->vlen = tdw->nchars; }; switch (opchar) { case '#': { // // Actually, we want the _length_ of the variable // char lentext[MAX_VARNAME]; int m, mm; sprintf (lentext, "%ld", vht[vht_index]->vlen); mm = strlen (lentext); for (m = 0; m < mm && id < maxlen; m++) { tbuf[id] = lentext[m]; id++; }; }; break; }; }; } // Now, handle the case where we were NOT looking at // :*:c: in buf else { tbuf[id] = buf[is]; id++; } } // // // Third pass - handle :@: (math evaluations) // // strncpy (buf, tbuf, id); buf[id] = '\000'; inlen = id - 1; // since id got one extra increment if (internal_trace) fprintf (stderr, " length-expand yields: =%s= len %ld\n", buf, inlen); id = 0; for (is = 0; is <= inlen && id < maxlen; is++) { if (is <= inlen - 5 // check only if :*:c:" possible && buf[is] == ':' && ( buf[is+1] == '@' ) && buf[is+2] ==':') { // yes, it's probably an expansion of some sort. opchar = buf[is+1]; // copy everything from the colon to the second colon // into the vname buffer. is = is + 2; vname [0] = buf[is]; vlen = 1; is++; while (is < maxlen && is <= inlen && buf [is] != ':') { vname[vlen] = buf[is]; is++; vlen++; }; // // check for the second colon as well... if (buf[is] == ':') { vname[vlen] = ':'; vlen++; } vname [vlen] = '\000'; // // Now we've got the variable name in vname, we can // go get it's value and copy _that_ into tbuf as well. if (internal_trace) fprintf (stderr, "looking up variable >%s<\n", vname); vht_index = crm_vht_lookup (vht, vname, vlen); if (vht[vht_index] == NULL) { // there was no variable by that name, use the text itself switch (opchar) { case '@': { char mathtext[MAX_VARNAME]; int m, mm; strncpy (mathtext, &vname[1], vlen-2); mathtext[vlen-2] = '\000'; if (internal_trace) fprintf (stderr, "In-Mathtext is -'%s'-\n", mathtext); m = strmath (mathtext, vlen-2, MAX_VARNAME, retstat); if (internal_trace) fprintf (stderr, "Out-Mathtext is -'%s'-\n", mathtext); if (retstat && *retstat < 0) { q = fatalerror ("Problem during math evaluation of ", mathtext); if (q == 0) return (inlen); goto bailout; } mm = strlen (mathtext); for (m = 0; m < mm && id < maxlen; m++) { tbuf[id] = mathtext[m]; id++; }; } break; } } else { // There really was a variable value by that name. // suck it out, and splice it's text value // if this was :_iso:, update iso's length if (strncmp( (char *) &vht[vht_index]->nametxt[vht[vht_index]->nstart], ":_iso:", 6) == 0) { vht[vht_index]->vlen = tdw->nchars; }; switch (opchar) { case '@': { char mathtext[MAX_VARNAME]; int m, mm; m = 0; for (q = 0; q < vht[vht_index]->vlen && m < maxlen; q++) { mathtext[m] = vht[vht_index]->valtxt [(vht[vht_index]->vstart)+q]; m++; } mathtext[vlen-1] = '\000'; m = strmath (mathtext, vlen-2, MAX_VARNAME, retstat ); if (retstat && *retstat < 0) { q = fatalerror ("Problem during math evaluation of ", mathtext); if (q == 0) return (inlen); goto bailout; } mm = strlen (mathtext); for (m = 0; m < mm && id < maxlen; m++) { tbuf[id] = mathtext[m]; id++; }; } break; }; }; } // Now, handle the case where we were NOT looking at // :*:c: in buf else { tbuf[id] = buf[is]; id++; } } // That's all, folks! Clean up the temporary buffer. We null-terminate // it in case we need to do stupid non-8-bit-clean IO on it. tbuf[id] = '\000'; memmove (buf, tbuf, id); id--; // the actual length is id-1, since id is the next available char if (internal_trace) fprintf (stderr, " math-expand yields: =%s= len %ld\n", buf, id); free (tbuf); free (vname); if (internal_trace) { fprintf (stderr, " Returned length from qexpandvar is %ld\n", id); if (retstat) fprintf (stderr, "retstat was: %ld\n", *retstat); }; return (id); bailout: return (inlen); }