// crm_str_funcs.c - Controllable Regex Mutilator, version v1.0 // Copyright 2001-2004 William S. Yerazunis, all rights reserved. // // This software is licensed to the public under the Free Software // Foundation's GNU GPL, version 2. You may obtain a copy of the // GPL by visiting the Free Software Foundations web site at // www.fsf.org, and a copy is included in this distribution. // // Other licenses may be negotiated; contact the // author for details. // // include some standard files #include "crm114_sysincludes.h" // include any local crm114 configuration file #include "crm114_config.h" // include the crm114 data structures file #include "crm114_structs.h" // and include the routine declarations file #include "crm114.h" // the command line argc, argv extern int prog_argc; extern char **prog_argv; // the auxilliary input buffer (for WINDOW input) extern char *newinputbuf; // the globals used when we need a big buffer - allocated once, used // wherever needed. These are sized to the same size as the data window. extern char *inbuf; extern char *outbuf; extern char *tempbuf; // crm_extractflag - given an arbitrary string cmd (start/len) // with words delimited by spaces, and a second string "flag" // (start/len). // // 1) does "flag" exist in "cmd"? // 2) if so, where? // 3) what is the start/len of flag in cmd? // 4) what is the arg _after_ flag (start/len) // // Return value - pointer to start of flag in cmd. It's // unnecessary to return the length of flag, as we already know // what it is. also modifies nextarg start and length. 
//     crm_extractflag - find a flag word inside a command string.
//
//     cmd/cmdl   - the text to search and its length (not NUL-terminated;
//                  only cmd[0] .. cmd[cmdl-1] are ever read).
//     flag/flagl - the text to look for and its length.
//     next       - optional (may be NULL).  When the flag is found,
//                  *next receives the index in cmd of the argument that
//                  follows the flag, or -1 if there is none.
//     nextl      - optional (may be NULL).  When the flag is found,
//                  *nextl receives the length of that argument (0 if
//                  there is none).
//
//     Return value: index in cmd of the first occurrence of flag,
//     or -1 if flag does not occur (or flagl <= 0).  When -1 is returned
//     *next and *nextl are left untouched.
//
//     Words are delimited by any char whose value is below 0x21.
//     NOTE(review): the comparison is done on plain char, so on platforms
//     where char is signed, bytes >= 0x80 also compare as delimiters.
//
long crm_extractflag (const char *cmd, long cmdl,
                      const char *flag, long flagl,
                      long *next, long *nextl)
{
  long i, j, k;
  long is;

  if (internal_trace)
    {
      fprintf (stderr, " searching for flag ");
      for (k = 0; k < flagl; k++)
        fprintf (stderr, "%c", flag[k]);
      fprintf (stderr, " in ");
      for (k = 0; k < cmdl; k++)
        fprintf (stderr, "%c", cmd[k]);
      fprintf (stderr, "\n");
    }

  //    An empty flag can never be found; bail out before flag[0]
  //    would be read.
  if (flagl <= 0)
    return (-1);

  //    Try every start position at which the whole flag still fits
  //    inside cmd.  The bound is tested before any byte of cmd is
  //    read, so we never look past cmd[cmdl-1].
  for (is = 0; is <= cmdl - flagl; is++)
    {
      for (j = 0; j < flagl && cmd[is + j] == flag[j]; j++)
        ;
      if (j == flagl)
        break;
    }

  //    Ran out of start positions without a full match?
  if (is > cmdl - flagl)
    return (-1);

  //    Found it.  Until proven otherwise the following arg is empty.
  //    (This must write through the pointer, not reassign the pointer.)
  if (nextl != NULL)
    *nextl = 0;

  //    Has the caller requested the next arg too?  If not, we're done.
  if (next == NULL)
    return (is);

  //    i indexes the first char after the matched flag text.
  i = is + flagl;

  //    A following arg needs at least a separator plus one char; if
  //    there isn't room for that, mark next as invalid.
  if (i + 1 >= cmdl)
    {
      *next = -1;
      return (is);
    }

  //    Step over whatever nonblank chars are still glued to the flag.
  while (i < cmdl && cmd[i] >= 0x021)
    i++;

  //    Did we fall off the end of cmd?  If so, there's no next arg.
  if (i >= cmdl)
    {
      *next = -1;
      return (is);
    }

  //    Skip the run of separators.
  while (i < cmdl && cmd[i] < 0x021)
    i++;

  //    Here starts the next arg.  If the separators ran to the very end
  //    of cmd, this is cmdl and the length below stays at 0.
  *next = i;
  while (i < cmdl && cmd[i] >= 0x021)
    {
      i++;
      if (nextl != NULL)
        *nextl = (*nextl) + 1;
    }

  //    and now we're completely done.
  return (is);
}

//     strnhash - generate the hash of a string of length N
//     goals - fast, works well with short vars including
//     letter pairs and palindromes, not crypto strong, generates
//     hashes that tend toward relative primality against common
//     hash table lengths (so taking the output of this function
//     modulo the hash table length gives a relatively uniform
//     distribution).
//
//     In timing tests, this hash function can hash over 10 megabytes
//     per second (using as text the full 2.4.9 linux kernel source)
//     hashing individual whitespace-delimited tokens, on a Transmeta
//     666 MHz.
//
//     str/len - the bytes to hash and how many of them.
//     Returns a 32-bit hash, sign-extended into a long.
//
//     The state is kept in an unsigned long that is masked to 32 bits
//     after every step.  This yields the value the byte-poking
//     implementation produced with a 4-byte little-endian long, but
//     independent of sizeof(long) and of byte order, and without
//     relying on signed overflow.
//
long strnhash (char *str, long len)
{
  const unsigned long mask32 = 0xffffffffUL;
  unsigned long hval;
  unsigned long byte;
  unsigned long carry;
  long i;

  //    initialize hval
  hval = ((unsigned long) len) & mask32;

  //    for each character in the incoming text:
  for (i = 0; i < len; i++)
    {
      //    xor the current byte into each of the four bytes of hval
      //    (which alone guarantees that every bit of input will have
      //    an effect on the output)
      byte = (unsigned long) (unsigned char) str[i];
      hval ^= byte | (byte << 8) | (byte << 16) | (byte << 24);

      //    add some bits out of the middle as low order bits.
      hval = (hval + ((hval >> 12) & 0x0000ffffUL)) & mask32;

      //    swap the least and most significant bytes
      hval = (hval & 0x00ffff00UL)
        | ((hval & 0x000000ffUL) << 24)
        | (hval >> 24);

      //    "rotate" hval 3 bits to the left.  The historical code did
      //    (hval << 3) + (hval >> 29) on a signed 32-bit value, so when
      //    the top bit is set the right-shifted term is sign-extended;
      //    reproduce that to keep the hash values unchanged.
      carry = hval >> 29;
      if (hval & 0x80000000UL)
        carry |= 0xfffffff8UL;
      hval = ((hval << 3) + carry) & mask32;
    }

  //    hand the 32-bit pattern back as a signed value
  if (hval & 0x80000000UL)
    return (-(long) (mask32 - hval) - 1);
  return ((long) hval);
}