//  crm_str_funcs.c  - Controllable Regex Mutilator,  version v1.0
//  Copyright 2001-2004  William S. Yerazunis, all rights reserved.
//  
//  This software is licensed to the public under the Free Software
//  Foundation's GNU GPL, version 2.  You may obtain a copy of the
//  GPL by visiting the Free Software Foundation's web site at
//  www.fsf.org, and a copy is included in this distribution.  
//
//  Other licenses may be negotiated; contact the 
//  author for details.  
//
//  include some standard files
#include "crm114_sysincludes.h"

//  include any local crm114 configuration file
#include "crm114_config.h"

//  include the crm114 data structures file
#include "crm114_structs.h"

//  and include the routine declarations file
#include "crm114.h"

//    the command line argc, argv (declared here, defined elsewhere)
extern int prog_argc;
extern char **prog_argv;

//    the auxiliary input buffer (for WINDOW input)
extern char *newinputbuf;

//    the globals used when we need a big buffer  - allocated once, used 
//    wherever needed.  These are sized to the same size as the data window.
//    (They are only declared here; the definitions live elsewhere.)
extern char *inbuf;
extern char *outbuf;
extern char *tempbuf;


//     crm_extractflag - find the first occurrence of the string "flag"
//     (flagl bytes) inside the string "cmd" (cmdl bytes), and optionally
//     locate the blank-delimited argument that follows it.  Neither
//     string needs to be NUL-terminated; only the given lengths are used.
//
//     Matching is a plain byte-wise substring search: flag does not
//     have to start or end on a word boundary in cmd.  A "blank" is
//     any byte below 0x21 (space and the control characters).
//
//     Parameters:
//       cmd, cmdl   - the text to search, and its length in bytes
//       flag, flagl - the text to look for, and its length in bytes
//       next        - may be NULL.  If non-NULL, receives the offset in
//                     cmd of the argument following flag, or -1 when
//                     there is no such argument (or flag was not found).
//       nextl       - may be NULL.  If non-NULL, receives the length of
//                     that argument, or 0 when there is none.
//
//     Return value - offset of the start of flag in cmd, or -1 if flag
//     does not occur in cmd (an empty flag never matches).  It's
//     unnecessary to return the length of flag, as the caller already
//     knows what it is.

long crm_extractflag (const char *cmd, long cmdl, const char *flag, long flagl,
		       long *next, long *nextl)
{
  long i, j, k;
  long is;
  long last_start;

  if (internal_trace)
    {
      fprintf (stderr, "      searching for flag ");
      for (k = 0; k < flagl; k++)
	fprintf (stderr, "%c", flag[k]);
      fprintf (stderr, " in ");

      for (k = 0; k < cmdl; k++)
	fprintf (stderr, "%c", cmd[k]);
      fprintf (stderr, "\n");
    }

  //    Start from "no next argument"; every early return below then
  //    leaves the outputs in a well-defined state.  (The old code did
  //    "nextl = 0", which zeroed the pointer rather than the value and
  //    made the later "*nextl" update dereference NULL.)
  if (next)
    *next = -1;
  if (nextl)
    *nextl = 0;

  //    An empty flag, or one longer than cmd, can never match.
  if (flagl <= 0 || flagl > cmdl)
    return (-1);

  //    Try each possible starting offset in turn.  The index is always
  //    range-checked before cmd is read.
  last_start = cmdl - flagl;
  for (is = 0; is <= last_start; is++)
    {
      for (j = 0; j < flagl && cmd[is + j] == flag[j]; j++)
	;
      if (j == flagl)
	break;
    }

  if (is > last_start)
    return (-1);

  //    check - has the caller requested next arg too?  If not, we can
  //    just return right now.
  if (next == NULL)
    return (is);

  //    i is the offset of the first byte after the matched flag.  We
  //    need at least two more bytes (a blank and one arg character)
  //    for a next argument to exist at all.
  i = is + flagl;
  if (i + 1 >= cmdl)
    return (is);

  //    Step over the rest of the word the flag was found in.  The cast
  //    to unsigned char keeps bytes >= 0x80 from being seen as negative
  //    (and hence "blank") where plain char is signed.
  while (i < cmdl && (unsigned char) cmd[i] >= 0x21)
    i++;

  //    Skip over the blanks separating the flag from its argument.
  while (i < cmdl && (unsigned char) cmd[i] < 0x21)
    i++;

  //    did we fall off the end of cmd?  If so, there is no next arg.
  if (i >= cmdl)
    return (is);

  //    now we're at the first nonblank char of the argument; measure it.
  *next = i;
  for (k = i; k < cmdl && (unsigned char) cmd[k] >= 0x21; k++)
    ;
  if (nextl)
    *nextl = k - i;

  //   and now we're completely done.  Return from whence we came.
  return (is);
}


//     strnhash - generate the hash of a string of length N
//     goals - fast, works well with short vars including
//     letter pairs and palindromes, not crypto strong, generates
//     hashes that tend toward relative primality against common
//     hash table lengths (so taking the output of this function
//     modulo the hash table length gives a relatively uniform
//     distribution).
//
//     Parameters:
//       str - the bytes to hash (need not be NUL-terminated; not modified)
//       len - number of bytes of str to hash
//
//     Return value - the hash.  The accumulator is seeded with len, so
//     a len of zero (or less) returns len itself.
//
//     Portability note: the mixing works on the first four bytes of
//     the in-memory representation of a long and on shifts of the
//     whole long, so the value produced depends on the width of long
//     and on byte order.  That behaviour is deliberately left alone
//     here so that hashes stay identical to what this routine has
//     always produced on a given platform.
//
//     In timing tests, this hash function can hash over 10 megabytes
//     per second (using as text the full 2.4.9 linux kernel source)
//     hashing individual whitespace-delimited tokens, on a Transmeta
//     666 MHz.

long strnhash (char *str, long len)
{
  long i;
  long hval;
  unsigned char *hbytes;
  unsigned char ch;
  unsigned char swaptmp;
  int b;

  // initialize hval
  hval = len;

  //  byte-wise view of hval; unsigned char so that the xor and swap
  //  below never go through a possibly-signed char
  hbytes = (unsigned char *) &hval;

  //  for each character in the incoming text:
  for (i = 0; i < len; i++)
    {
      //    xor in the current byte against each of the first four
      //    bytes of hval (which alone guarantees that every bit of
      //    input will have an effect on the output)
      ch = (unsigned char) str[i];
      for (b = 0; b < 4; b++)
	hbytes[b] ^= ch;

      //    add some bits out of the middle as low order bits.  The sum
      //    is formed in unsigned long so that wraparound is well
      //    defined; signed overflow would be undefined behaviour.
      hval = (long) ((unsigned long) hval
		     + (unsigned long) ((hval >> 12) & 0x0000ffff));

      //     swap bytes 0 with 3
      swaptmp = hbytes[0];
      hbytes[0] = hbytes[3];
      hbytes[3] = swaptmp;

      //    shift hval 3 bits to the left and add in the bits above
      //    bit 29 (a 3-bit rotate when long is 32 bits and hval is
      //    non-negative).  The right shift stays on the signed value
      //    so the result is bit-for-bit what the signed expression
      //    produced; the left shift and the add are done unsigned,
      //    since shifting a negative long left is undefined.
      hval = (long) (((unsigned long) hval << 3)
		     + (unsigned long) (hval >> 29));
    }
  return (hval);
}