//  crm114_structs.h  - Controllable Regex Mutilator structures, version X0.1
//  Copyright 2001  William S. Yerazunis, all rights reserved.
//
//  This software is licensed to the public under the Free Software
//  Foundation's GNU GPL, version 1.0.  You may obtain a copy of the
//  GPL by visiting the Free Software Foundations web site at
//  www.fsf.org .  Other licenses may be negotiated; contact the
//  author for details.
//

//  Include guard: the typedefs below must not be seen twice in one
//  translation unit.
#ifndef CRM114_STRUCTS_H_INCLUDED
#define CRM114_STRUCTS_H_INCLUDED

#include <time.h>               // clock_t, used by TMS_STRUCT

//  NOTE: MAX_BRACKETDEPTH (used by CSL_CELL) is not defined in this
//  header; it must already be defined when this header is included.
//  (Presumably it comes from the project's configuration header --
//  confirm against the including sources.)


//   File-scope variables.
//
//   (Judging by the names these are run-time limits plus tracing and
//   debugging switches -- confirm against the code that sets them.)
//
//   NOTE(review): these are definitions without "extern", so every
//   translation unit that includes this header emits its own tentative
//   definition.  That only links when the toolchain merges common
//   symbols (e.g. -fcommon); moving to "extern" here plus one
//   definition in a .c file would need a change outside this header.
//
long vht_size;
long cstk_limit;
long max_pgmlines;
long max_pgmsize;
long user_trace;
long internal_trace;
long debug_countdown;
long cmdline_break;
long cycle_counter;
long ignore_environment_vars;
long data_window_size;
long sparse_spectrum_file_length;
long profile_execution;

//    how should math be handled?
//    = 0 no extended (non-EVAL) math, use algebraic notation
//    = 1 no extended (non-EVAL) math, use RPN
//    = 2 extended (everywhere) math, use algebraic notation
//    = 3 extended (everywhere) math, use RPN
long q_expansion_mode;


//   structure of a vht cell
//   note - each file gets an entry, with the name of the file
//   being the name of the variable - no colons!
//
//   also note that there's no "next" pointer in a vht cell; this is because
//   we do in-table overflowing (if a table entry is in use, we use the next
//   available table entry, wrapping around).  It's easy to change in any
//   case.
//
typedef struct mythical_vht_cell {
  char *filename;    // file where defined (or NULL)
  int filedesc;      // filedesc of defining file (if any)
  char *nametxt;     // block of text that hosts the variable name
  long nstart;       // index into nametxt to start of varname
  long nlen;         // length of name
  char *valtxt;      // text block that hosts the captured value
                     //  vstart, vlen, mstart, and mlen are all measured
                     //  from the _start_ of valtxt, mstart relative to
                     //  vstart, etc!!!
  long vstart;       // zero-base index of start of variable (inclusive)
  long vlen;         // length of captured value : this plus vstart is where
                     //  you could put a NUL terminator if you wanted to.
  long mstart;       // zero-base start of most recent match of this var
  long mlen;         // length of most recent match against this var; this
                     //  plus mstart is where you could put a NUL
                     //  terminator if you wanted to.
  long linenumber;   // linenumber of this variable (if known, else -1)
} VHT_CELL;


//   structure of a microcompile table cell (one such per statement)
//
//   These table entries get filled in during microcompile operation.
//
typedef struct mythical_mct_cell {
  char *hosttxt;     // text block hosting this statement (presumably the
                     //  buffer that start/fchar/achar index -- confirm)
  long start;        // zero-base index of start of statement (inclusive)
  long fchar;        // zero-base index of non-blank stmt (for prettyprint)
  long achar;        // zero-base index of start of args
  long stmt_utime;   // user time spent in this statement line
  long stmt_stime;   // system time spent in this statement line
  int stmt_type;     // statement type of this line (a CRM_* statement code)
  int nest_level;    // nesting level of this statement
  int fail_index;    // if this statement failed, where would we go?
  int liaf_index;    // if this statement liafed, where would we go?
  int trap_index;    // if this statement faults, where would we go?
  int stmt_break;    // 1 if "break" on this stmt, 0 otherwise.
} MCT_CELL;


//   structure of a control stack level cell.
//   Nota Bene: CSL cells are used both to retain toplevel data about
//   any particular file being executed and to retain data on any file
//   that is data!  If a file is executable, then the mct pointer is a
//   pointer to the compiled MCT table, else the mct pointer is NULL
//   and the file is not executable.
//
typedef struct mythical_csl_cell {
  char *filename;       // filename if any
  long rdwr;            // 0=readonly, 1=rdwr
  long filedes;         // file descriptor it's open on (if any)
  char *filetext;       // text buffer
  long nchars;          // characters of data we have
  long hash;            // hash of this data (if done)
  MCT_CELL **mct;       // microcompile (if compiled)
  long nstmts;          // how many statements in the microcompile
  long preload_window;  // do we preload the window or not?
  long cstmt;           // current executing statement of this file
  void *caller;         // pointer to this file's caller (if any)
  long aliusstk[MAX_BRACKETDEPTH];  // the status stack for ALIUS
} CSL_CELL;


//     The argparse block is filled in at run time, though at least in
//     principle it could be done at microcompile time, but var-expansion
//     needs to be done at statement execution time.
//
//     Each argument is a (start pointer, length) pair.  (Presumably the
//     a/p/b/s prefixes stand for angle, paren, box and slash groups,
//     matching CRM_ANGLES .. CRM_SLASHES below -- confirm against the
//     argument parser.)
//
typedef struct mythical_argparse_block {
  char *a1start;
  long a1len;
  char *p1start;
  long p1len;
  char *p2start;
  long p2len;
  char *p3start;
  long p3len;
  char *b1start;
  long b1len;
  char *s1start;
  long s1len;
  char *s2start;
  long s2len;
  long sflags;
} ARGPARSE_BLOCK;


//     One feature bucket: hash, key and an integer value.
typedef struct {
  unsigned long hash;
  unsigned long key;
  unsigned long value;
} FEATUREBUCKET_STRUCT;


//     Feature file header: 4-byte version tag, flags word and skip_to.
typedef struct {
  unsigned char version[4];
  unsigned long flags;
  unsigned long skip_to;
} FEATURE_HEADER_STRUCT;


//     Same shape as FEATUREBUCKET_STRUCT, but the value is a float.
typedef struct {
  unsigned long hash;
  unsigned long key;
  float value;
} WINNOW_FEATUREBUCKET_STRUCT;


//     Process-time record; the field names mirror POSIX "struct tms".
typedef struct {
  clock_t tms_utime;   // user time
  clock_t tms_stime;   // system time
  clock_t tms_cutime;  // user time of children
  clock_t tms_cstime;  // system time of children
} TMS_STRUCT;


//    define statement types for microcompile
//
#define CRM_BOGUS 0
#define CRM_NOOP 1
#define CRM_EXIT 2
#define CRM_RETURN 3
#define CRM_LABEL 4
#define CRM_OPENBRACKET 5
#define CRM_CLOSEBRACKET 6
#define CRM_GOTO 7
#define CRM_MATCH 8
#define CRM_FAIL 9
#define CRM_LIAF 10
#define CRM_ACCEPT 11
#define CRM_TRAP 12
#define CRM_FAULT 13
#define CRM_OUTPUT 14
#define CRM_INVOKE 15
#define CRM_EXECUTE 16
#define CRM_WINDOW 17
#define CRM_ALTER 18
#define CRM_gooblegooblegooble_deprecated 19
#define CRM_LEARN 20
#define CRM_CLASSIFY 21
#define CRM_ISOLATE 22
#define CRM_PERM 23
#define CRM_INPUT 24
#define CRM_SYSCALL 25
#define CRM_HASH 26
#define CRM_INTERSECT 27
#define CRM_UNION 28
#define CRM_EVAL 29
#define CRM_ALIUS 30
#define CRM_UNIMPLEMENTED 31


//      FLAGS FLAGS FLAGS
//     all of the valid CRM114 flags are listed here
//
//     Each flag is a single bit, so flags can be OR-ed into one mask
//     (see flags_allowed_mask in STMT_TABLE_TYPE).
//
//      match searchstart flags
#define CRM_FROMSTART     (1 << 0)
#define CRM_FROMNEXT      (1 << 1)
#define CRM_FROMEND       (1 << 2)
#define CRM_NEWEND        (1 << 3)
#define CRM_FROMCURRENT   (1 << 4)
//      match control flags
#define CRM_NOCASE        (1 << 5)
#define CRM_ABSENT        (1 << 6)
#define CRM_BASIC         (1 << 7)
#define CRM_NOMULTILINE   (1 << 8)
#define CRM_BACKWARDS     (1 << 9)
//      window-input flags
#define CRM_BYCHAR        (1 << 10)
#define CRM_BYEOF         (1 << 11)
#define CRM_EOFACCEPTS    (1 << 12)
#define CRM_EOFRETRY      (1 << 13)
//      process control flags
#define CRM_KEEP          (1 << 14)
#define CRM_ASYNC         (1 << 15)
//      learn and classify
#define CRM_REFUTE        (1 << 16)
#define CRM_MICROGROOM    (1 << 17)
#define CRM_LOGBOOST      (1 << 18)
#define CRM_APPEND        (1 << 19)
//
//      literal is a new match control flag... thanks, Ville.
#define CRM_LITERAL       (1 << 20)
#define CRM_BYLINE        (1 << 21)
#define CRM_MARKOVIAN     (1 << 22)
#define CRM_OSB_BAYES     (1 << 23)
#define CRM_NEURAL_NET    (1 << 24)
#define CRM_CORRELATE     (1 << 25)
#define CRM_OSB_WINNOW    (1 << 26)
//      NOTE(review): 26 equals the highest bit index used above, while
//      27 flags (bits 0..26) are defined.  Confirm whether users treat
//      CRM_MAXFLAGS as a count or as a last index before adding flags.
#define CRM_MAXFLAGS      26


//*****************************************************************
//
//     The following table describes the statements allowed in CRM114.
//
//      Each entry is one line of STMT_TABLE_TYPE, and gives the text
//      representation of the command, the internal dispatch code,
//      whether the statement is "executable" or not, what the minimum
//      and maximum number of slash-groups, paren-groups, and box-groups
//      are for the statement to make sense, and what flags are allowed
//      for that statement.
//
typedef struct {
  char *stmt_name;
  int stmt_code;
  int namelen;
  int is_executable;
  int minslashes;
  int maxslashes;
  int minparens;
  int maxparens;
  int minboxes;
  int maxboxes;
  int flags_allowed_mask;
} STMT_TABLE_TYPE;


//   The compiler file actually contains this "for real", the
//   extern here is merely a reference to it.
//
#ifndef BASE_COMPILER_TABLE_HERE
extern STMT_TABLE_TYPE stmt_table[];
#endif


//    these defines are for arg type... note that they must remain synched
//    IN THIS ORDER with the start chars and end chars in crm_statement_parse
//
#define CRM_ANGLES 0
#define CRM_PARENS 1
#define CRM_BOXES 2
#define CRM_SLASHES 3


//     The possible exit codes
#define CRM_EXIT_OK 0
#define CRM_EXIT_ERROR 1
#define CRM_EXIT_FATAL 2
#define CRM_EXIT_APOCALYPSE 666

#endif  // CRM114_STRUCTS_H_INCLUDED