//  crm114_config.h  - Controllable Regex Mutilator base config, version X0.1
//  Copyright 2001 William S. Yerazunis, all rights reserved.
//  
//  This software is licensed to the public under the Free Software
//  Foundation's GNU GPL, version 1.0.  You may obtain a copy of the
//  GPL by visiting the Free Software Foundations web site at
//  www.fsf.org .  Other licenses may be negotiated; contact the 
//  author for details.  
//
//   default size of the variables hashtable (a.k.a. the VHT)
#define DEFAULT_VHT_SIZE 4095
 
//   default limit on the control stack (for catching infinite loops,
//   not a preallocated variable)
#define DEFAULT_CSTK_LIMIT 1024

//   how many levels (pending operations) will we allow in 
//   math evaluations.  We _could_ have it be unlimited, but
//   this serves as an error catcher in runaway programs.
#define DEFAULT_MATHSTK_LIMIT 1024

//   default maximum number of lines in any program file
#define DEFAULT_MAX_PGMLINES 10000

//   define maximum number of inserts before we think we're in an
//   infinite loop...
#define DEFAULT_MAX_INSERTS 1024

//   default size of the data window: 8 megabytes.
#define DEFAULT_DATA_WINDOW  8388608
//#define DEFAULT_DATA_WINDOW 16777216
//#define DEFAULT_DATA_WINDOW 1048576

//    unmap cacheing length - only actually write out this often.
#define UNMAP_COUNT_MAX 500


//    do we use Sparse Binary Polynomial Hashing (sensitive to both
//    sequence and spacing of individual words), Token Grab Bag, or
//    Token Sequence Sensitive?  Testing against the SpamAssassin
//    "hard" database shows that SBPH, TGB, and TGB2, are somewhat
//    more accurate than TSS, and about 50% more accurate than First
//    Order Only.  However, that's for English, and other natural
//    languages may show a different statistical distribution.
//
//    Choose ONE of the following:
//          SBPH, TGB2, TGB, TSS, or ARBITRARY_WINDOW_LEN:
//   
//    *** DANGER, WILL ROBINSON ***  You MUST rebuild your .css files from
//    samples of text if you change this.  
//
//
//     Sparse Binary Polynomial Hashing
#define SBPH
//
//     Token Grab Bag, noaliasing
//#define TGB2
//
//     Token Grab Bag, aliasing
//#define TGB
//
//     Token Sequence Sensitive
//#define TSS
//
//     First Order Only (i.e. single words, like SpamBayes)
//    Note- if you use FOO, you must turn off weights!!
//#define FOO
//
//     Generalized format for the window length.
//
//  DO NOT SET THIS TO MORE THAN 10 WITHOUT LENGTHENING hctable
//  the classifier modules !!!!!!  "hctable" contains the pipeline
//  hashing coefficients and needs to be extended to 2 * WINDOW_LEN
//
//     Generic window length code
//#define ARBITRARY_WINDOW_LENGTH
//
#define MARKOVIAN_WINDOW_LEN 5
//
#define OSB_BAYES_WINDOW_LEN 5
//
//      DO NOT set this to more than 5 without lengthening the
//      htup1 and htup2 tables in crm_unified_bayes.c
//
#define UNIFIED_BAYES_WINDOW_LEN 5
//
//   
//         Winnow algorithm parameters here...  
//
#define OSB_WINNOW_WINDOW_LEN 5
#define OSB_WINNOW_PROMOTION 1.23
#define OSB_WINNOW_DEMOTION 0.83
//
//     Now, choose whether we want to use the "old" or the "new" local
//     probability calculation.  The "old" one works slightly better
//     for SBPH and much better for TSS, the "new" one works slightly
//     better for TGB and TGB2, and _much_ better for FOO
//
//     The current default (not necessarily optimal) 
//     is Markovian SBPH, STATIC_LOCAL_PROBABILITIES, 
//     LOCAL_PROB_DENOM = 16, and SUPER_MARKOV
//
//#define LOCAL_PROB_DENOM 2.0 
#define LOCAL_PROB_DENOM 16.0 
//#define LOCAL_PROB_DENOM 256.0 
#define STATIC_LOCAL_PROBABILITIES
//#define LENGTHBASED_LOCAL_PROBABILITIES
//
//#define ENTROPIC_WEIGHTS
//#define MARKOV_WEIGHTS
#define SUPER_MARKOV_WEIGHTS
//#define BREYER_CHHABRA_SIEFKES_WEIGHTS
//#define BREYER_CHHABRA_SIEFKES_BASE7_WEIGHTS
//#define BCS_MWS_WEIGHTS
//#define BCS_EXP_WEIGHTS 
//
//
//    Do we use learncount-based normalization in calculating probabilities?
#define OSB_LEARNCOUNTS
//
//
//    Do we take only the maximum probability feature?
//
//#define USE_PEAK
//
//
//    Should we use stochastic microgrooming, or weight-distance microgrooming-
//    Make sure ONE of these is turned on.
//#define STOCHASTIC_AMNESIA
#define WEIGHT_DISTANCE_AMNESIA
//
//    define the default max chain length in a .css file that triggers
//    autogrooming, the rescale factor when we rescale, and how often
//    we rescale, and what chance (mask and key) for any particular 
//    slot to get rescaled when a rescale is triggered for that slot chain.
//#define MICROGROOM_CHAIN_LENGTH 1024
#define MICROGROOM_CHAIN_LENGTH 256
//#define MICROGROOM_CHAIN_LENGTH 64
#define MICROGROOM_RESCALE_FACTOR .75
#define MICROGROOM_STOCHASTIC_MASK 0x0000000F
#define MICROGROOM_STOCHASTIC_KEY  0x00000001
#define MICROGROOM_STOP_AFTER 32    //  maximum number of buckets groom-zeroed
                                    
#define FEATURE_HIT_INCREMENT_SIZE 7

//    define the "block ratio" of how of a memory data window we're
//    willing to suck in from a minion process before we block on
//    input.  Normally a factor of 2 (1/4th of the size of a full memory
//    window, or 2 megabytes in the default configuraton) is sufficient.
#define SYSCALL_WINDOW_RATIO 2

//   define default internal debug level
#define DEFAULT_INTERNAL_TRACE_LEVEL 0

//   define default user debug level
#define DEFAULT_USER_TRACE_LEVEL 0

//   define maximum number of parenthesized sub regexes we'll accept
#define MAX_SUBREGEX 256

//   define maximum bracket depth nesting we'll allow....
#define MAX_BRACKETDEPTH 256

//   define maximum number of iterations allowed for EVAL expansion
//#define MAX_EVAL_ITERATIONS 16384
//#define MAX_EVAL_ITERATIONS 1024
#define MAX_EVAL_ITERATIONS 4096

//   define maximum size of a pattern in bytes
#define MAX_PATTERN 16384

//    and how long can a variable name be
#define MAX_VARNAME 2048

//   define the default number of bytes in a learning file hash table
//   (note that this should be a prime number, or at least one with a 
//    lot of big factors)
//
//       this value (1048577) is one more than a meg, for a .css of 12 megs
#define DEFAULT_SPARSE_SPECTRUM_FILE_LENGTH 1048577
#define DEFAULT_MARKOVIAN_SPARSE_SPECTRUM_FILE_LENGTH 1048577
#define DEFAULT_OSB_BAYES_SPARSE_SPECTRUM_FILE_LENGTH 1048577
#define DEFAULT_WINNOW_SPARSE_SPECTRUM_FILE_LENGTH 1048577


//       this value (2097153) is one more than 2 megs, for a .css of 24 megs
//#define DEFAULT_SPARSE_SPECTRUM_FILE_LENGTH 2097153
//

//    define the maximum length of a filename
#define MAX_FILE_NAME_LEN 255

//    define how many microseconds to sleep waiting for a minion process
//    to complete:
//#define MINION_SLEEP_USEC 1000000
//#define MINION_SLEEP_USEC 10000
//#define MINION_SLEEP_USEC 1000
//#define MINION_SLEEP_USEC 100
#define MINION_SLEEP_USEC 10

//    How many microseconds to sleep if we're looping on input WINDOW stmt.
//    try 1 millisecond for now
#define INPUT_WINDOW_SLEEP_USEC 1000

//     Maximum number of different .CSS files in a CLASSIFY
#define MAX_CLASSIFIERS 128

//     Maximum number of nonfatal errors we'll allow before tossing our
//     cookies on a fatal error
#define MAX_NONFATAL_ERRORS 100

//     How big is a feature bucket?  Is it a byte, a short, a long, 
//     a float, whatever.  :)
#define FEATUREBUCKET_TYPE FEATUREBUCKET_STRUCT
#define FEATUREBUCKET_VALUE_MAX 32767
//#define FEATUREBUCKET_VALUE_MAX 1000000
//#define FEATUREBUCKET_TYPE unsigned short

//    End of configurable parameters.

