#! @BIN_PATH@/crm -( learnspam learnnonspam learnfile stats_only config spamcss nonspamcss fileprefix force unlearn )
#
#        a whitelist / blacklist / command based mail sorter
#
#    Copyright (C) 2002-2004 William S. Yerazunis; licensed under the 
#    GNU Public License (GPL) version 2.  A copy of this license is included 
#    in the distribution media, or obtain one from www.fsf.org .
#
#   Note to BSD users - you MUST remove EVERYTHING on the first line of this
#   program from the first "-" to the end or you will not get what you 
#   expect.  This is due to a bug in the BASH code on BSD.
#
# --->>>     Design Philosophy ( do these IN ORDER )	
#
#       *  if --fileprefix is specified, all filenames EXCEPT --config 
#            are prefixed with that (You need a trailing slash on the prefix 
#            if it is a directory name.)
#       *  if --config , grab the config file from the specified place.
#       *  Load the mailfilter.cf config file from wherever config or 
#            fileprefix points (--config overrides --fileprefix).
#       *  If --spamcss is specified, use that as the spam.css file
#       *  If --nonspamcss is specified, use that as the nonspam.css file
#       *  If --learnspam, learn as spam and exit.
#	*  If --learnnonspam, learn as nonspam and exit
#       *  If --force, force-feed the learning
#       *  If --learnfile, use :learnfile:.css and :learnfile:text.txt
#       *  If --stats_only, do normal classification but don't do any 
#                forwarding, only output the status on stdout and return
#                the exit code.
#
#	*  check for the "command word", if present, execute the command
# 
#	*  check to see if any of the whitelist patterns apply.  If so, 
#	accept the mail to /var/spool/the_user (which is actually to
#       be found at /var/spool/mail/:*:_env_USER: )
#
#       *  check to see if any of the blacklist patterns apply.  If so,
#       flush the mail to the "blacklisted" file.
#
#       *  check to see if it's commanded to be learned as a spam or a
#       nonspam model.  If so, learn it to the appropriate .css (Crm 
#       Sparse Spectra) file
#
#	*  run the email through the classifier.  If the classifier thinks
#  	it's good, send it to the /var/spool/mail/the_user file, else 
#	send it to the "doubtful" file.
#
##############################################################
#
#    ---  uncomment this if you want to include a "forced" 
#         configuration file  ---
# insert mailfilterconfig.crm
#
#
#    --- make a safe space to keep the results of our work ---
#
#    Scratch variables for the results of filtering.  Each one is
#    ISOLATEd with a default value here so that the wrap-up code can
#    expand it even if no earlier step assigned it.
isolate (:classifier_reason:) /no reason yet/
#
isolate (:our_exit_code:) /0/
#
isolate (:stats:) / pR: 0.000000 /
#
isolate (:pr:) / pR: 0.00000/
#
isolate (:subj_text:) / (None) /
#
isolate (:add_extra_stuff:) //
#
#      Isolate these and give them empty values, in case the user's
#      configuration doesn't.
isolate (:reject_address:) //
isolate (:fail_priority_mail_to:) //
isolate (:fail_blacklist_mail_to:) //
isolate (:fail_SSM_mail_to:)  //
isolate (:log_rejections:) //
#
#      This ISOLATE will guarantee that :fileprefix: exists, and keep its
#      prior (commandline) value if it has one, or an empty string if it
#      doesn't.
isolate (:fileprefix:)
#
#       This ISOLATE will guarantee that :force: will exist, and keep the
#       commandline value ("SET") , or the null string if the user doesn't 
#       use --force on the command line.
isolate (:force:)
#
#       This ISOLATE will guarantee that :unlearn: will exist, and will keep
#       the commandline value ("SET") or the null string if the user doesn't
#       use --unlearn on the command line.
isolate (:unlearn:)
#
#       Now, :clf: holds the classify & learn flags; note that we have two 
#       separate variables here in a bizarre chain.  The reason is that
#       :unlearn: can have the value "SET", whereas the flag passed to
#       CLASSIFY and LEARN through :clf: has to be the word "refute".
isolate (:clf:) //
#
#       This is the code to read the per-user configuration.  Note
#       that because this happens during the run, it will _override_
#       any command line arguments that get set.
#
#       The file named by :config: is read when :config: is non-empty;
#       otherwise :*:fileprefix:mailfilter.cf is read.  Each line of the
#       form   :name: /value/   then becomes a variable :name: holding
#       "value".
{
	isolate (:option_txt:)
	isolate (:ev:)
	isolate (:verbose_startup:)
#   read in the mail filter configuration file.
    isolate (:config:)
    {
        {
            match [:config:] /.+/
            input [:*:config:] (:option_txt:)
        }
        alius
        {
#           read in the standard mail filter configuration file.
               input [:*:fileprefix:mailfilter.cf] (:option_txt:)
        }
    }

#	output /options == :*:option_txt:\n/
#   reset loop for matching to start of :option_txt:
	match [:option_txt:] //

#   and loop till there are no more options.  (The <fromend> match below
#   fails once no further setting line is found, which leaves this block.)
	{
#   find a line that looks like a parameter setting...
		match < fromend nomultiline > (:line: :name: :value:) \
                 [:option_txt:]  /^[ ]*(:[[:graph:]]+:)[ \t]+\/(.*)\//
#	         [:option_txt:] /^([[:graph:]]+).*\/(.*)\/.*$/
		{
#   don't execute the assign if there's a # at the start of the name.
			match <absent> [:name:] /^\x23/
			{
#   debugging print, only when :verbose_startup: contains SET
				match [:verbose_startup:] /SET/
				output / :*:name:\n    :*:value:\n/
			}
#   create (or overwrite) the variable named by the line with its value
			isolate (:*:name:) /:*:value:/
		}
#   go around again for the next setting line
		liaf
	} 
}
#
#     Now, a weirdness - we need to add "refute" to the :clf: flags
#     if --unlearn was in the command line params - but append, not
#     replace, so whatever :clf: already holds is kept.
#  
{
	match [:unlearn:] /SET/
	alter (:clf:) /:*:clf: refute/
}


#
#
#    Do a quick check- has the password been changed or not?  If it's
#    still the default, put in something that will be well-nigh unguessable
#    (esp. since it will contain received headers that the sender cannot
#    see nor control.)
{
	match [:spw:] /DEFAULT-PASSWORD/
	#  yes, it's the same as default.  So we scramble it just so
	#  nobody can hack in: :spw: is replaced by a hash computed over
	#  :_env_string: followed by the whole incoming message (:_dw:).
	hash (:spw:) /:*:_env_string::*:_dw:/
}
#
#    Default the rejection destination to :general_fails_to:, and use the
#    same value for each per-reason destination that is still blank
#    (i.e. contains no printable character).
isolate (:reject_address:) /:*:general_fails_to:/
{
	match [:fail_priority_mail_to:] <absent> /[[:graph:]]/
	alter (:fail_priority_mail_to:) /:*:general_fails_to:/
}
{
	match [:fail_blacklist_mail_to:] <absent> /[[:graph:]]/
	alter (:fail_blacklist_mail_to:) /:*:general_fails_to:/
}
{
	match [:fail_SSM_mail_to:] <absent> /[[:graph:]]/
	alter (:fail_SSM_mail_to:) /:*:general_fails_to:/
}
#
#      Does the user want us to log all incoming mail?  This is handy for
#      testing and auditing purposes.  When :log_to_allmail.txt: contains
#      "yes", the unmodified message is appended to
#      :*:fileprefix:allmail.txt .
{
	match [:log_to_allmail.txt:] /yes/
	output [:*:fileprefix:allmail.txt] <append> /:*:_dw:/
}
#
#   m_text is "mutilated text" - the result of all our
#  machinations and hackages.  It will become an annotated _copy_ 
#  of the incoming text, with mutilations.
#
#  b_text and i_text both start out as copies of the incoming message;
#  the expansion steps further down modify or empty them before they
#  are folded into m_text.
#
isolate (:m_text:) //
isolate (:b_text:) /:*:_dw:/
isolate (:i_text:) /:*:_dw:/
#
#
#   To start with, the commanded text is assumed to be the entire input.
#   THEN
#   If there's a command line ("command <password> ...") followed by text,
#   we save only the text after that line, so we can put that, and _only_
#   that, into the .txt corpora.
{
	isolate (:cmd_txt:) /:*:_dw:/
	match (:z: :cmd_txt:) [:_dw:] /command :*:spw: [^\n]*\n(.*)/
}
#
#
#	do we do any expansions?
#
#	Two optional expansions are applied to working copies of the mail:
#	  1. base64 decoding into :b_text:      (when :do_base64: is yes)
#	  2. HTML comment removal from :i_text: (when :undo_interruptus: is yes)
{

	#   expansion 1: - do we perform base64 expansions?
	{
		{
	        	match [:do_base64:] /yes/
    			{
			    #  yes, expand base64's if there are any
			    #
			    #    Note: some spams don't even bother to use
                            #    a 'Content-Transfer-Encoding' marker,
			    #    and even fewer use Content-Type: text/whatever
			    #    so we have to sort of wing it, when to expand
			    #    what _might_ be base64 and when to ignore it.
			    #    For now, if it says it's a base64, it gets
			    #    expanded, no matter what the type.  Maybe
			    #    someday someone will put in a lockout for
			    #    things like .jpg files, .doc files, etc.
                            # 
			    isolate (:exp_text:)
			    #   :h: is the header name, :b: everything after
			    #   the "base64" marker, :c: the run of 2 to 200
			    #   base64-looking lines found inside :b:
			    match [:b_text:] <nocase> (:a: :h: :b:) /(Content-Transfer-Encoding): base64(.*)/
			    match (:c:) [:b:] /([a-zA-Z0-9+=!\/]+:*:_nl:){2,200}/
			    #
			    #   pipe the encoded run through the configured
			    #   decoder command
			    syscall (:*:c:) (:exp_text:) /:*:mime_decoder: /
			    #   and stuff the result back into b_text for
			    #   classification right in context.
			    alter (:c:) /:*:exp_text:/
			    #   and mark this piece of mime as "prior", so the
			    #   header match above does not find it again.
			    alter (:h:) /Content-Transfer-Prior-Encoding/
			    #   repeat till no more Mime base64 encodings 
			    liaf
			}
    		}
		alius
		{
			#   if no base64 expansions enabled, empty out :b_text:
			#			
			alter (:b_text:) //
		}
	} 
	#
	#   If we had expansions, bust the html contents out of them, otherwise
	#   ignore b_text as it's redundant
	{
		{
			match [:b_text:] <nocase> /Content-Transfer-Prior-Encoding/
			alter (:i_text:) /:*:b_text:/
		}
		alius
		{
			#   if :b_text: _didn't_ have a base64, it's useless
			alter (:b_text:) //
		}
	}
	#   expansion 2 :  do we bust HTML comments ( a.k.a. 
	#    hypertextus interruptus) out?
	{
		match [:undo_interruptus:] /yes/
		isolate (:commentbin:) //
		{
		    #   move each HTML comment out of :i_text: and into
		    #   :commentbin:, one per pass, until none are left
		    match [:i_text:] (:comment:) /<!--([^-]|-[^-]|--[^>])*-->/
		    alter (:commentbin:) /:*:commentbin: :*:comment:/
		    alter (:comment:) //
		    liaf
                }
		#     if we had at least 80 characters worth of comments, then
		#     it's worth using the decommented text, else not.
		#     (this is my personal judgement call)
		{
                   {
                       match [:commentbin:] /(.){80,}/
                   }
                   alius
                   {
                       alter (:i_text:) //
                   }
		}
	}
}
#    and reassemble the mucked-over text into the :m_text: var: first the
#    incoming message as received, then the base64-expanded copy
#    (:b_text:), then the decommented copy (:i_text:).  Either of the last
#    two may have been emptied above.
#
{
       alter (:m_text:) /:*:_dw: :*:_nl: :*:b_text: :*:_nl: :*:i_text: :*:_nl:/
}
#
#   Do we want to do any rewrites before running?  
#
#   When :rewrites_enabled: is yes, :*:fileprefix:rewrites.mfp is read
#   and applied to :m_text: in two passes: lines of the form
#   "from>-->to" are applied with line-spanning matching, then lines of
#   the form "from>->to" are applied with <nomultiline> matching.
#    
{
   match [:rewrites_enabled:] /yes/
   isolate (:rewrites:)
   input (:rewrites:) [:*:fileprefix:rewrites.mfp]
#    reset matching on rewrites to start of string - if no string, no more
#    processing of rewrites !!
   match [:rewrites:] //
   #
   #
   {
       #    Grab the next regex; turn the one-per-line patterns into a 
       #    regex and a replacement string.
       #    First, do the line-spanning regexes.
       match <fromend nomultiline> (:ch: :fr: :to:) [:rewrites:]  /(.+)>-->(.*)/
       #    see if the "fr" regex matches anywhere
       {
	   match [:m_text:] (:place:) /:*:fr:/
	   #  Yep, it matched... alter it and do it again
	   #
	   alter (:place:) /:*:to:/
	   liaf
       }
       #   Nope, didn't match... grab the next regex and try again,
       liaf
   }
   #
   #     reset back to the start of the rewrites.
   #
   match [:rewrites:] //
   #
   #      and do it again for non-line-spanners
   {
       #    Go through and do it again, except this time do it for
       #    the non-line-spanning regexes.
       match <fromend nomultiline> (:ch: :fr: :to:) [:rewrites:]  /(.+)>->(.*)/
       #    see if the "fr" regex matches anywhere
       {
	   match [:m_text:] <nomultiline> (:place:) /:*:fr:/
	   #  Yep, it matched... alter it and do it again
	   #
	   alter (:place:) /:*:to:/
	   liaf
       }
       #   Nope, didn't match... grab the next regex and try again,
       liaf
   }
}    #  done with rewrites.
#
#
# -------------do we have a --learnspam or --learnnonspam command line key?
#
#   :text: is bound to the whole of :m_text: here; the learn code that the
#   GOTOs below jump to reads :text: .
match (:text:) [:m_text:] /.*/
isolate (:c:) //
isolate (:learnspam: :learnnonspam: :learnfile: :stats_only:)
isolate ( :spamcss: :nonspamcss: )
#   default the statistics file names when --spamcss / --nonspamcss
#   were not given
{
	match <absent> [:spamcss:] /./
	alter (:spamcss:) /spam.css/
}
{
	match <absent> [:nonspamcss:] /./
	alter (:nonspamcss:) /nonspam.css/
}
#   each of these jumps straight to the matching learn label inside the
#   command-word section further down
{
	match [:learnspam:] /SET/
	goto /:learnspamhere:/
}
{
	match [:learnnonspam:] /SET/
	goto /:learnnonspamhere:/
}
{	
	#   --learnfile carries a value; :file: is bound to that value
	match (:trash: :file:) [:learnfile:] /(.+)/
	goto /:learntofilehere:/
} 	

#
#
#------------  Are we enabled for "inoculations" via email?
#
#   Any failing MATCH below drops out of this whole block, so the mail is
#   then handled as ordinary mail.
#
{
  match [:inoculations_enabled:] /yes/
  #
  #  see if we have an inoculation header.
  #
  match <nomultiline> [:m_text:] \
	/Inoculation-Sender: ([[:graph:]]+)/ (:x: :sender:)
  match <nomultiline> [:m_text:] \
	/Inoculation-Type: ([[:graph:]]+)/ (:x: :type:)
  match <nomultiline> [:m_text:] \
	/Inoculation-Authentication: (.*)$/ (:x: :auth:)
  #
  #   See if the sender is in our list, and if so, what is their secret?
  #   (the list is :*:fileprefix:inoc_passwd.txt, searched for a line of
  #   the form "sender type secret")
  #
  isolate (:inoculation_passwd:) //
  input (:inoculation_passwd:) [:*:fileprefix:inoc_passwd.txt]
  match [:inoculation_passwd:] <nomultiline> \
	/:*:sender: :*:type: (.*)/ (:x: :secret:)
  #
  #    We now have the shared secret, calculate the checksum we should have
  # 
  #     grab the body...
  match /\n\n(.*)/ (:x: :body:)
  #   
  #     and calculate the hash: the secret, a newline, and the body are
  #     fed to md5sum, and the first run of printable characters of its
  #     output is kept.
  isolate (:md5out:)
  syscall (:*:secret::*:_nl::*:body:) (:md5out:) /md5sum/
  match [:md5out:] /([[:graph:]]+)/ (:x: :md5out:)
  #
  #     does this hash match with the given hash?
  match [:auth:] /:*:md5out:/
  #   
  #   	Yes, it matched.  It's a real inoculation.
  #  
  #     grab the text we want to actually learn (this is the payload)
  match [:m_text:] (:x: :text:) /\n\n(.*)/		
  #
  #     and learn it appropriately
  {
	match [:type:] /nonspam/
	goto /:learnnonspamhere:/
  }
  alius
  {
        match [:type:] /spam/
	goto /:learnspamhere:/
  }	
}
#
# -------------check for the COMMAND WORD ----------
#
{
    #
    #    grab the password as :pw: and the command word plus any arg(s)
    #    as :c: ( :z: is the whole command line )
    #
    match <nomultiline> (:z: :pw: :c: ) [:_dw:] /^command ([[:graph:]]+) (.*)/
    #
    #    check the password.  If it's invalid, FAIL out of this bracket set
    #    and just treat this as ordinary (non-command) mail.
    #    NOTE(review): this is a regex match of :spw: within :pw:, not an
    #    exact string comparison - confirm that is what is intended.
    match [:pw:] /:*:spw:/			
    {
	#    was it a command to add something to the whitelist?
	match <nomultiline> (:q: :a:) [:c:] /whitelist (.*)/
	output [:*:fileprefix:whitelist.mfp] <append> /:*:a::*:_nl:/
	alter (:z:) /*** :*:z: *** :*:_nl:Whitelist command executed! :*:_nl:/
	accept
	exit /:*:accepted_mail_exit_code:/
    }
    {
	#    was it a command to add something to the blacklist?
	match <nomultiline> (:q: :a:) [:c:] /blacklist (.*)/
	output [:*:fileprefix:blacklist.mfp] <append> /:*:a::*:_nl:/
	alter (:z:) /*** :*:z: *** :*:_nl:Blacklist command executed! :*:_nl:/
	accept
	exit /:*:accepted_mail_exit_code:/
    }
   #
   #    Did the user specify command "force"?
   #
    {
	match <nomultiline> [:c:] /force/
	#
	#    yep, so we set the "force" on.
	alter (:force:) /SET/
    }

   #
   #    Did the user specify command "unlearn"?
   #
    {
	match <nomultiline> [:c:] /unlearn/
	#
	#    yep, so we set the "unlearn" on.
	alter (:unlearn:) /SET/
    }
   #
   #     Now, if :unlearn: is set, by either the command line or the
   #     in-mail command, we append "refute" to the :clf: flags.
   #     Otherwise :clf: stays what it was before.
   #	
    {
        match [:unlearn:] /SET/
	alter (:clf:) /:*:clf: refute/
    }	
   #   
   #     Now, the big mahonka.  Learn as nonspam, or as spam
   #      (note the three subpaths - one each for non-forced, forced, and
   #      non-forced error messages)
   #

    {
	#     was it a command to learn something as nonspam?
	match [:c:] /nonspam/
	match (:z: :text:) [:m_text:] /:*:_nl:command [[:graph:]]+ nonspam(.*)/
	#      and learn it as nonspam.  The label below is also the GOTO
	#      target for --learnnonspam and for nonspam inoculations.
 :learnnonspamhere:	
	{
		#
		#    Verify that we need to learn this first (TOE strategy).
		#    The spam file is listed first, so this CLASSIFY only
		#    succeeds when the text currently scores as spam.
		classify <:*:clf:> (:*:fileprefix::*:spamcss: | :*:fileprefix::*:nonspamcss: ) [:text:] /:*:lcr:/
		#    
		#     write out the pre-mutilation text, with newlines
		#
		output [:*:fileprefix:nonspamtext.txt] <append> /\n\n:*:cmd_txt:\n/
		#    
		#     then train the nonspam file and tag the mail
		#
		learn <:*:clf:> (:*:fileprefix::*:nonspamcss:) [:text:] /:*:lcr:/
		syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARNED NONSPAM :*:clf:"/
		accept
		exit /:*:accepted_mail_exit_code:/
	}
	alius
	{
		#
		#    Classified correctly already; learn anyway only if
		#    "force" was given (command line or in-mail command).
		match [:force:] /SET/
		output [:*:fileprefix:nonspamtext.txt] <append> /\n\n:*:cmd_txt:\n/
		#    
		#     write out the pre-mutilation text, with newlines
		#
		learn < :*:clf: > (:*:fileprefix::*:nonspamcss:) [:text:] /:*:lcr:/
		syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARNED NONSPAM (FORCED) :*:clf:"/
		accept
		exit /:*:accepted_mail_exit_code:/
	}
	alius
	{
		#    Neither needed nor forced: just report that nothing was done.
		syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARN AS NONSPAM UNNECESSARY - ALREADY CLASSIFIED CORRECTLY - NO ACTION TAKEN"/ 
		accept
		exit /:*:accepted_mail_exit_code:/
	}		
    }
    {
	#     was it a command to learn something as spam?
	match [:c:] /spam/
	match (:z: :text:) [:m_text:] /:*:_nl:command [[:graph:]]+ spam(.*)/
	#      and learn it as spam.  The label below is also the GOTO
	#      target for --learnspam and for spam inoculations.
 :learnspamhere: 
	{
		#
		#    Verify that we need to learn this first (TOE strategy).
		#    The nonspam file is listed first, so this CLASSIFY only
		#    succeeds when the text currently scores as nonspam.
		classify <:*:clf:> (:*:fileprefix::*:nonspamcss: | :*:fileprefix::*:spamcss: ) [:text:] /:*:lcr:/
		#    
		#     write out the pre-mutilation text, with newlines
		#
		output [:*:fileprefix:spamtext.txt] <append> /\n\n:*:cmd_txt: \n/
		learn < :*:clf:> (:*:fileprefix::*:spamcss:) [:text:] /:*:lcr:/
		syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARNED SPAM :*:clf:"/
		accept
		exit /:*:accepted_mail_exit_code:/
	}
	alius
	{
		#    Classified correctly already; learn anyway only if
		#    "force" was given (command line or in-mail command).
		match [:force:] /SET/
		#    
		#     write out the pre-mutilation text, with newlines
		#
		output [:*:fileprefix:spamtext.txt] <append> /\n\n:*:cmd_txt: \n/
		learn < :*:clf:> (:*:fileprefix::*:spamcss:) [:text:] /:*:lcr:/
		syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARNED SPAM (FORCED) :*:clf:"/
		accept
		exit /:*:accepted_mail_exit_code:/
	}
	alius
	{
		#    Neither needed nor forced: just report that nothing was done.
		syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARN AS SPAM UNNECESSARY - ALREADY CLASSIFIED CORRECTLY - NO ACTION TAKEN"/ 
		accept
		exit /:*:accepted_mail_exit_code:/
	}		
    }
    {
	#     was it a command to learn something as an arbitrary type?
	# Note: the files this generates don't get used for anything unless
	#  you use --spamcss and --nonspamcss in your own scripts.
	#
	# Note: these "learns" are a-priori "force", since we don't know
	#  what other .css files we should compare this text to.
	#
	match [:c:] /learn/
	match (:z: :learnfile: :text:) [:m_text:] /:*:_nl:command [[:graph:]]+ learn ([[:graph:]]+)(.*)/
	#      and learn it.  The label below is also the GOTO target for
	#      the --learnfile command line key.
:learntofilehere:
	#      append the text to <learnfile>text.txt, train <learnfile>.css
	output [:*:fileprefix::*:learnfile:text.txt] <append> /:*:text:/
	learn < :*:clf:> (:*:fileprefix::*:learnfile:.css) [:text:] /:*:lcr:/
	#      Report the target using :learnfile: , which is set on both
	#      ways into this code.  ( :file: is only bound when we arrive
	#      from the --learnfile command line key, so it must not be
	#      used here. )
	syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARNED :*:learnfile: :*:clf:"/
	accept
	exit /:*:accepted_mail_exit_code:/
    }

}
#
#
#     George's Circuit Breaker - if the mail has already been processed
#     by CRM114, then send it directly to output, without further
#     processing.
#   
#     WE DON'T RISK THIS ANY MORE - WITH ~ A MILLION USERS, WE'RE NOW 
#     A TARGET FOR SPAMMERS TO USE THIS HACK.
#{
#	match /X-CRM114/
#	alter (:classifier_reason:) / This mail seems to have already been processed by CRM114. /
#	alter (:stats:) / pR: 999.99 /
#	goto /:looks_good:/
#}

#     none of the above - classify this incoming mail instead.
#     first according to priority action list,
#     then according to whitelist,
#     then according to blacklist,
#     then according to the CRM sparse spectral classifier.
#   
#     check it against the priority action list- this list is 
#     of the form of a + or -, then a pattern.  + means accept,
#     - means reject.  These are executed in order (which is 
#     different from whitelist or blacklist in that they occur
#     in order given, not whitelist-then-blacklist).  The priority
#     action list is tried before whitelist or blacklist.
#
isolate (:priolist:)
input (:priolist:) [:*:fileprefix:priolist.mfp]
#    reset matching on :priolist: to the start of the string
match [:priolist:] //
#
#     
{
	#... Grab the next line: :pm: is its first character (the + or -),
	#... :pat: is the rest of the line, used as a regex
	match <fromend nomultiline> (:w: :pm: :pat:) [:priolist:]  /(.)(.+)/
	#... see if this regex matches the incoming mail
	{
		match <nomultiline> (:reason:) /:*:pat:/
		#  Yep, it matched... branch based on pm
		#
		{
			match [:pm:] /[+]/
			# put in a little tag saying why prio-listed
			alter (:classifier_reason:) /** ACCEPT: CRM114 Priority Whitelisted by: :*:reason: **:*:_nl:/
			alter (:stats:) / pR: 999.99 /
			goto /:looks_good:/
		}	
		#   No, we didn't have a +, was it a '-'?
		{
			match [:pm:] /[-]/
		        alter (:classifier_reason:) /** REJECT: CRM114 Priority Blacklisted by: :*:reason: **:*:_nl:/
		        alter (:reject_address:) /:*:fail_priority_mail_to:/
		        {
			      #  optionally keep a copy of the rejected mail
			      match [:log_rejections:] /yes/
			      output [:*:fileprefix:rejected_by_blacklist.txt] <append> /:*:_dw:/
		        }
			alter (:stats:) / pR: -999.99 /
			goto /:looks_bad:/
		}
	}
	#   Nope, didn't match as a priority... grab the next regex
	liaf
}
#
#
#     check it against the whitelist... load the whitelist...
{
	isolate (:whitelist:)
	input (:whitelist:) [:*:fileprefix:whitelist.mfp]
	#    reset matching on whitelist to start of string
	match [:whitelist:] //
}
#
#     
{
	#... Grab the next line of the whitelist and use it as a regex
	match <fromend nomultiline> (:waste: :whregex:) [:whitelist:]  /(.+)/
	#... see if this regex matches the incoming mail
	{
		match <nomultiline> (:reason:) /:*:whregex:/
		#  Yep, it matched... whitelist this email
		#
		# put in a little tag saying why whitelisted; the same note
		# is also appended to the end of the mail itself
		alter (:classifier_reason:) /** ACCEPT: CRM114 Whitelisted by: :*:reason: **:*:_nl:/
		alter (:_dw:) /:*:_dw: ** CRM114 Whitelisted by: :*:reason: **:*:_nl:/
		alter (:stats:) / pR: 999.99 /
		goto /:looks_good:/
	}
	#   Nope, didn't match... grab the next regex and try again,
	liaf
}

#
#    No joy, maybe we should blacklist it.
#
#     check it against the blacklist
{
	isolate (:blacklist:)
	input (:blacklist:) [:*:fileprefix:blacklist.mfp]
	#    reset matching on blacklist to start of string
	match [:blacklist:] //
}
#     
{
	#... Grab the next line of the blacklist and use it as a regex
	match <fromend nomultiline> (:waste: :blregex:) [:blacklist:]  /(.+)/
	#... see if this regex matches the incoming mail
	{
		match <nomultiline> (:reason:) /:*:blregex:/
		#  Yep, it matched... blacklist this email
		#
		# put in a little tag saying why blacklisted
		alter (:classifier_reason:) /** REJECT: CRM114 Blacklisted by: :*:reason: ** :*:_nl:/
		alter (:reject_address:) /:*:fail_blacklist_mail_to:/
				{
			# optionally keep a copy of the rejected mail
			match [:log_rejections:] /yes/
			output [:*:fileprefix:rejected_by_blacklist.txt] <append> /:*:_dw:/
		}
		alter (:stats:) / pR: -999.99 /
		goto /:looks_bad:/
	}
	#   Nope, didn't match... grab the next regex and try again
	liaf
}
#
#
#
#    End of blacklist processing.  
#
#
#      All else has failed- we now run our CLASSIFY algorithm
#	to make our best guess.
#
#
{
	# Run the CSS classifier against the "expanded" text ( :m_text: ).
	# The nonspam file is listed first, so CLASSIFY succeeding means
	# the mail looks good; if it fails, we fall out of the inner block
	# and reject the mail as SPAM.  The classifier's report is left in
	# :stats: either way.
	#
	{
		classify <:*:clf:> ( :*:fileprefix::*:nonspamcss: | :*:fileprefix::*:spamcss: ) ( :stats: ) [:m_text:] /:*:lcr:/
		alter (:classifier_reason:) /** ACCEPT: CRM114 PASS Markovian Matcher ** :*:_nl::*:stats:/
		goto /:looks_good:/
	}
	alter (:classifier_reason:) /** REJECT: CRM114 FAIL Markovian Matcher ** :*:_nl::*:stats:/
	alter (:reject_address:) /:*:fail_SSM_mail_to:/
	{
		# optionally keep a copy of the rejected mail
		match [:log_rejections:] /yes/
		output [:*:fileprefix:rejected_by_css.txt] <append> /:*:_dw:/
	}
	goto /:looks_bad:/
}
#
#
#         Final wrap-up routines - dispose of the mail as appropriate.
#
#         :looks_bad: handles rejected mail.  With --stats_only it only
#         prints the pR value; otherwise it tags the mail and either
#         mails it to :reject_address: or writes it to stdout.
#
{
	:looks_bad:
	#   is this a :stats_only: run (i.e. for CAMRAM)
	{
		match [:stats_only:] /SET/
		match <nomultiline> [:stats:] (:d: :pval:) /pR: (.*)/
		output /:*:pval: :*:_nl:/
		alter (:our_exit_code:) /:*:rejected_mail_exit_code:/
		goto /:finish_up:/
	}
	
	#    not stats_only.... we're doing major output.
	{
	   {
		#   add the X-CRM114 headers if the user asked for them
		match [:add_headers:] /yes/
		{
			#   pick the "pR: ..." part out of the statistics
			match <nomultiline> [:stats:] (:pr:) /pR: .*$/
		}
	        syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Version: :*:_crm_version: MF-:*:_pgm_hash:"/
	        syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Status: SPAM  ( :*:pr: )"/
	   }
 	   #
           #      	Now, get the Subject: line.  If none, make one.
  	   {
		{
		    match (:subject_line: :subj_text:) <nocase nomultiline> \
			 /^Subject: (.*)/	
		}
		alius
		{
			#   no Subject: header - insert one at the end of
			#   the headers and then match it
			match (:end_of_headers:) /\n\n/
			alter (:end_of_headers:) /\nSubject: (none)\n\n/
			match (:subject_line: :subj_text:) <nomultiline> /^Subject: (.*)/	}
	   }
	   {
		#
		#   If we are re-sending this, we want to de-fang the
		#   subject, otherwise we don't.
		match [:reject_address:]  /[a-zA-Z0-9]/
		#   Paolo P. suggests this alteration to avoid subversion
		#   by enclosing an alternate target in "marks".  We always
		#   have to do this.
		#   First every dollar sign becomes USD...
		{
			match (:dq:) [:subj_text:] /\$/
			alter (:dq:) /USD/
			liaf
		}
		#   ...then every character other than letters, digits,
		#   space and  - ! . ,  is deleted.
		{
			match (:dq:) [:subj_text:] /[^-a-zA-Z0-9!., ]/
			alter (:dq:) //
			liaf
		}
		#
		#     We isolate subj_text here, so if later syscalls move
                #     things, the subject text used in "mail" is still OK.
		isolate (:subj_text:)
           }
	   #
	   #     If the user asked for a spam-flagging string, put the flagging
	   #     string into the subject.
	   #
  	   {
		match [:spam_flag_subject_string:] /./
		alter (:subj_text:) \
		/:*:spam_flag_subject_string: :*:subj_text:/
	   }
	   {
		#   append the expanded text to the mail as plain text
		match [:add_extra_stuff:] /text/
		#   get rid of any first-column 'From's as they are message breaks!
		#   this isn't necessary if we're mailing to someplace else...
		{
			match (:f:) <nomultiline> [:m_text:] /^From/
			alter (:f:) />:*:f:/
			liaf
		}
	       	alter (:_dw:) /:*:_dw:-=-Extra Stuff-=-\n\n:*:m_text: -0-0-0- :*:_nl:/		
	  }
    	  {
		#   append the expanded text to the mail as a MIME attachment
		match [:add_extra_stuff:] /attachment/
		#   get rid of any first-column 'From's as they are message breaks!
		#   this isn't necessary if we're mailing to someplace else...
		{
			match (:f:) <nomultiline> [:m_text:] /^From/
			alter (:f:) / :*:f:/
			liaf
		}


		#   save the original Content-Type and
		#   Content-Transfer-Encoding headers; they are re-emitted
		#   after the first boundary marker below
		isolate (:content_type:) //
	        syscall (:*:_dw:) (:content_type:) /formail -X "Content-Type"/
		isolate (:content_transfer_encoding:) //
	        syscall (:*:_dw:) (:content_transfer_encoding:) /formail -X "Content-Transfer-Encoding"/

 	        syscall (:*:_dw:) (:_dw:) /formail -A "MIME-Version: 1.0"/

	        syscall (:*:_dw:) (:_dw:) /formail -A "Content-Type: multipart\/mixed\; boundary=Attachment_Quote_Boundary_1234567890\n--Attachment_Quote_Boundary_1234567890\n:*:content_type::*:content_transfer_encoding:"/
	        syscall (:*:_dw:) (:_dw:) /formail -U "Content-Type"/
        		alter (:_dw:) /:*:_dw::*:_nl:\
--Attachment_Quote_Boundary_1234567890 :*:_nl:\
Content-Type: text\/plain :*:_nl:\
Content-Transfer-Encoding: quoted-printable \n\n\n:*:m_text:\
\n--Attachment_Quote_Boundary_1234567890--\n/
      	}

#
#            Make sure the mail is well-formed
	   syscall (:*:_dw:) (:_dw:) /formail/
# 
#     Decide if we forward or if we just output it.
          {      
	     {
		#   if this match succeeds, we should forward-to-an-address? 
		# Yes, but only if we _have_ a forward-to address.
	        match [:reject_address:]  /[a-zA-Z0-9]/	   
 	        {
		  #  -- put the classifier reason in as the first thing!
		  match [:add_verbose_stats:] /yes/
		  alter (:_dw:) /:*:_nl: :*:classifier_reason::*:_nl: :*:_dw: /
	        }
	        syscall (:*:_dw:) /mail :*:reject_address: -s ':*:subj_text:'/ 
	     }
	     alius
             {
 	       {
	         # -- put the classifier reason in at the end of the headers 
		 match [:add_verbose_stats:] /yes/
		 match (:start_of_data:) /\n\n/
		 alter (:start_of_data:) /\n\n :*:classifier_reason: \n /
	       }
               accept		
             }
           }
        }
	alter (:our_exit_code:) /:*:rejected_mail_exit_code:/
	goto /:finish_up:/
}
#
#          and here's where we accept something as good email.
#
#          With --stats_only only the pR value is printed; otherwise the
#          mail is tagged as configured and written to stdout.
{
    :looks_good:
    #   is this a :stats_only: run (i.e. for CAMRAM)
    {
		match [:stats_only:] /SET/
		match <nomultiline> [:stats:] (:d: :pval:) /pR: (.*)/
		output /:*:pval: :*:_nl:/
		alter (:our_exit_code:) /:*:accepted_mail_exit_code:/
		goto /:finish_up:/
    }

    #   Not stats-only; do the full output thing.
    {
	#   append the classifier reason to the end of the mail
	match [:add_verbose_stats:] /yes/	
        alter (:_dw:)  /:*:_dw: :*:_nl: :*:classifier_reason: :*:_nl:/
    }
    {
	#   add the X-CRM114 headers if the user asked for them
	match [:add_headers:] /yes/
		{
			#   pick the "pR: ..." part out of the statistics
			match <nomultiline> [:stats:] (:pr:) /pR: .*$/
		}
	        syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Version: :*:_crm_version: MF-:*:_pgm_hash: "/
        syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Status: Good  ( :*:pr: )"/
    }
    {
	#   append the expanded text to the mail as plain text
	match [:add_extra_stuff:] /text/
	#   get rid of any first-column 'From's as they are message breaks!
	#   this isn't necessary if we're mailing to someplace else...
	{
		match (:f:) <nomultiline> [:m_text:] /^From/
		alter (:f:) / :*:f:/
		liaf
	}
       	alter (:_dw:) /:*:_dw:-=-Extra Stuff-=-\n\n :*:m_text: -0-0-0- \n/
     }
     {
	#   append the expanded text to the mail as a MIME attachment
	match [:add_extra_stuff:] /attachment/
	#   get rid of any first-column 'From's as they are message breaks!
	#   this isn't necessary if we're mailing to someplace else...
	{
		match (:f:) <nomultiline> [:m_text:] /^From/
		alter (:f:) / :*:f:/
		liaf
	}
	#   save the original Content-Type and Content-Transfer-Encoding
	#   headers; they are re-emitted after the first boundary marker
	isolate (:content_type:) //
	syscall (:*:_dw:) (:content_type:) /formail -X "Content-Type"/
	isolate (:content_transfer_encoding:) //
	syscall (:*:_dw:) (:content_transfer_encoding:) /formail -X "Content-Transfer-Encoding"/
 	syscall (:*:_dw:) (:_dw:) /formail -A "MIME-Version: 1.0"/
	syscall (:*:_dw:) (:_dw:) /formail -A "Content-Type: multipart\/mixed\; boundary=Attachment_Quote_Boundary_1234567890\n--Attachment_Quote_Boundary_1234567890\n:*:content_type::*:content_transfer_encoding:"/
	syscall (:*:_dw:) (:_dw:) /formail -U "Content-Type"/
       	alter (:_dw:) /:*:_dw::*:_nl:\
--Attachment_Quote_Boundary_1234567890 :*:_nl:\
Content-Type: text\/plain :*:_nl:\
Content-Transfer-Encoding: quoted-printable \n\n\n:*:m_text:\
\n--Attachment_Quote_Boundary_1234567890--\n/
     }

    accept
    alter (:our_exit_code:) /:*:accepted_mail_exit_code:/
    goto /:finish_up:/
}
#
#         Here's where we finish up processing in all the paths.
#
#         If :automatic_training: is yes, roughly one message in 16 is
#         learned according to the verdict just reached, and is then
#         either mailed to :autotrain_address: for checking or re-emitted
#         with a tagged Subject: line.
#         
:finish_up:
{
#    ---- should we consider automatic training?	
	match [:automatic_training:] /yes/
	# bounce out if we've already auto-trained this email
	match <absent> /AUTO-TRAINED/
	isolate (:msghash:)
	hash (:msghash:) /:*:_dw:/
	#        pick one in 16- here, if the second-to-last digit is a 0
	match [:msghash:] /......0./
	#
	# out put autotraining...
	#       Yep... we should use this for autotraining
	#       do we auto-train on acceptance? 
	{ 
		match [:classifier_reason:] /ACCEPT/
		#   it wasn't spam... autotrain it "nonspam"
		output [:*:fileprefix:nonspamtext.txt] <append> /:*:text:/
		learn <microgroom>  (:*:fileprefix::*:nonspamcss:) [:m_text:]  /:*:lcr:/
		goto /:autotrain_finish:/
	}

	#      or do we autotrain on rejection       
	{ 
		match [:classifier_reason:] /REJECT/
		#   it was spam... autotrain it "spam"
		output [:*:fileprefix:spamtext.txt] <append> /:*:text:/
		learn <microgroom> (:*:fileprefix::*:spamcss:) [:m_text:] /:*:lcr:/
		goto /:autotrain_finish:/
	}
	:autotrain_finish:
	{
           {
		#   tell the configured address, so a human can check it
		match [:autotrain_address:] /../
		syscall (:*:classifier_reason: :*:_nl: :*:_dw:) /mail -s "AUTO-TRAINED email - please check" :*:autotrain_address:/
	   }
	   alius
	   {
	        # there was no autotrain address, so we just accept it.
	        # The subject tag is spelled AUTO-TRAINED (with a hyphen),
	        # exactly what the "already auto-trained" guard at the top
	        # of this block looks for, so that a tagged message is
	        # recognised if it is ever filtered again.
		match (:subj:) /Subject:/
		alter (:subj:) /Subject:  AUTO-TRAINED- please check! .../
		accept
	   }
	}
}
#    Normal termination: leave with the exit code chosen by whichever
#    disposal path ran.
:exit_here:
exit /:*:our_exit_code:/

#    Catch-all fault handler: the /.*/ pattern traps any fault message.
#    The mail is passed through with ACCEPT so that it is not lost, the
#    fault text is printed after it, and we exit with the configured
#    :program_fault_exit_code: .
trap (:broken_program_message:) /.*/
{
	accept
	output /:*:_nl: Aw, crud.  Mailfilter.crm broke.  Here's the error: :*:_nl:/
	output /:*:broken_program_message:/
}
exit /:*:program_fault_exit_code:/