#! @BIN_PATH@/crm -( learnspam learnnonspam learnfile stats_only config spamcss nonspamcss fileprefix force unlearn )
#
#     a whitelist / blacklist / command based mail sorter
#
# Copyright (C) 2002-2004  William S. Yerazunis; licensed under the
# GNU Public License (GPL) version 2.  A copy of this license is included
# in the distribution media, or obtain one from www.fsf.org .
#
#   Note to BSD users - you MUST remove EVERYTHING on the first line of this
#   program from the first "-" to the end or you will not get what you
#   expect.  This is due to a bug in the BASH code on BSD.
#
#   --->>>  Design Philosophy ( do these IN ORDER )
#
#   * if --fileprefix is specified, all filenames EXCEPT --config
#       are prefixed with that (You need a trailing slash on the prefix
#       if it is a directory name.)
#   * if --config , grab the config file from the specified place.
#   * Load the mailfilter.cf config file from wherever config or
#       fileprefix points (--config overrides --fileprefix).
#   * If --spamcss is specified, use that as the spam.css file
#   * If --nonspamcss is specified, use that as the nonspam.css file
#   * If --learnspam, learn as spam and exit.
#   * If --learnnonspam, learn as nonspam and exit
#   * If --force, force-feed the learning
#   * If --learnfile, use :learnfile:.css and :learnfile:text.txt
#   * If --stats_only, do normal classification but don't do any
#       forwarding, only output the status on stdout and return
#       the exit code.
#
#   * check for the "command word", if present, execute the command
#
#   * check to see if any of the whitelist patterns apply.  If so,
#       accept the mail to /var/spool/the_user (which is actually to
#       be found at /var/spool/mail/:*:_env_USER:
#
#   * check to see if any of the blacklist patterns apply.  If so,
#       flush the mail to the "blacklisted" file.
#
#   * check to see if it's commanded to be learned as a spam or a
#       nonspam model.  If so, learn it to the appropriate .css (Crm
#       Sparse Spectra) file
#
#   * run the email through the classifier.  If the classifier thinks
#       it's good, send it to the /var/spool/mail/the_user file, else
#       send it to the "doubtful" file.
#
##############################################################
#
#     --- uncomment this if you want to include a "forced"
#     configuration file ---
# insert mailfilterconfig.crm
#
#
#     --- make a safe space to keep the results of our work ---
#
isolate (:classifier_reason:) /no reason yet/
#
isolate (:our_exit_code:) /0/
#
isolate (:stats:) / pR: 0.000000 /
#
isolate (:pr:) / pR: 0.00000/
#
isolate (:subj_text:) / (None) /
#
isolate (:add_extra_stuff:) //
#
#     Isolate these and give them values, in case the user doesn't.
isolate (:reject_address:) //
isolate (:fail_priority_mail_to:) //
isolate (:fail_blacklist_mail_to:) //
isolate (:fail_SSM_mail_to:) //
isolate (:log_rejections:) //
#
#     This ISOLATE will guarantee that :fileprefix: exists, and keep its
#     prior (commandline) value if it does, and an empty string if it doesn't.
isolate (:fileprefix:)
#
#     This ISOLATE will guarantee that :force: will exist, and keep the
#     commandline value ("SET"), or the null string if the user doesn't
#     use --force on the command line.
isolate (:force:)
#
#     This ISOLATE will guarantee that :unlearn: will exist, and will keep
#     the commandline value ("SET") or the null string if the user doesn't
#     use --unlearn on the command line.
isolate (:unlearn:)
#
#     Now, :clf: is the classify & learn flags; note that we have two
#     separate flags here in a bizarre chain.  The reason is that :unlearn:
#     can have the value "SET", whereas :clf: needs "refute".
isolate (:clf:) //
#
#     This is the code to read the per-user configuration.  Note
#     that because this happens during the run, it will _override_
#     any command line arguments that get set.
{
    isolate (:option_txt:)
    isolate (:ev:)
    isolate (:verbose_startup:)
    #     read in the mail filter configuration file.
    isolate (:config:)
    {
        {
            #  --config was given: read the config from that exact path
            match [:config:] /.+/
            input [:*:config:] (:option_txt:)
        }
        alius
        {
            #  read in the standard mail filter configuration file.
            input [:*:fileprefix:mailfilter.cf] (:option_txt:)
        }
    }
    # output /options == :*:option_txt:\n/
    #     reset loop for matching to start of :option_txt:
    match [:option_txt:] //
    #     and loop till there are no more options.
    {
        #  find a line that looks like a parameter setting...
        match < fromend nomultiline > (:line: :name: :value:) \
              [:option_txt:] /^[ ]*(:[[:graph:]]+:)[ \t]+\/(.*)\//
        #     [:option_txt:] /^([[:graph:]]+).*\/(.*)\/.*$/
        {
            #  don't execute the assign if there's a # at the start of
            #  the line.  The <absent> flag is essential: the block must
            #  proceed only when :name: does NOT start with '#'.  Without
            #  it the guard can never pass (a :name: always starts with
            #  ':'), and no configuration value would ever be set.
            match <absent> [:name:] /^\x23/
            {
                #  debugging print
                match [:verbose_startup:] /SET/
                output / :*:name:\n :*:value:\n/
            }
            isolate (:*:name:) /:*:value:/
        }
        liaf
    }
}
#
#     Now, a weirdness - we need to add "unlearn" to the :clf:
#     if it was in the line params - but append, not replace.
#
{
    match [:unlearn:] /SET/
    alter (:clf:) /:*:clf: refute/
}
#
#
#     Do a quick check - has the password been changed or not?  If it's
#     still the default, put in something that will be well-nigh unguessable
#     (esp. since it will contain received headers that the sender cannot
#     see nor control.)
{
    match [:spw:] /DEFAULT-PASSWORD/
    #  yes, it's the same as default.  So we scramble it just so
    #  nobody can hack in
    hash (:spw:) /:*:_env_string::*:_dw:/
}
#
#     Default the rejection addresses: any per-category address the user
#     left blank falls back to :general_fails_to:.  <absent> makes each
#     block fire only when the address contains no printable character,
#     so a user-supplied address is preserved.
isolate (:reject_address:) /:*:general_fails_to:/
{
    match <absent> [:fail_priority_mail_to:] /[[:graph:]]/
    alter (:fail_priority_mail_to:) /:*:general_fails_to:/
}
{
    match <absent> [:fail_blacklist_mail_to:] /[[:graph:]]/
    alter (:fail_blacklist_mail_to:) /:*:general_fails_to:/
}
{
    match <absent> [:fail_SSM_mail_to:] /[[:graph:]]/
    alter (:fail_SSM_mail_to:) /:*:general_fails_to:/
}
#
#     Does the user want us to log all incoming mail?  This is handy for
#     testing and auditing purposes.  <append> so that each message is
#     added to the log instead of replacing it.
{
    match [:log_to_allmail.txt:] /yes/
    output <append> [:*:fileprefix:allmail.txt] /:*:_dw:/
}
#
#     m_text is "mutilated text" - the result of all our
#     machinations and hackages.
#     :m_text: will become an annotated _copy_ of the incoming text,
#     with mutilations.  :b_text: holds the base64-expanded copy and
#     :i_text: the copy with HTML comments removed.
#
isolate (:m_text:) //
isolate (:b_text:) /:*:_dw:/
isolate (:i_text:) /:*:_dw:/
#
#
#     To start with, the commanded text is assumed to be the entire input.
#     THEN
#     If there's a command followed by text, we save the text so we can
#     put that, and _only_ that, into the .txt corpi.
{
    isolate (:cmd_txt:) /:*:_dw:/
    match (:z: :cmd_txt:) [:_dw:] /command :*:spw: [^\n]*\n(.*)/
}
#
#
#     do we do any expansions?
{
    #   expansion 1: - do we perform base64 expansions?
    {
        {
            match [:do_base64:] /yes/
            {
                #   yes, expand base64's if there are any
                #
                #   Note: some spams don't even bother to use
                #   a 'Content-Transfer-Encoding' marker,
                #   and even fewer use Content-Type: text/whatever
                #   so we have to sort of wing it, when to expand
                #   what _might_ be base64 and when to ignore it.
                #   For now, if it says it's a base64, it gets
                #   expanded, no matter what the type.  Maybe
                #   someday someone will put in a lockout for
                #   things like .jpg files, .doc files, etc.
                #
                isolate (:exp_text:)
                match [:b_text:] (:a: :h: :b:) /(Content-Transfer-Encoding): base64(.*)/
                match (:c:) [:b:] /([a-zA-Z0-9+=!\/]+:*:_nl:){2,200}/
                #
                #   feed the encoded run to the configured decoder
                syscall (:*:c:) (:exp_text:) /:*:mime_decoder: /
                #   and stuff the result back into b_text for
                #   classification right in context.
                alter (:c:) /:*:exp_text:/
                #   and mark this piece of mime as "prior" (this also keeps
                #   the loop from matching the same header again).
                alter (:h:) /Content-Transfer-Prior-Encoding/
                #   repeat till no more Mime base64 encodings
                liaf
            }
        }
        alius
        {
            #   if no base64 expansions enabled, empty out :b_text:
            #
            alter (:b_text:) //
        }
    }
    #
    #   If we had expansions, bust the html contents out of them, otherwise
    #   ignore b_text as it's redundant
    {
        {
            match [:b_text:] /Content-Transfer-Prior-Encoding/
            alter (:i_text:) /:*:b_text:/
        }
        alius
        {
            #   if :b_text: _didn't_ have a base64, it's useless
            alter (:b_text:) //
        }
    }
    #   expansion 2 : do we bust HTML comments ( a.k.a.
    #   hypertextus interruptus) out?
    {
        match [:undo_interruptus:] /yes/
        isolate (:commentbin:) //
        {
            #   Match one HTML comment: "<!--", then any run that does not
            #   contain "-->", then "-->".  An empty pattern here would
            #   match the empty string on every pass, and the LIAF below
            #   would spin forever without removing anything.
            match [:i_text:] (:comment:) /<!--([^-]|-[^-]|--[^>])*-->/
            alter (:commentbin:) /:*:commentbin: :*:comment:/
            alter (:comment:) //
            liaf
        }
        #   if we had at least 80 characters worth of comments, then
        #   it's worth using the decommented text, else not.
        #   (this is my personal judgement call)
        {
            {
                match [:commentbin:] /(.){80,}/
            }
            alius
            {
                alter (:i_text:) //
            }
        }
    }
}
#     and reassemble the mucked-over text into the :m_text: var, always
#     with the base64's expanded, then a second decommented copy
#
{
    alter (:m_text:) /:*:_dw: :*:_nl: :*:b_text: :*:_nl: :*:i_text: :*:_nl:/
}
#
#     Do we want to do any rewrites before running?
#
{
    match [:rewrites_enabled:] /yes/
    isolate (:rewrites:)
    input (:rewrites:) [:*:fileprefix:rewrites.mfp]
    #   reset matching on rewrites to start of string - if no string, no more
    #   processing of rewrites !!
    match [:rewrites:] //
    #
    #
    {
        #   Grab the next regex; turn the one-per-line patterns into a
        #   regex and a replacement string.
        #   First, do the line-spanning regexes.
        #   <fromend> resumes after the previous rule and <nomultiline>
        #   keeps (.+) / (.*) on one line, so each LIAF pass consumes
        #   exactly one rule and the loop ends when the rules run out.
        match <fromend nomultiline> (:ch: :fr: :to:) [:rewrites:] /(.+)>-->(.*)/
        #   see if the "fr" regex matches anywhere
        {
            match [:m_text:] (:place:) /:*:fr:/
            #   Yep, it matched... alter it and do it again
            #
            alter (:place:) /:*:to:/
            liaf
        }
        #   Nope, didn't match... grab the next regex and try again,
        liaf
    }
    #
    #   reset back to the start of the rewrites.
    #
    match [:rewrites:] //
    #
    #   and do it again for non-line-spanners
    {
        #   Go through and do it again, except this time do it for
        #   the non-line-spanning regexes.
        match <fromend nomultiline> (:ch: :fr: :to:) [:rewrites:] /(.+)>->(.*)/
        #   see if the "fr" regex matches anywhere; <nomultiline> is what
        #   makes this pass non-line-spanning.
        {
            match <nomultiline> [:m_text:] (:place:) /:*:fr:/
            #   Yep, it matched... alter it and do it again
            #
            alter (:place:) /:*:to:/
            liaf
        }
        #   Nope, didn't match... grab the next regex and try again,
        liaf
    }
}
#     done with rewrites.
#
#
#     -------------do we have a --learnspam or --learnnonspam command line key?
#
#     Command-line learning keys.  By default the text to learn is the
#     whole mutilated text.
match (:text:) [:m_text:] /.*/
isolate (:c:) //
isolate (:learnspam: :learnnonspam: :learnfile: :stats_only:)
isolate ( :spamcss: :nonspamcss: )
#     Default the .css filenames only when the user did NOT supply them
#     (<absent>: proceed when the variable has no characters at all).
{
    match <absent> [:spamcss:] /./
    alter (:spamcss:) /spam.css/
}
{
    match <absent> [:nonspamcss:] /./
    alter (:nonspamcss:) /nonspam.css/
}
{
    match [:learnspam:] /SET/
    goto /:learnspamhere:/
}
{
    match [:learnnonspam:] /SET/
    goto /:learnnonspamhere:/
}
{
    match (:trash: :file:) [:learnfile:] /(.+)/
    goto /:learntofilehere:/
}
#
#
#------------ Are we enabled for "inoculations" via email?
#
{
    match [:inoculations_enabled:] /yes/
    #
    #   see if we have an inoculation header.
    #
    match [:m_text:] \
          /Inoculation-Sender: ([[:graph:]]+)/ (:x: :sender:)
    match [:m_text:] \
          /Inoculation-Type: ([[:graph:]]+)/ (:x: :type:)
    match [:m_text:] \
          /Inoculation-Authentication: (.*)$/ (:x: :auth:)
    #
    #   See if the sender is in our list, and if so, what is their secret?
    #
    isolate (:inoculation_passwd:) //
    input (:inoculation_passwd:) [:*:fileprefix:inoc_passwd.txt]
    match [:inoculation_passwd:] \
          /:*:sender: :*:type: (.*)/ (:x: :secret:)
    #
    #   We now have the shared secret, calculate the checksum we should have
    #
    #   grab the body...
    match /\n\n(.*)/ (:x: :body:)
    #
    #   and calculate the hash.
    isolate (:md5out:)
    syscall (:*:secret::*:_nl::*:body:) (:md5out:) /md5sum/
    match [:md5out:] /([[:graph:]]+)/ (:x: :md5out:)
    #
    #   does this hash match with the given hash?
    match [:auth:] /:*:md5out:/
    #
    #   Yes, it matched.  It's a real inoculation.
    #
    #   grab the text we want to actually learn (this is the payload)
    match [:m_text:] (:x: :text:) /\n\n(.*)/
    #
    #   and learn it appropriately
    {
        match [:type:] /nonspam/
        goto /:learnnonspamhere:/
    }
    alius
    {
        match [:type:] /spam/
        goto /:learnspamhere:/
    }
}
#
#     -------------check for the COMMAND WORD ----------
#
{
    #
    #   grab the command word as :c:, password as :pw:, and any arg(s) as :a:
    #   <nomultiline> lets ^ anchor at the start of any line of the mail
    #   (the headers come first) and keeps :c: to the rest of that line.
    #
    match <nomultiline> (:z: :pw: :c: ) [:_dw:] /^command ([[:graph:]]+) (.*)/
    #
    #   check the password.  If it's invalid, FAIL out of this bracket set
    #   and just treat this as ordinary (non-command) mail.
    match [:pw:] /:*:spw:/
    {
        #   was it a command to add something to the whitelist?
        match (:q: :a:) [:c:] /whitelist (.*)/
        #   <append>: add the new pattern, keep the existing ones
        output <append> [:*:fileprefix:whitelist.mfp] /:*:a::*:_nl:/
        alter (:z:) /*** :*:z: *** :*:_nl:Whitelist command executed! :*:_nl:/
        accept
        exit /:*:accepted_mail_exit_code:/
    }
    {
        #   was it a command to add something to the blacklist?
        match (:q: :a:) [:c:] /blacklist (.*)/
        output <append> [:*:fileprefix:blacklist.mfp] /:*:a::*:_nl:/
        alter (:z:) /*** :*:z: *** :*:_nl:Blacklist command executed! :*:_nl:/
        accept
        exit /:*:accepted_mail_exit_code:/
    }
    #
    #   Did the user specify command "force"?
    #
    {
        match [:c:] /force/
        #
        #   yep, so we set the "force" on.
        alter (:force:) /SET/
    }
    #
    #   Did the user specify command "unlearn"?
    #
    {
        match [:c:] /unlearn/
        #
        #   yep, so we set the "unlearn" on.
        alter (:unlearn:) /SET/
    }
    #
    #   Now, if :unlearn: is set, by either bashline or command, we
    #   set the :clf: flag to be "refute".  Otherwise, we leave it as
    #   it was before.  The <absent> guard avoids appending "refute" a
    #   second time when --unlearn already added it at startup.
    #
    {
        match [:unlearn:] /SET/
        match <absent> [:clf:] /refute/
        alter (:clf:) /:*:clf: refute/
    }
    #
    #   Now, the big mahonka.  Learn as nonspam, or as spam
    #   (note the three subpaths - one each for non-forced, forced, and
    #   non-forced error messages)
    #
    {
        #   was it a command to learn something as nonspam?
        match [:c:] /nonspam/
        match (:z: :text:) [:m_text:] /:*:_nl:command [[:graph:]]+ nonspam(.*)/
        #   and learn it as nonspam
        :learnnonspamhere:
        {
            #
            #   Verify that we need to learn this first (TOE strategy):
            #   this CLASSIFY succeeds only if the text currently looks
            #   like spam, i.e. it is misclassified.
            classify <:*:clf:> (:*:fileprefix::*:spamcss: | :*:fileprefix::*:nonspamcss: ) [:text:] /:*:lcr:/
            #
            #   write out the pre-mutilation text, with newlines
            #
            output <append> [:*:fileprefix:nonspamtext.txt] /\n\n:*:cmd_txt:\n/
            learn <:*:clf:> (:*:fileprefix::*:nonspamcss:) [:text:] /:*:lcr:/
            syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARNED NONSPAM :*:clf:"/
            accept
            exit /:*:accepted_mail_exit_code:/
        }
        alius
        {
            #
            #   Did the user specify "--force" on the command line?
            match [:force:] /SET/
            #
            #   write out the pre-mutilation text, with newlines
            #
            output <append> [:*:fileprefix:nonspamtext.txt] /\n\n:*:cmd_txt:\n/
            learn < :*:clf: > (:*:fileprefix::*:nonspamcss:) [:text:] /:*:lcr:/
            syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARNED NONSPAM (FORCED) :*:clf:"/
            accept
            exit /:*:accepted_mail_exit_code:/
        }
        alius
        {
            syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARN AS NONSPAM UNNECESSARY - ALREADY CLASSIFIED CORRECTLY - NO ACTION TAKEN"/
            accept
            exit /:*:accepted_mail_exit_code:/
        }
    }
    {
        #   was it a command to learn something as spam?
        match [:c:] /spam/
        match (:z: :text:) [:m_text:] /:*:_nl:command [[:graph:]]+ spam(.*)/
        #   and learn it as spam
        :learnspamhere:
        {
            #
            #   Verify that we need to learn this first (TOE strategy)
            classify <:*:clf:> (:*:fileprefix::*:nonspamcss: | :*:fileprefix::*:spamcss: ) [:text:] /:*:lcr:/
            #
            #   write out the pre-mutilation text, with newlines
            #
            output <append> [:*:fileprefix:spamtext.txt] /\n\n:*:cmd_txt: \n/
            learn < :*:clf:> (:*:fileprefix::*:spamcss:) [:text:] /:*:lcr:/
            syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARNED SPAM :*:clf:"/
            accept
            exit /:*:accepted_mail_exit_code:/
        }
        alius
        {
            #   Did the user specify "--force" on the command line?
            match [:force:] /SET/
            #
            #   write out the pre-mutilation text, with newlines
            #
            output <append> [:*:fileprefix:spamtext.txt] /\n\n:*:cmd_txt: \n/
            learn < :*:clf:> (:*:fileprefix::*:spamcss:) [:text:] /:*:lcr:/
            syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARNED SPAM (FORCED) :*:clf:"/
            accept
            exit /:*:accepted_mail_exit_code:/
        }
        alius
        {
            syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARN AS SPAM UNNECESSARY - ALREADY CLASSIFIED CORRECTLY - NO ACTION TAKEN"/
            accept
            exit /:*:accepted_mail_exit_code:/
        }
    }
    {
        #   was it a command to learn something as an arbitrary type?
        #   Note: the files this generates don't get used for anything unless
        #   you use --spamcss and --nonspamcss in your own scripts.
        #
        #   Note: these "learns" are a-priori "force", since we don't know
        #   what other .css files we should compare this text to.
        #
        match [:c:] /learn/
        match (:z: :learnfile: :text:) [:m_text:] /:*:_nl:command [[:graph:]]+ learn ([[:graph:]]+)(.*)/
        #   and learn it
        :learntofilehere:
        output <append> [:*:fileprefix::*:learnfile:text.txt] /:*:text:/
        learn < :*:clf:> (:*:fileprefix::*:learnfile:.css) [:text:] /:*:lcr:/
        #   Report :learnfile: (set on both the --learnfile path and the
        #   mail-command path); :file: exists only on the --learnfile path.
        syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Action: LEARNED :*:learnfile: :*:clf:"/
        accept
        exit /:*:accepted_mail_exit_code:/
    }
}
#
#
#     George's Circuit Breaker - if the mail has already been processed
#     by CRM114, then send it directly to output, without further
#     processing.
#
#     WE DON'T RISK THIS ANY MORE - WITH ~ A MILLION USERS, WE'RE NOW
#     A TARGET FOR SPAMMERS TO USE THIS HACK.
#{
#    match /X-CRM114/
#    alter (:classifier_reason:) / This mail seems to have already been processed by CRM114. /
#    alter (:stats:) / pR: 999.99 /
#    goto /:looks_good:/
#}
#     none of the above - classify this incoming mail instead.
#     first according to priority action list,
#     then according to whitelist,
#     then according to blacklist,
#     then according to the CRM sparse spectral classifier.
#
#     check it against the priority action list - this list is
#     of the form of a + or -, then a pattern.  + means accept,
#     - means reject.  These are executed in order (which is
#     different from whitelist or blacklist in that they occur
#     in order given, not whitelist-then-blacklist.)  The priority
#     action list is tried before whitelist or blacklist.
#
isolate (:priolist:)
input (:priolist:) [:*:fileprefix:priolist.mfp]
#     reset matching on :priolist: to the start of the string
match [:priolist:] //
#
#
{
    #... Grab the next regex; turn the one-per-line patterns into a regex.
    #    <fromend nomultiline> steps through the list one line per LIAF
    #    pass; the loop ends when no line is left.
    match <fromend nomultiline> (:w: :pm: :pat:) [:priolist:] /(.)(.+)/
    #... see if this regex matches the incoming mail
    {
        match (:reason:) /:*:pat:/
        #   Yep, it matched... branch based on pm
        #
        {
            match [:pm:] /[+]/
            #   put in a little tag saying why prio-listed
            alter (:classifier_reason:) /** ACCEPT: CRM114 Priority Whitelisted by: :*:reason: **:*:_nl:/
            alter (:stats:) / pR: 999.99 /
            goto /:looks_good:/
        }
        #   No, we didn't have a +, was it a '-'?
        {
            match [:pm:] /[-]/
            alter (:classifier_reason:) /** REJECT: CRM114 Priority Blacklisted by: :*:reason: **:*:_nl:/
            alter (:reject_address:) /:*:fail_priority_mail_to:/
            {
                match [:log_rejections:] /yes/
                output <append> [:*:fileprefix:rejected_by_blacklist.txt] /:*:_dw:/
            }
            alter (:stats:) / pR: -999.99 /
            goto /:looks_bad:/
        }
    }
    #   Nope, didn't match as a priority... grab the next regex
    liaf
}
#
#
#     check it against the whitelist... load the whitelist...
{
    isolate (:whitelist:)
    input (:whitelist:) [:*:fileprefix:whitelist.mfp]
    #   reset matching on whitelist to start of string
    match [:whitelist:] //
}
#
#
{
    #... Grab the next regex; turn the one-per-line patterns into a regex
    match <fromend nomultiline> (:waste: :whregex:) [:whitelist:] /(.+)/
    #... see if this regex matches the incoming mail
    {
        match (:reason:) /:*:whregex:/
        #   Yep, it matched... whitelist this email
        #
        #   put in a little tag saying why whitelisted:
        alter (:classifier_reason:) /** ACCEPT: CRM114 Whitelisted by: :*:reason: **:*:_nl:/
        alter (:_dw:) /:*:_dw: ** CRM114 Whitelisted by: :*:reason: **:*:_nl:/
        alter (:stats:) / pR: 999.99 /
        goto /:looks_good:/
    }
    #   Nope, didn't match... grab the next regex and try again,
    liaf
}
#
#     No joy, maybe we should blacklist it.
#
#     check it against the blacklist
{
    isolate (:blacklist:)
    input (:blacklist:) [:*:fileprefix:blacklist.mfp]
    #   reset matching on blacklist to start of string
    match [:blacklist:] //
}
#
{
    #... Grab the next regex; turn the one-per-line patterns into a regex
    match <fromend nomultiline> (:waste: :blregex:) [:blacklist:] /(.+)/
    #... see if this regex matches the incoming mail
    {
        match (:reason:) /:*:blregex:/
        #   Yep, it matched... blacklist this email
        #
        #   put in a little tag saying why blacklisted
        alter (:classifier_reason:) /** REJECT: CRM114 Blacklisted by: :*:reason: ** :*:_nl:/
        alter (:reject_address:) /:*:fail_blacklist_mail_to:/
        {
            match [:log_rejections:] /yes/
            output <append> [:*:fileprefix:rejected_by_blacklist.txt] /:*:_dw:/
        }
        alter (:stats:) / pR: -999.99 /
        goto /:looks_bad:/
    }
    #   Nope, didn't match... grab the next regex and try again
    liaf
}
#
#
#
#     End of blacklist processing.
#
#
#     All else has failed - we now run our CLASSIFY algorithm
#     to make our best guess.
#
#
{
    #   Run the CSS classifier against the "expanded" text -
    #   if it classifies as SPAM
    #   then reject it as SPAM.
    #
    {
        #   CLASSIFY succeeds when the nonspam file (listed before the |)
        #   wins; on failure control skips to the REJECT code below.
        classify <:*:clf:> ( :*:fileprefix::*:nonspamcss: | :*:fileprefix::*:spamcss: ) ( :stats: ) [:m_text:] /:*:lcr:/
        alter (:classifier_reason:) /** ACCEPT: CRM114 PASS Markovian Matcher ** :*:_nl::*:stats:/
        goto /:looks_good:/
    }
    alter (:classifier_reason:) /** REJECT: CRM114 FAIL Markovian Matcher ** :*:_nl::*:stats:/
    alter (:reject_address:) /:*:fail_SSM_mail_to:/
    {
        match [:log_rejections:] /yes/
        output <append> [:*:fileprefix:rejected_by_css.txt] /:*:_dw:/
    }
    goto /:looks_bad:/
}
#
#
#     Final wrap-up routines - dispose of the mail as appropriate.
#
{
    :looks_bad:
    #   is this a :stats_only: run (i.e. for CAMRAM)
    {
        match [:stats_only:] /SET/
        #   NOTE(review): (.*) is not line-limited here, so a multi-line
        #   :stats: is printed through to its end - confirm that is wanted.
        match [:stats:] (:d: :pval:) /pR: (.*)/
        output /:*:pval: :*:_nl:/
        alter (:our_exit_code:) /:*:rejected_mail_exit_code:/
        goto /:finish_up:/
    }
    #   not stats_only.... we're doing major output.
    {
        {
            match [:add_headers:] /yes/
            {
                #   <nomultiline> so that $ stops :pr: at the end of the
                #   pR line; :pr: is spliced into a single header below.
                match <nomultiline> [:stats:] (:pr:) /pR: .*$/
            }
            syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Version: :*:_crm_version: MF-:*:_pgm_hash:"/
            syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Status: SPAM ( :*:pr: )"/
        }
        #
        #   Now, get the Subject: line.  If none, make one.
        #   <nomultiline> lets ^ anchor at the start of the Subject line
        #   and keeps :subj_text: to that one line.
        {
            {
                match <nomultiline> (:subject_line: :subj_text:) \
                      /^Subject: (.*)/
            }
            alius
            {
                match (:end_of_headers:) /\n\n/
                alter (:end_of_headers:) /\nSubject: (none)\n\n/
                match <nomultiline> (:subject_line: :subj_text:) /^Subject: (.*)/
            }
        }
        {
            #
            #   If we are re-sending this, we want to de-fang the
            #   subject, otherwise we don't.
            match [:reject_address:] /[a-zA-Z0-9]/
            #   Paolo P. suggests this alteration to avoid subversion
            #   by enclosing an alternate target in "marks".  We always
            #   have to do this.  (The subject is later placed inside
            #   single quotes on a "mail" command line.)
            {
                match (:dq:) [:subj_text:] /\$/
                alter (:dq:) /USD/
                liaf
            }
            {
                match (:dq:) [:subj_text:] /[^-a-zA-Z0-9!., ]/
                alter (:dq:) //
                liaf
            }
            #
            #   We isolate subj_text here, so if later syscalls move
            #   things, the subject text used in "mail" is still OK.
            isolate (:subj_text:)
        }
        #
        #   If the user asked for a spam-flagging string, put the flagging
        #   string into the subject.
        #
        {
            match [:spam_flag_subject_string:] /./
            alter (:subj_text:) \
                  /:*:spam_flag_subject_string: :*:subj_text:/
        }
        {
            match [:add_extra_stuff:] /text/
            #   get rid of any first-column 'From's as they are message breaks!
            #   this isn't necessary if we're mailing to someplace else...
            #   (<nomultiline>: ^ must match at every line start)
            {
                match <nomultiline> (:f:) [:m_text:] /^From/
                alter (:f:) />:*:f:/
                liaf
            }
            alter (:_dw:) /:*:_dw:-=-Extra Stuff-=-\n\n:*:m_text: -0-0-0- :*:_nl:/
        }
        {
            match [:add_extra_stuff:] /attachment/
            #   get rid of any first-column 'From's as they are message breaks!
            #   this isn't necessary if we're mailing to someplace else...
            {
                match <nomultiline> (:f:) [:m_text:] /^From/
                alter (:f:) / :*:f:/
                liaf
            }
            isolate (:content_type:) //
            syscall (:*:_dw:) (:content_type:) /formail -X "Content-Type"/
            isolate (:content_transfer_encoding:) //
            syscall (:*:_dw:) (:content_transfer_encoding:) /formail -X "Content-Transfer-Encoding"/
            syscall (:*:_dw:) (:_dw:) /formail -A "MIME-Version: 1.0"/
            syscall (:*:_dw:) (:_dw:) /formail -A "Content-Type: multipart\/mixed\; boundary=Attachment_Quote_Boundary_1234567890\n--Attachment_Quote_Boundary_1234567890\n:*:content_type::*:content_transfer_encoding:"/
            syscall (:*:_dw:) (:_dw:) /formail -U "Content-Type"/
            alter (:_dw:) /:*:_dw::*:_nl:\
--Attachment_Quote_Boundary_1234567890 :*:_nl:\
Content-Type: text\/plain :*:_nl:\
Content-Transfer-Encoding: quoted-printable \n\n\n:*:m_text:\
\n--Attachment_Quote_Boundary_1234567890--\n/
        }
        #
        #   Make sure the mail is well-formed
        syscall (:*:_dw:) (:_dw:) /formail/
        #
        #   Decide if we forward or if we just output it.
        {
            {
                #   if this match succeeds, we should forward-to-an-address?
                #   Yes, but only if we _have_ a forward-to address.
                match [:reject_address:] /[a-zA-Z0-9]/
                {
                    #   -- put the classifier reason in as the first thing!
                    match [:add_verbose_stats:] /yes/
                    alter (:_dw:) /:*:_nl: :*:classifier_reason::*:_nl: :*:_dw: /
                }
                syscall (:*:_dw:) /mail :*:reject_address: -s ':*:subj_text:'/
            }
            alius
            {
                {
                    #   -- put the classifier reason in at the end of the headers
                    match [:add_verbose_stats:] /yes/
                    match (:start_of_data:) /\n\n/
                    alter (:start_of_data:) /\n\n :*:classifier_reason: \n /
                }
                accept
            }
        }
    }
    alter (:our_exit_code:) /:*:rejected_mail_exit_code:/
    goto /:finish_up:/
}
#
#     and here's where we accept something as good email.
{
    :looks_good:
    #   is this a :stats_only: run (i.e. for CAMRAM)
    {
        match [:stats_only:] /SET/
        match [:stats:] (:d: :pval:) /pR: (.*)/
        output /:*:pval: :*:_nl:/
        alter (:our_exit_code:) /:*:accepted_mail_exit_code:/
        goto /:finish_up:/
    }
    #   Not stats-only; do the full output thing.
    {
        match [:add_verbose_stats:] /yes/
        alter (:_dw:) /:*:_dw: :*:_nl: :*:classifier_reason: :*:_nl:/
    }
    {
        match [:add_headers:] /yes/
        {
            match <nomultiline> [:stats:] (:pr:) /pR: .*$/
        }
        syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Version: :*:_crm_version: MF-:*:_pgm_hash: "/
        syscall (:*:_dw:) (:_dw:) /formail -A "X-CRM114-Status: Good ( :*:pr: )"/
    }
    {
        match [:add_extra_stuff:] /text/
        #   get rid of any first-column 'From's as they are message breaks!
        #   this isn't necessary if we're mailing to someplace else...
        {
            match <nomultiline> (:f:) [:m_text:] /^From/
            alter (:f:) / :*:f:/
            liaf
        }
        alter (:_dw:) /:*:_dw:-=-Extra Stuff-=-\n\n :*:m_text: -0-0-0- \n/
    }
    {
        match [:add_extra_stuff:] /attachment/
        #   get rid of any first-column 'From's as they are message breaks!
        #   this isn't necessary if we're mailing to someplace else...
        {
            match <nomultiline> (:f:) [:m_text:] /^From/
            alter (:f:) / :*:f:/
            liaf
        }
        isolate (:content_type:) //
        syscall (:*:_dw:) (:content_type:) /formail -X "Content-Type"/
        isolate (:content_transfer_encoding:) //
        syscall (:*:_dw:) (:content_transfer_encoding:) /formail -X "Content-Transfer-Encoding"/
        syscall (:*:_dw:) (:_dw:) /formail -A "MIME-Version: 1.0"/
        syscall (:*:_dw:) (:_dw:) /formail -A "Content-Type: multipart\/mixed\; boundary=Attachment_Quote_Boundary_1234567890\n--Attachment_Quote_Boundary_1234567890\n:*:content_type::*:content_transfer_encoding:"/
        syscall (:*:_dw:) (:_dw:) /formail -U "Content-Type"/
        alter (:_dw:) /:*:_dw::*:_nl:\
--Attachment_Quote_Boundary_1234567890 :*:_nl:\
Content-Type: text\/plain :*:_nl:\
Content-Transfer-Encoding: quoted-printable \n\n\n:*:m_text:\
\n--Attachment_Quote_Boundary_1234567890--\n/
    }
    accept
    alter (:our_exit_code:) /:*:accepted_mail_exit_code:/
    goto /:finish_up:/
}
#
#     Here's where we finish up processing in all the paths.
#
:finish_up:
{
    #   ---- should we consider automatic training?
    match [:automatic_training:] /yes/
    #   bounce out if we've already auto-trained this email
    #   (<absent>: continue only when the marker is NOT in the mail)
    match <absent> /AUTO-TRAINED/
    isolate (:msghash:)
    hash (:msghash:) /:*:_dw:/
    #   pick one in 16 - here, if the second-to-last digit is a 0
    match [:msghash:] /......0./
    #
    #   out put autotraining...
    #   Yep... we should use this for autotraining
    #   do we auto-train on acceptance?
    {
        match [:classifier_reason:] /ACCEPT/
        #   it wasn't spam... autotrain it "nonspam"
        output <append> [:*:fileprefix:nonspamtext.txt] /:*:text:/
        learn (:*:fileprefix::*:nonspamcss:) [:m_text:] /:*:lcr:/
        goto /:autotrain_finish:/
    }
    #   or do we autotrain on rejection
    {
        match [:classifier_reason:] /REJECT/
        #   it was spam... autotrain it "spam"
        output <append> [:*:fileprefix:spamtext.txt] /:*:text:/
        learn (:*:fileprefix::*:spamcss:) [:m_text:] /:*:lcr:/
        goto /:autotrain_finish:/
    }
    :autotrain_finish:
    {
        {
            match [:autotrain_address:] /../
            syscall (:*:classifier_reason: :*:_nl: :*:_dw:) /mail -s "AUTO-TRAINED email - please check" :*:autotrain_address:/
        }
        alius
        {
            #   there was no autotrain address, so we just accept it.
            match (:subj:) /Subject:/
            alter (:subj:) /Subject: AUTO_TRAINED- please check! .../
            accept
        }
    }
}
:exit_here:
exit /:*:our_exit_code:/

#     Catch-all fault handler: pass the mail through untouched, then
#     report the error text and exit with the fault code.
trap (:broken_program_message:) /.*/
{
    accept
    output /:*:_nl: Aw, crud. Mailfilter.crm broke. Here's the error: :*:_nl:/
    output /:*:broken_program_message:/
}
exit /:*:program_fault_exit_code:/