#!/usr/bin/perl # bogofilter-milter.pl - a Sendmail::Milter Perl script for filtering # mail using individual users' bogofilter databases. # # (additional information below the coypright statement) # Copyright 2003, 2005, 2007, 2008, 2010 Jonathan Kamens # . Please send me bug reports, # suggestions, criticisms, compliments, or any other feedback you have # about this script! # # The current version of this script and extensive additional # documentation are available from # . # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # You will need the following non-standard Perl modules installed to # use this script: Sendmail::Milter, Mail::Alias, Proc::Daemon, # IO::Stringy, Socket, Net::CIDR. Before using this script, search # for CONFIGURABLE SETTINGS and configure them appropriately for your # site. # # Inserts "X-Bogosity: Spam, tests=bogofilter" into messages that # appear to be spam (or "Ham" or "Unsure" into ones that don't). If # the message is rejected, you usually won't see the "Spam", but see # below about training mode. # # Save this script somewhere, launch it as root (by running it in the # background or invoking it with "--daemon" in which case it will # background itself), and reconfigure your sendmail installation to # call it as an external filter (probably by calling INPUT_MAIL_FILTER # in your sendmail.mc file). Running this script as root should be # safe because it changes its effective UID and GID whenever # performing operations on individual users' files (if you find a # security problem, please let me know!). # # NOTE: You will want to take steps to ensure that this script is # started before sendmail whenever your machine boots, e.g., by # creating an appropriate script in /etc/rc.d/init.d with appropriate # links to it in /etc/rc.d/rc?.d, because once you configure sendmail # to talk to a particular milter, it may refuse to deliver email if # that milter isn't running when the email comes in. # # For additional information about libmilter and integrating this or # any other libmilter filter into your sendmail installation, see the # file README.libmilter that ships with sendmail and/or the section # entitled "ADDING NEW MAIL FILTERS" in the README file that ships # with the M4 sendmail CF templates. # # You may need to restart this script to get it to notice changes in # mail aliases. # This script logs various informational, warning and error messages # to the "mail" facility. # Thanks to Tom Anderson # for the IP whitelisting changes and for several other useful # suggestions and bug fixes. # BEGIN CONFIGURABLE SETTINGS # If this string appears in the Subject of a message (case # insensitive), the message won't be filtered. my $magic_string = '[no-bogofilter]'; # Set the syslog facility you wish to log messages to. my $log_facility = 'LOG_MAIL'; # These settings control exactly what error sendmail sends back to the # sender if a message is rejected. You can leave them as-is, or # customize them as desired. my $rcode = 550; # three-digit RFC 821 SMTP reply my $xcode = "5.7.1"; # extended RFC 2034 reply code my $reject_message = "Your message looks like spam.\n" . "If it isn't, resend it with $magic_string " . "in the Subject line."; # Whitelist any IP addresses or ranges from this filter. # For example: #my(@whitelist) = ("127.0.0.1", "10.127.0.1-10.127.0.9", "192.168.0.0/16"); my(@ip_whitelist) = (); # If you want to whitelist any addresses which have authenticated # via poprelayd (i.e. remote workstations of users on your server) # set $dbfile to your popip.db location, else set it to undef. # For example: #my $ip_whitelist_db = "/etc/mail/popip.db"; my $ip_whitelist_db = undef; # The largest message to keep in memory rather than writing to a # temporary file. my $MAX_INCORE_MSG_LENGTH = 1000000; my $pid_file = '/var/run/bogofilter-milter.pid'; # Whatever path you specify for $socket needs to match the socket # specified in the sendmail.cf file (with "local:" in front of it # there, but not here). my $socket = '/var/run/bogofilter-milter.sock'; # The following two settings give more granular control over whether # bogofilter is used for any particular user and what configuration # settings are used when it is. # - If $bogofilter_cf is set, then the script will look for a file # with that name in the user's home directory. If it finds it, then # bogofilter will be called with "-c $HOME/$bogofilter_cf" so that the # specified configuration file is used rather than the default, # .bogofilter.cf. # - If $require_cf is true, then the specified configuration file # *must* exist for bogofilter to be used for this user. In other # words, rather than only looking for the .bogofilter subdirectory of # the user's home directory, the script will look for both the # .bogofilter subdirectory *and* the config file. # - Note that $require_cf is ignored if $bogofilter_cf is unset. my $bogofilter_cf = undef; my $require_cf = undef; # If you would like the milter to add a unique ID to the X-Bogosity # line, then set this variable to true. ", milter_id=..." will be # added to the end of the X-Bogosity line. my $add_unique_id = 1; # If a file with this name exists in the user's .bogofilter directory, # then it is assumed to contain regular expressions, one per line, to # match against Subject lines in incoming messages (lines containing # only whitespace and lines starting with "#" are ignored). Any # message whose Subject line matches one of the regular expressions # will not be filtered, just as if $magic_string (see above) had # appeared in its Subject line. my $subject_filter_file = 'milter-subject-filters'; # If an executable file or link with this name exists in the user's # .bogofilter directory, and it is owned by the user or root (for # security reasons), then it will be used as a filter, i.e., the # message will be fed into it and replaced with its output, before # bogofilter is run on it, if it returns a zero exit status. # Furthermore, the filtered message is what will be put into the # $archive_mbox and $ham_archive_mbox files. However, the actual # message delivered by the MTA if the milter accepts it will be the # unfiltered version, not the filtered version. You could use this, # e.g., to reformat incoming email with a script that calls # spamitariuim.pl (in bogofilter contrib directory) before filtering # it. # # The following environment variables are available to the script when # it is executed: # # MILTER_REMOTE_IP IP address of remote SMTP server # MILTER_REMOTE_NAME Host name of remote SMTP server as per a # reverse DNS lookup on its IP address # MILTER_LOCAL_IP IP address of SMTP server receiving the # message # MILTER_LOCAL_NAME Host name of SMTP server receiving the message # MILTER_HELOHOST Host name specified by the remote server in # its HELO or EHLO command # MILTER_ENVFROM The envelope address of the sender of the # message, a.k.a., the Return-Path # MILTER_ENVRCPT The envelope address of the recipient of the # message for whom bogofilter is being invoked. # # If you want to disable this functionality, set the variable to # undef. my $filter_script = 'milter-filter-script'; # If a file with this name exists in the user's .bogofilter directory, # then that user's mail will be filtered in training mode. This means # that the message will be filtered and registered as spam or non-spam # and the appropriate X-Bogosity header will be inserted, but it'll be # delivered even if bogofilter thinks it's spam. This allows the user # to detect false positives or false negatives and feed them back into # bogofilter to train it. To disable this functionality set # $training_file to undef. my $training_file = 'training'; # If a file or link with this name exists in the user's .bogofilter # directory, then copies of rejected messages will be saved in this # file in mbox format, using flock locking. To disable rejected # message archiving, set $archive_mbox to undef. my $archive_mbox = 'archive'; # If a file or link with this name exists in the user's .bogofilter # directory, then copies of accepted messages (Ham or Unsure) will be # saved in this file in mbox format, using flock locking. To disable # accepted message archiving, set $ham_archive_mbox to undef. my $ham_archive_mbox = 'ham_archive'; # If $cyrus_deliver is set to an existing executable, then it is # assumed to be a Cyrus IMAP "deliver" program. If the $archive_mbox # or $ham_archive_mbox for a particular user is a symlink pointing at # a nonexistent file whose name starts with "cyrus:", then everything # after the "cyrus:" is assumed to be the name of a Cyrus IMAP folder # within the user's mailbox to which to deliver the spam message # instead of saving it into an mbox format file. my $cyrus_deliver = '/usr/lib/cyrus-imapd/deliver'; # If you would like to use a shared bogofilter database for everyone, # rather than separate per-user databases, then create a user on your # system to be used as a home for the shared database, and set # $database_user to that user's username. # # If you set $database_user, then all the logic described above for # deciding whether to run bogofilter, whether to run in training mode # or real mode, and whether to archive spam still applies, so make # sure you configure $database_user's account properly. # # If you set $database_user, then $aliases_file, $sendmail_canon, # $sendmail_prog, $recipient_cache_expire, and # $recipient_cache_check_interval do NOT apply and are ignored. my $database_user = undef; # Mail::Alias is used to expand SMTP recipient addresses into local # mailboxes to determine if any of them have bogofilter databases. If # someone sends E-mail to a mailing list or alias whose expansion # contains one or more local users with bogofilter databases, then one # of those users' database (which one in particular is not defined) # will be used to filter the message. To disable this functionality # and remove the dependency on Mail::Alias, comment out the "use # Mail::Alias;" line and set $aliases_file to undef in the # configuration section. With this functionality disabled, mail will # only be filtered if it is sent directly to a user in the passwd # file. On the other hand, with this functionality enabled, one # person's bogofilter database can cause a message to be filtered for # everyone on a local mailing list. my $aliases_file = '/etc/aliases'; # If you want the milter to ask sendmail to canonicalize recipient # addresses before trying to alias-expand them, then set # $sendmail_canon to true and $sendmail_prog to the path of the # sendmail binary to invoke. This is necessary, e.g., if you use a # virtual user table for some recipients that do sendmail filtering. # You may also wish to examine the sendmail_canon subroutine below, # because it may not be right for your particular sendmail # configuration. Search for CHECKTHIS in the function. my $sendmail_canon = 1; my $sendmail_prog = '/usr/sbin/sendmail'; # @discard_control is an array of anonymous arrays. Each sub-array # contains a pair of entries, a control pattern and an action, either # "discard" or "reject". The action corresponding to the first # matching control pattern determines what happens to the messages. # If @discard_control is empty or none of its control patterns match, # the default action is "reject". The following control patterns are # valid: # "addr:a.b.c.d" matches if the sending host has the indicated IP address # "netblock:a.b.c.d/e" matches if the sending host is in the indicated netblock # "host:fqdn" matches if the IP address of the sending host resolves # to the indicated host name # "domain:fqdn" matches if the IP address of the sending host resolves # to a host name in the indicated domain # "mx" matches if one of the MX servers for the recipient's # domain resolves to the IP address of the sending host # "*" always matches # The default @discard_control setting discards messages from MX # servers to prevent this script from contributing to spam "blowback", # which occurs when a spammer forges someone's real email address as # the return address on spam, and then that person has to deal with # tons of bounce messages from sites that reject the spam. my(@discard_control) = ( ["mx" => "discard"], ["*" => "reject"], ); # You can configure how long addresses will stay in the cache of # addresses that have been been expanded against the virtual user # table (if $sendmail_canon is set above), then expanded against the # aliases file (if $aliases_file is set above), then checked to see if # they represent users who are doing filtering. You would want cache # entries to time out if you get a lot of spam dictionary attacks # against your mail server, when the spammers try tons of invalid # addresses on the off chance that one of them might be valid, because # in that case your cache will grow without bound and the bogofilter # milter process will get really large. Set this to 0 to disable # cache expiration, or to the number of seconds after which cache # entries should expire. # # Configuration changes in the user's bogofilter directory, e.g., # changes to $subject_filter_file, aren't detected until the cache # entry for the user expires, so if you're allowing users to make # changes like that, you should probably reduce this timeout to # something smaller so that their changes will take affect somewhat # promptly. my $recipient_cache_expire = 24 * 60 * 60; # one day # How often to expire entries from the cache. my $recipient_cache_check_interval = 60 * 60; # one hour # You may wish to remove this restriction, by setting this variable to # 0, if your site gets a lot of mail, but I haven't tested the script # to make sure it functions correctly with multiple interpreters. my $milter_interpreters = 1; # END CONFIGURABLE SETTINGS require 5.008_000; # for User::pwent use strict; use warnings; use DB_File; use Data::Dumper; use English '-no_match_vars'; use Fcntl qw(:flock :seek); use File::Basename; use File::Temp qw(tempfile); use Getopt::Long; use IO::Scalar; use IPC::Open2; use Mail::Alias; use Net::CIDR; use Net::DNS; use POSIX; use Proc::Daemon; use Sendmail::Milter; use Socket; use Sys::Syslog qw(:DEFAULT :macros setlogsock); use User::pwent; $Data::Dumper::Indent = 0; # Used to cache the results of alias expansions and checks for # filtered recipients. my %cached_recipients; my $whoami = basename $0; my $usage = "Usage: $whoami [--daemon] [--debug] [--restart]\n"; my($run_as_daemon, $get_help, $debug, $restart); my %my_milter_callbacks = ( 'helo' => \&my_helo_callback, 'envfrom' => \&my_envfrom_callback, 'envrcpt' => \&my_rcpt_callback, 'header' => \&my_header_callback, 'eoh' => \&my_eoh_callback, 'body' => \&my_body_callback, 'eom' => \&my_eom_callback, 'abort' => \&my_abort_callback, 'close' => \&my_close_callback, ); $my_milter_callbacks{'connect'} = \&my_connect_callback if (@ip_whitelist || $ip_whitelist_db || @discard_control); die $usage if (! GetOptions('daemon' => \$run_as_daemon, 'debug' => \$debug, 'restart' => \$restart, 'help|h|?' => \$get_help)); if ($get_help) { print $usage; exit; } if ($run_as_daemon) { Proc::Daemon::Init; } if (! (open(PIDFILE, '+<', $pid_file) || open(PIDFILE, '+>', $pid_file))) { &die("open($pid_file): $!\n"); } seek(PIDFILE, 0, SEEK_SET); if (! flock(PIDFILE, LOCK_EX|LOCK_NB)) { &die("flock($pid_file): $!\n"); } if (! (print(PIDFILE "$$\n"))) { &die("writing to $pid_file: $!\n"); } # Flush the PID seek(PIDFILE, 0, SEEK_SET); setlogsock('unix'); openlog($whoami, 'pid', $log_facility); if (! $debug) { # I'd really like to to this, but it doesn't work with Sys::Syslog # 0.13 in Perl 5.8.8. # setlogmask(&LOG_UPTO(LOG_INFO)); eval " no warnings 'redefine'; sub debuglog { } "; } while ($restart) { my $pid = fork(); if (! defined($pid)) { &die("fork: $!"); } elsif ($pid) { $SIG{'TERM'} = sub { &syslog('info', "got SIGTERM, shutting down"); kill 'TERM', $pid; exit; }; waitpid $pid, 0; my $status = $? >> 8; &syslog('warning', "child process $pid exited (status word $?, exit status $status)"); } else { last; } } my $magic_string_re = $magic_string; $magic_string_re =~ s/(\W)/\\$1/g; # convert whitelist into CIDR notation { my(@whitelist_cidr); foreach my $IP (@ip_whitelist) { if (not eval {@whitelist_cidr = Net::CIDR::cidradd($IP, @whitelist_cidr)}) { &die("Error processing whitelist: \"$IP\" is not a valid IP ", "address or range."); } } @ip_whitelist = @whitelist_cidr; } # open popip database for reading my %ip_whitelist_db; &opendb_read if ($ip_whitelist_db); if ($database_user) { $aliases_file = $sendmail_canon = $sendmail_prog = $recipient_cache_expire = $recipient_cache_check_interval = undef; syslog("info", "Using shared bogofilter database under %s's account", $database_user); } unlink($socket); Sendmail::Milter::setconn("local:$socket"); Sendmail::Milter::register("bogofilter-milter", \%my_milter_callbacks, SMFI_CURR_ACTS); Sendmail::Milter::main($milter_interpreters); &closedb; sub my_helo_callback { my $ctx = shift; my $helo = shift; my $hash = &getpriv($ctx); $hash->{'helo'} = $helo; &setpriv($ctx, $hash); return SMFIS_CONTINUE; } sub my_envfrom_callback { my $ctx = shift; my $envfrom = shift; my $hash = &getpriv($ctx); $hash->{'envfrom'} = $envfrom; &setpriv($ctx, $hash); return SMFIS_CONTINUE; } sub my_connect_callback { my $ctx = shift; # milter context object my $hostname = shift; # The connection's host name. my $sockaddr_in = shift; # AF_INET portion of the host address, # from getpeername(2) syscall my $hash = &getpriv($ctx); my ($port, $ipaddr) = Socket::unpack_sockaddr_in($sockaddr_in) or &die("Could not unpack socket address: $!"); $ipaddr = Socket::inet_ntoa($ipaddr); # translates into standard IPv4 addr $hash->{'remotename'} = $hostname; $hash->{'remoteip'} = $ipaddr; $hash->{'localname'} = $ctx->getsymval('j'); $hash->{'localip'} = $ctx->getsymval('{if_addr}'); &debuglog("my_connect_callback: entering with hostname=$hostname, ", "ipaddr=$ipaddr, port=$port"); # check if the connecting server is listed in the whitelist if (@ip_whitelist) { if (eval {Net::CIDR::cidrlookup($ipaddr, @ip_whitelist)}) { syslog('info', '%s', "$ipaddr is whitelisted, so this email is " . "being accepted unfiltered."); &setpriv($ctx, undef); return SMFIS_ACCEPT; } else { &debuglog("$ipaddr is not in the whitelist"); } } # check if connecting server is listed in the popip database if ($ip_whitelist_db) { if ($ip_whitelist_db{$ipaddr}) { syslog('info', '%s', "$ipaddr is authenticated via poprelayd, " . "so this email is being accepted unfiltered."); &setpriv($ctx, undef); return SMFIS_ACCEPT; } else { &debuglog("$ipaddr is not in the popip database"); } } $hash->{'ipaddr'} = $ipaddr; &setpriv($ctx, $hash); &debuglog("my_connect_callback: return CONTINUE with hash"); return SMFIS_CONTINUE; } sub my_rcpt_callback { my $ctx = shift; my $envrcpt = shift; my $hash = &getpriv($ctx); &debuglog("my_rcpt_callback: entering with " . Data::Dumper->Dump([&small_hash($hash)], [qw(hash)])); if ($hash->{'rcpt'}) { # We've already encountered a recipient who is filtering this message. &setpriv($ctx, $hash); &debuglog("my_rcpt_callback: return CONTINUE with old hash"); return SMFIS_CONTINUE; } my $rcpt = $ctx->getsymval('{rcpt_addr}'); &debuglog("my_rcpt_callback: rcpt_addr: $rcpt"); if (&filtered_dir($rcpt)) { $hash->{'rcpt'} = $rcpt; $hash->{'envrcpt'} = $envrcpt; &setpriv($ctx, $hash); &debuglog("my_rcpt_callback: return CONTINUE with hash"); return SMFIS_CONTINUE; } else { &setpriv($ctx, undef); &debuglog("my_rcpt_callback: return CONTINUE with undef"); return SMFIS_CONTINUE; } } sub my_header_callback { my($ctx, $field, $value) = @_; my($hash) = &getpriv($ctx); &debuglog("my_header_callback: entering with " . Data::Dumper->Dump([&small_hash($hash), $field, $value], [qw(hash field value)])); if (! $hash) { &debuglog("my_header_callback: return ACCEPT with no hash"); return SMFIS_ACCEPT; } if (lc $field eq 'subject') { if ($value =~ /$magic_string_re/oi) { &setpriv($ctx, undef); &debuglog("my_header_callback: returning ACCEPT for magic subject"); return SMFIS_ACCEPT; } if ($hash->{'rcpt'}) { my(@subject_filters) = &user_subject_filters($hash->{'rcpt'}); foreach my $filter (@subject_filters) { if ($value =~ /$filter/) { &setpriv($ctx, undef); &debuglog(sprintf("my_header_callback: returning ACCEPT for subject filter %s for recipient %s", $filter, $hash->{'rcpt'})); return SMFIS_ACCEPT; } } } } if (lc $field eq 'x-bogosity') { &debuglog("Found $field: $value"); my $index = $hash->{x_bogosity_index} || 1; if ($value =~ /tests=bogofilter/) { unshift(@{$hash->{x_bogosity}}, $index); &debuglog("my_header_callback: stashing $field: $value ", "at index $index"); } $hash->{x_bogosity_index} = $index + 1; } $hash = &add_to_message($hash, "$field: $value\n"); &setpriv($ctx, $hash); &debuglog("my_header_callback: returning CONTINUE with hash"); return SMFIS_CONTINUE; } sub my_eoh_callback { my($ctx) = @_; my($hash) = &getpriv($ctx); # If $hash is undefined here, it means that the sender sent no # message header at all, so the block of code in # my_header_callback for checking if $hash is undefined never got # called. This means the message is almost certainly spam, but # it's not our job to determine that if none of the recipients are # using bogofilter. if (! $hash) { &debuglog("my_eoh_callback: return ACCEPT with no hash (message had empty header)"); return SMFIS_ACCEPT; } &debuglog("my_eoh_callback: entering with " . Data::Dumper->Dump([&small_hash($hash)], [qw(hash)])); $hash = &add_to_message($hash, "\n"); &setpriv($ctx, $hash); &debuglog("my_eoh_callback: returning CONTINUE with hash"); return SMFIS_CONTINUE; } sub my_body_callback { my($ctx, $body, $len) = @_; my($hash) = &getpriv($ctx); &debuglog("my_body_callback: entering with " . Data::Dumper->Dump([&small_hash($hash), $len], [qw(hash len)])); $hash = &add_to_message($hash, $body); &setpriv($ctx, $hash); &debuglog("my_body_callback: returning CONTINUE with hash"); return SMFIS_CONTINUE; } sub add_to_message { my($hash, $text) = @_; return $hash if (! $text); if (! $hash->{'fh'}) { $hash->{'msg'} = '' if (! $hash->{'msg'}); $hash->{'msg'} .= $text; if (length($hash->{'msg'}) <= $MAX_INCORE_MSG_LENGTH) { return $hash; } ($hash->{'fh'}, $hash->{'fn'}) = tempfile(); if (! $hash->{'fn'}) { &die("error creating temporary file"); } &debuglog("switching to temporary file " . $hash->{'fn'}); $text = $hash->{'msg'}; delete $hash->{'msg'}; } if (! print({$hash->{'fh'} } $text)) { &die("error writing to temporary file " . $hash->{'fn'}); } return $hash; } sub message_read_handle { my($hash) = @_; if ($hash->{'fn'}) { if (! seek($hash->{'fh'}, 0, SEEK_SET)) { &die("couldn't seek in " . $hash->{'fn'} . ": $!"); } return $hash->{'fh'}; } else { return new IO::Scalar \$hash->{'msg'}; } } sub my_eom_callback { my $ctx = shift; my $hash = &getpriv($ctx); my $fh; local($_); &debuglog("my_eom_callback: entering with " . Data::Dumper->Dump([&small_hash($hash)], [qw(hash)])); my $dir = &filtered_dir($hash->{'rcpt'}); if (! $dir) { # This can happen if the MTA loses the input channel from the sender, # so it isn't an error condition. &debuglog("my_eom_callback: called for non-filtered recipient; " . Data::Dumper->Dump([&small_hash($hash)], [qw(hash)])); &setpriv($ctx, undef); &debuglog("my_eom_callback: returning ACCEPT with undef"); return SMFIS_ACCEPT; } if (defined($filter_script) and &restrict_permissions($hash->{'rcpt'}) and -x "$dir/$filter_script" and (-o _ or ! (stat(_))[4])) { my $s = "$dir/$filter_script"; &unrestrict_permissions; syslog('debug', 'filtering with %s', $s); my($filter_fh, $filter_fn) = tempfile(); my $stderr_fh = tempfile(); if (! $filter_fn) { &die("error creating temporary file"); } $^F = fileno($filter_fh); pipe(FROMPARENT, FILTER) or &die("pipe: $!\n"); my $pid = fork; &die("fork: $!\n") if (! defined($pid)); if (! $pid) { close(FILTER); if (! open(STDOUT, ">&", $filter_fh)) { syslog('err', "reopen filter STDOUT to $filter_fn failed: %m"); exit(1); } open(STDERR, ">&", $stderr_fh); if (! open(STDIN, "<&FROMPARENT")) { syslog('err', "reopen filter STDIN from parent failed: %m"); exit(1); } &die("couldn't restrict permissions") if (! &restrict_permissions($hash->{'rcpt'}, 1));; $ENV{'MILTER_REMOTE_IP'} = $hash->{'remoteip'} || ''; $ENV{'MILTER_REMOTE_NAME'} = $hash->{'remotename'} || ''; $ENV{'MILTER_HELOHOST'} = $hash->{'helo'} || ''; $ENV{'MILTER_ENVFROM'} = $hash->{'envfrom'} || ''; $ENV{'MILTER_ENVRCPT'} = $hash->{'envrcpt'} || ''; $ENV{'MILTER_LOCAL_IP'} = $hash->{'localip'} || ''; $ENV{'MILTER_LOCAL_NAME'} = $hash->{'localname'} || ''; if (! exec("$s")) { syslog('err', 'exec(%s) failed: %m', $s); exit(1); } } close(FROMPARENT); my $fh = &message_read_handle($hash); my $good_filter = 1; while (<$fh>) { s/\r\n$/\n/; if (! print(FILTER $_)) { syslog('info', 'writing to filter %s: %m', $s); $good_filter = undef; last; } } my @failed; if (! close(FILTER)) { push(@failed, "close(FILTER): $!"); } if (! waitpid($pid, 0)) { push(@failed, "waitpid($pid): $!"); } if ($? >> 8) { push(@failed, "\$?>>8 == " . ($?>>8)); } if (@failed and $good_filter) { syslog('warning', 'filter %s failed: %s', $s, join(", ", @failed)); $good_filter = undef; } if (seek($stderr_fh, 0, SEEK_SET) and -s $stderr_fh) { while (my $error = <$stderr_fh>) { $error =~ s/^\s+//; $error =~ s/\s+$//; syslog('warning', 'stderr output from %s: %s', $s, $error); } close($stderr_fh); } if ($good_filter) { delete $hash->{'msg'}; unlink $hash->{'fn'} if ($hash->{'fn'}); $hash->{'fh'} = $filter_fh; $hash->{'fn'} = $filter_fn; $hash->{'nocr'} = 1; syslog('debug', 'successfully filtered with %s', $s); } else { unlink $filter_fn; close($filter_fh); } } if (! pipe(FROMBOGO, TOPARENT)) { &die("pipe: $!\n"); } if (! pipe(FROMPARENT, BOGOFILTER)) { &die("pipe: $!\n"); } my $pid = fork; if (! defined($pid)) { &die("fork: $!\n"); } elsif (! $pid) { close(FROMBOGO); close(BOGOFILTER); open(STDOUT, ">&TOPARENT") or syslog('warning', "reopen STDOUT to parent failed: $!"); open(STDIN, "<&FROMPARENT"); close(TOPARENT); close(FROMPARENT); &die("couldn't restrict permissions") if (! &restrict_permissions($hash->{'rcpt'}, 1));; my(@cmd) = ('bogofilter', '-v', '-u', '-d', $dir); if ($bogofilter_cf && -f "$dir/$bogofilter_cf") { push(@cmd, '-c', "$dir/$bogofilter_cf"); } exec(@cmd) || &die("exec(bogofilter): $!\n"); # &die had better not return! } close(TOPARENT); close(FROMPARENT); $fh = &message_read_handle($hash); if ($hash->{'fn'}) { # This is safe to do on Unix, since on Unix you can unlink an # open file and it'll stay around until the last open file # handle to it goes away. If this script were to be used on # non-Unix operating systems, which is a big "if" that I'm not # sure could ever happen, then this unlink might be a problem # and would need to happen later. unlink $hash->{'fn'}; } while (<$fh>) { s/\r\n$/\n/ if (! $hash->{'nocr'}); print(BOGOFILTER $_) || &die("writing to bogofilter: $!\n"); } close(BOGOFILTER); my $bogosity_line = ; close(FROMBOGO); waitpid $pid, 0; my $exit_status = $? >> 8; if ($bogosity_line =~ s/^X-Bogosity:\s*//i) { chomp $bogosity_line; } elsif (! $exit_status) { $bogosity_line = "Spam, tests=bogofilter"; } elsif ($exit_status == 1) { $bogosity_line = "Ham, tests=bogofilter"; } elsif ($exit_status == 2) { $bogosity_line = "Unsure, tests=bogofilter"; } if ($add_unique_id) { $bogosity_line .= # I wish we could make this a real UUID, but that would # require depending on one of the CPAN UUID modules, and I # don't want to add that dependency just for this feature. ", milter_id=" . sprintf("%lx.%lx.%lx", $$, time(), int(rand(1000000000))); } my $from = $ctx->getsymval('{mail_addr}'); if (! $exit_status) { my($training); if ($training_file) { if (&restrict_permissions($hash->{'rcpt'})) { $training = (-f "$dir/$training_file"); &unrestrict_permissions; } else { syslog('warning', 'assuming training mode because ' . 'permissions could not be restricted'); $training = 1; } } foreach my $index (@{$hash->{x_bogosity}}) { &debuglog("Removing old X-Bogosity header"); $ctx->chgheader('X-Bogosity', $index, ""); } $ctx->addheader('X-Bogosity', $bogosity_line); my $which = &reject_or_discard($hash); my($verb) = ($which == SMFIS_REJECT) ? "reject" : "discard"; syslog('info', '%s', ($training ? "would $verb" : "${verb}ing") . " likely spam from $from to " . $hash->{'rcpt'} . " based on $dir"); &save_copy($fh, $from, $hash->{'rcpt'}, $dir, $archive_mbox, $bogosity_line, $hash->{'nocr'}); if (! $training) { $ctx->setreply($rcode, $xcode, $reject_message); &setpriv($ctx, undef); return $which; } } else { &save_copy($fh, $from, $hash->{'rcpt'}, $dir, $ham_archive_mbox, $bogosity_line, $hash->{'nocr'}); my $bogosity; if ($exit_status == 1) { $bogosity = "Ham"; } elsif ($exit_status == 2) { $bogosity = "Unsure"; } if ($bogosity_line || $bogosity) { foreach my $index (@{$hash->{x_bogosity}}) { &debuglog("Removing old X-Bogosity header"); $ctx->chgheader('X-Bogosity', $index, ""); } $ctx->addheader('X-Bogosity', $bogosity_line); } } &setpriv($ctx, undef); return SMFIS_CONTINUE; } sub save_copy { my($fh, $from, $rcpt, $dir, $archive_mbox, $bogosity, $nocr) = @_; local($_); my($archive, $link); $archive = ($archive_mbox && &restrict_permissions($rcpt) && (lstat($archive = "$dir/$archive_mbox"))) ? $archive : undef; if ($cyrus_deliver && -f $cyrus_deliver && -X $cyrus_deliver && -l $archive && ($link = readlink($archive)) && $link =~ s/^cyrus:// && (! -f $archive)) { &unrestrict_permissions; my $user = &filtered_user($rcpt); if (! $user) { &die("Couldn't determine username for IMAP delivery"); } if (! seek($fh, 0, SEEK_SET)) { &die("error rewinding message handle: $!"); } my $pid = open(DELIVER, "|-"); if (! defined($pid)) { &die("Error forking to execute $cyrus_deliver: $!"); } elsif (! $pid) { exec($cyrus_deliver, '-a', $user, '-m', "user.$user.$link") || &die("exec($cyrus_deliver): $!"); } else { my ($in_header) = 1; my $ret = 1; while ($ret && <$fh>) { s/\r\n/\n/ if (! $nocr); if ($in_header) { next if (/^x-bogosity:.*tests=bogofilter/i); if (/^$/) { if ($bogosity) { $ret = $ret && print(DELIVER "X-Bogosity: $bogosity\n"); } $in_header = 0; } } $ret = $ret && print(DELIVER $_); } $ret = $ret && close(DELIVER); if (! $ret) { syslog('warning', '%s', "$cyrus_deliver failed for user.$user.$link"); } return; } } if ($archive) { # There is an annoying race condition here. Suppose two spam # messages are delivered at the same time to a user whose # archive file is a symlink pointing at a nonexistent (yet) # file. Milter process A tries to open with +< and fails. IN # the meantime, process B also tries to open with +< and fails. # Then A opens witn +>, locks the file and starts writing to # it, and *then* B opens with +>, thus truncating whatever data # was written thus far by A. I'm not sure what the best way is # to fix this race condition reliably, and it seems rare enough # that it isn't worth the effort. if (! (open(MBOX, '+<', $archive) || open(MBOX, '+>', $archive))) { syslog('warning', '%s', "opening $archive for " . "write: $!"); goto no_archive_open; } if (! flock(MBOX, LOCK_EX)) { syslog('warning', '%s', "locking $archive: $!"); goto close_archive; } if (! seek(MBOX, 0, SEEK_END)) { syslog('warning', '%s', "seek($archive, 0, SEEK_END): $!"); goto close_archive; } if (! seek($fh, 0, SEEK_SET)) { &die("error rewinding message handle: $!"); } if (! print(MBOX "From " . ($from || 'MAILER-DAEMON') . " " . localtime() . "\n")) { syslog('warning', '%s', "write($archive): $!"); goto close_archive; } my($last_blank, $last_nl); my($in_header) = 1; while (<$fh>) { s/\r\n/\n/ if (! $nocr); $last_nl = ($_ =~ /\n/); $last_blank = ($_ eq "\n"); if ($in_header) { next if (/^x-bogosity:.*tests=bogofilter/i); if (/^$/) { if ($bogosity) { $_ = "X-Bogosity: $bogosity\n" . $_; } $in_header = 0; } } else { s/^From />From /; } if (! print(MBOX $_)) { syslog('warning', '%s', "write($archive): $!"); goto close_archive; } } # Mbox format requires a blank line at the end if (! ($last_blank || print(MBOX ($last_nl ? "\n" : "\n\n")))) { syslog('warning', '%s', "write($archive): $!"); goto close_archive; } close_archive: if (! close(MBOX)) { syslog('warning', '%s', "close($archive): $!"); } } no_archive_open: &unrestrict_permissions; } sub my_abort_callback { my($ctx) = shift; my $hash = &getpriv($ctx); &debuglog("my_abort_callback: entering with " . Data::Dumper->Dump([&small_hash($hash)], [qw(hash)])); if ($hash->{'fn'}) { unlink $hash->{'fn'}; } &setpriv($ctx, undef); &debuglog("my_abort_callback: returning CONTINUE with undef"); return SMFIS_CONTINUE; } sub my_close_callback { my($ctx) = shift; my $hash = &getpriv($ctx); &debuglog("my_close_callback: entering with " . Data::Dumper->Dump([&small_hash($hash)], [qw(hash)])); if ($hash) { if ($hash->{'fn'}) { unlink $hash->{'fn'}; } } &setpriv($ctx, undef); &debuglog("my_close_callback: returning CONTINUE with undef"); return SMFIS_CONTINUE; } sub filtered_dir { my($uid, $gid, $dir) = &expand_recipient($_[0]); $dir; } sub filtered_user { my($uid, $gid, $dir, $stamp, $user) = &expand_recipient($_[0]); $user; } sub user_subject_filters { my($uid, $gid, $dir, $stamp, $user, $filters) = &expand_recipient($_[0]); $filters ? @{$filters} : (); } sub restrict_permissions { my($rcpt) = shift; my($no_going_back) = shift; my($uid, $gid, $dir) = &expand_recipient($rcpt); if (! (defined($uid) && defined($gid))) { syslog('err', '%s', "internal error: couldn't determine UID and GID " . "for $rcpt"); return undef; } $EUID = $uid; $EGID = $gid; if ($no_going_back) { # When we're ready to exec an external program, i.e., # bogofilter, we want to set the real UID and GID so that, # e.g., bogofilter will look in the correct home directory for # .bogofilter.cf. $UID = $uid; $GID = $gid; } 1; } sub unrestrict_permissions { $EUID = $UID; $EGID = $GID; } my $recipient_cache_last_checked; # $uid, $gid, $dir, $timestamp, $username, \@subject_filters sub expand_recipient { my($rcpt) = @_; my($orig, @expanded); my $now = time; if ($recipient_cache_expire) { if (! defined($recipient_cache_last_checked)) { $recipient_cache_last_checked = $now; } if ($now - $recipient_cache_last_checked > $recipient_cache_check_interval) { my $old = $now - $recipient_cache_expire; my(@keys) = keys %cached_recipients; my(@expired) = grep($cached_recipients{$_}->[3] <= $old, keys %cached_recipients); &debuglog('expiring %d entries (out of %d) ' . 'from the recipient cache', scalar @expired, scalar @keys); map(delete $cached_recipients{$_}, @expired); $recipient_cache_last_checked = $now; } } if ($database_user) { $rcpt = $database_user; } if (defined($cached_recipients{$rcpt})) { return(@{$cached_recipients{$rcpt}}); } $rcpt = &sendmail_canon($orig = $rcpt); if ($rcpt =~ /\@/) { return(@{$cached_recipients{$orig}} = (undef, undef, undef, $now, undef)); } if ($aliases_file) { my $aliases = Mail::Alias::Sendmail->new($aliases_file); @expanded = $aliases->expand($rcpt); } else { @expanded = ($rcpt); } if ((@expanded == 1) && ($expanded[0] eq $rcpt)) { my($dir, $pw); my $stripped = $rcpt; $stripped =~ s/\+.*//; $pw = getpwnam($stripped); @{$cached_recipients{$orig}} = $pw ? ($pw->uid, $pw->gid, undef, $now, $stripped) : (undef, undef, undef, $now, undef); if ($pw && $pw->dir && &restrict_permissions($orig) && -d ($dir = $pw->dir . "/.bogofilter") && ! ($bogofilter_cf && $require_cf && ! -f "$dir/$bogofilter_cf")) { $cached_recipients{$orig}->[2] = $dir; if ($subject_filter_file) { my $sff = $dir . "/" . $subject_filter_file; my @subject_filters; if (open(SFF, "<", $sff)) { while () { s/^\s+//; s/\s+$//; next if (/^\#/); next if (/^$/); my $re; eval '$re = qr/$_/;'; if (! $re) { syslog("warning", "bad subject filter for %s: %s", $stripped, $_); next; } push(@subject_filters, $re); &debuglog(sprintf('subject filter for %s: %s', $stripped, $_)); } } close(SFF); if (@subject_filters) { $cached_recipients{$orig}->[5] = \@subject_filters; } } } elsif ($database_user) { syslog("warning", "Shared database user %s is not configured " . "properly for bogofilter", $database_user); } &unrestrict_permissions; return(@{$cached_recipients{$orig}}); } else { foreach my $addr (@expanded) { my(@sub); if (@sub = &expand_recipient($addr)) { return(@{$cached_recipients{$orig}} = @sub); } } return(@{$cached_recipients{$orig}} = (undef, undef, undef, $now, undef)); } } sub sendmail_canon { return $_[0] if (! $sendmail_canon); my($pid, $sendmail_reader, $sendmail_writer, $last); local($_); $pid = open2($sendmail_reader, $sendmail_writer, $sendmail_prog, '-bt') or &die("open2 for sendmail failed"); print($sendmail_writer "3,0 $_[0]\n"); close($sendmail_writer); while (<$sendmail_reader>) { # CHECKTHIS You should run "sendmail -bt" as root, give it the # input "3,0 addr" where "addr" is one of the addresses in # your virtual user table, and confirm that the last # "returns:" line that it returns matches the regexp here for # local addresses. if (/\s+returns: \$\# local \$\:\s+(.+)/) { $last = $1; $last =~ s/ \+ .*//; } } close($sendmail_reader); waitpid $pid, 0; if ($last) { return $last; } else { return $_[0]; } } sub opendb_read { tie(%ip_whitelist_db, "DB_File", $ip_whitelist_db, O_RDONLY, 0, $DB_HASH) or &die("Can't open $ip_whitelist_db: $!"); } sub closedb { untie %ip_whitelist_db; } sub die { my(@msg) = @_; &closedb; syslog('err', '%s', "@msg"); exit(1); } sub debuglog { syslog('debug', "%s", "DEBUG: " . join("", @_)); } my(%mx_cache); sub reject_or_discard { my($hash) = @_; my $hostname; foreach my $i (0..@discard_control-1) { my($pattern, $action) = @{$discard_control[$i]}; my $ret; if ($action =~ /^reject$/i) { $ret = SMFIS_REJECT; } elsif ($action =~ /^discard$/i) { $ret = SMFIS_DISCARD; } else { &die("Invalid action $action ", "for discard control pttern $pattern\n"); } if ($pattern =~ /^addr:(.*)$/i) { my $addr = $1; &die("Invalid IP address in discard control pattern $pattern\n") if ($addr !~ /^\d+\.\d+\.\d+\.\d+$/); if ($hash->{'ipaddr'} eq $addr) { &debuglog("reject_or_discard: addr match $addr: $action"); return $ret; } } elsif ($pattern =~ /^netblock:(.*)$/i) { my $netblock = $1; &die("Invalid netblock in discard control pattern $pattern\n") if ($netblock !~ /^\d+\.\d+\.\d+\.\d+\/\d+$/); if (Net::CIDR::cidrlookup($hash->{'ipaddr'}, $netblock)) { &debuglog("reject_or_discard: netblock match ", "$hash->{ipaddr} in $netblock: $action"); return $ret; } } elsif ($pattern =~ /^host:(.*)$/i) { my $match_host = lc $1; $hostname = lc gethostbyaddr(inet_aton($hash->{ipaddr}), AF_INET) if (! $hostname); if ($match_host eq $hostname) { &debuglog("reject_or_discard: ", "host match $hostname for $hash->{ipaddr}: ", "$action and cache"); splice(@discard_control, $i, 0, [ "addr:$hash->{ipaddr}", $action ]); return $ret; } } elsif ($pattern =~ /^domain:(.*)$/i) { my $match_domain = lc $1; $hostname = lc gethostbyaddr(inet_aton($hash->{ipaddr}), AF_INET) if (! $hostname); if ($match_domain eq $hostname or (substr($hostname, -1-length($match_domain)) eq ".$match_domain")) { &debuglog("reject_or_discard: domain match ", "$hostname for $hash->{ipaddr} in $match_domain: ", "$action and cache"); splice(@discard_control, $i, 0, [ "addr:$hash->{ipaddr}", $action ]); return $ret; } } elsif ($pattern =~ /^mx$/i) { my $mx_domain = lc $hash->{'envrcpt'}; if (! $mx_domain) { &debuglog("reject_or_discard: no envrcpt\n"); next; } $mx_domain =~ s/.*\@(.*[^\>])\>?/$1/; my %mx_ips; if ($mx_cache{$mx_domain} and # refetch MX records once per hour time - $mx_cache{$mx_domain}->[0] < 60 * 60) { %mx_ips = %{$mx_cache{$mx_domain}->[1]}; } else { my %mx_ips; foreach my $mx (mx($mx_domain)) { my($name, $aliases, $addrtype, $length, @addrs) = gethostbyname($mx->exchange); foreach my $addr (@addrs) { $mx_ips{inet_ntoa($addr)} = 1; } } $mx_cache{$mx_domain} = [time, \%mx_ips]; &debuglog("reject_or_discard: cached MX IPs ", join(" ", sort keys %mx_ips), " for domain $mx_domain"); } if ($mx_ips{$hash->{'ipaddr'}}) { &debuglog("reject_or_discard: MX addr match ", "$hash->{ipaddr} for domain $mx_domain: $action"); return $ret; } } elsif ($pattern eq "*") { return $ret; } else { &die("Unrecognized discard control pattern: $pattern"); } } return SMFIS_REJECT; } sub getpriv { my($ctx) = @_; my $d = $ctx->getpriv(); my $VAR1; if ($d) { eval $d; } else { undef; } } sub setpriv { my($ctx, $value) = @_; if (defined $value) { my $d = Dumper($value); $ctx->setpriv($d); } else { $ctx->setpriv(undef); } } sub small_hash { my($hash) = @_; return undef if (! $hash); my(%hash2) = %{$hash}; $hash2{'msg'} = "..." if ($hash2{'msg'} and length($hash2{'msg'}) > 100); \%hash2; }