Last week RBL slowness and outages impacted a few people on this list, including us at FastMail.FM, where it resulted in a sizable mail queue forming. Even reducing rbl_timeout pretty low wasn't enough of a solution for us, because we still needed each message to go through in under a second.

The attached patch implements a solution to this problem. It keeps a list of RBLs which failed to respond within rbl_timeout, and doesn't bother trying them again for the next hour. After an hour, it checks them once and delists them as appropriate.

We use SpamAssassin within a preforking Perl server, so we need all processes to share state. In this patch we have used MLDBM::Sync to provide (semi-)transparent persistent shared state for the list of dead RBLs. You will need to install MLDBM::Sync from CPAN to use this patch.

I wasn't sure where to store the MLDBM DB, so it's just hardcoded in this patch. Could someone more familiar with the SA code suggest a way to choose the 'best' directory for these files?

Although not implemented in this patch, I'd suggest using the MLDBM cache for other shared state, such as cached RBL lookups. I've placed the tied hash in {main}->{_cache} so that other parts of the code can access it conveniently too.

This is the first patch I've contributed to SA, so style hints and other constructive criticism would be much appreciated.
--- SpamAssassin.pm     Fri Dec 13 06:07:50 2002
+++ SpamAssassin.pm.new Fri Dec 13 06:09:34 2002
@@ -61,6 +61,9 @@
 use Mail::SpamAssassin::PerMsgStatus;
 use Mail::SpamAssassin::NoMailAudit;
 
+use MLDBM::Sync;
+use MLDBM qw(MLDBM::Sync::SDBM_File Storable);
+use Fcntl qw(:DEFAULT);
 use File::Basename;
 use File::Path;
 use File::Spec 0.8;
@@ -771,7 +774,11 @@
 
   delete $self->{config_text};
 
-  # TODO -- open DNS cache etc. if necessary
+  my %cache;
+  my $cache = "/etc/mail/spamassassin/cache.sdbm";
+  tie %cache, 'MLDBM::Sync', $cache, O_RDWR|O_CREAT, 0666
+    or die "Cannot open file $cache: $!";
+  $self->{_cache} = \%cache;
 }
 
 ###########################################################################
--- EvalTests.pm        Wed Dec 11 02:09:24 2002
+++ EvalTests.pm.new    Wed Dec 11 02:08:01 2002
@@ -923,7 +920,7 @@
        }
       }
       
-      $found = $self->do_rbl_lookup ($set, "$b4.$b3.$b2.$b1.".$rbl_domain, $ip, 
$found, $dialupreturn, $needresult);
+      $found = $self->do_rbl_lookup ($set, "$b4.$b3.$b2.$b1.".$rbl_domain, $ip, 
+$found, $dialupreturn, $needresult, $rbl_domain);
       dbg("Got $found on $ip (item $i)", "rbl", -3);
     }
   };
--- Dns.pm      Fri Oct  4 08:54:16 2002
+++ Dns.pm.new  Fri Dec 13 06:10:38 2002
@@ -113,11 +113,13 @@
 ###########################################################################
 
 sub do_rbl_lookup {
-  my ($self, $set, $dom, $ip, $found, $dialupreturn, $needresult) = @_;
+  my ($self, $set, $dom, $ip, $found, $dialupreturn, $needresult, $rbl_domain) = @_;
   my $socket;
   my @addr=();
   my $maxwait=$self->{conf}->{rbl_timeout};
   return $found if $found;
+  my $cache = $self->{main}->{_cache};
+  my $rbl_cache = $cache->{_rbls_down} || {};
 
   my $gotdialup=0;
   my $domainonly;
@@ -138,6 +140,14 @@
   } else {
     timelog("RBL -> Waiting for result on $dom", "rbl", 1);
     $socket=$self->{dnscache}->{rbl}->{$dom}->{socket};
+    if ($rbl_domain && (my $rbl_down = $rbl_cache->{$rbl_domain})) {
+      if (time() - $rbl_down < 3600) {
+        dbg("Not checking rbl $rbl_domain; marked down", "rbl", -1);
+        return 0;
+      } else {
+        dbg("Re-checking whether rbl $rbl_domain still down", "rbl", -1);
+      }
+    }
     
     while (not $self->{res}->bgisready($socket)) {
       last if (time - $self->{dnscache}->{rbl}->{$dom}->{time} > $maxwait);
@@ -147,8 +157,17 @@
     if (not $self->{res}->bgisready($socket)) {
       timelog("RBL -> Timeout on $dom", "rbl", 2);
       dbg("Query for $dom timed out after $maxwait seconds", "rbl", -1);
+      if ($rbl_domain) {
+        $rbl_cache->{$rbl_domain} = time();
+        $cache->{_rbls_down} = $rbl_cache;
+      }
+
       return 0;
     } else {
+      if ($rbl_domain && $rbl_cache->{$rbl_domain}) {
+        delete $rbl_cache->{$rbl_domain};
+        $cache->{_rbls_down} = $rbl_cache;
+      }
       my $packet = $self->{res}->bgread($socket);
       undef($socket);
       foreach $_ ($packet->answer) {


Reply via email to