On Thu, 22 Jan 2004, Vermyndax wrote:
> Greetings all...
>
> I am trying to implement a way to generate statistics for Spamassassin.
> I've tried numerous perl scripts but most of them return all zeros for
> the stats. The biggest example I can think of is the sa-stats.pl
> script. No matter what I do, the script returns all zeros. Is it
> because it's having a problem parsing out my log file? Can any of you
> perl experts give me a little help? Or can any of you point me in the
> direction of some fantastic stats generator for SpamAssassin?
I fixed a couple of things in spamstats0.4b5.pl a while ago.
See the attached version and give it a try.
regards,
Matthias
#!/usr/bin/perl -w
## FIXME : remove the -w above when you find the code is fixed !
#
#This is spamstats.pl v0.4b5
#
#
#Changelog
#0.4b5 11 August 2003
#Fixed the bug when a month starting with a zero is entered as start/enddate.
#
#0.4b4 10 June 2003
# Fixed the infile == 0 bug, thanks to Yen-Ming Lee
# Fixes sendmail parsing when email is delivered through procmail, raised by Dirk
# Kuypers
#
#
#0.4b3 2 June 2003
#Applied patches from Bob Apthorpe for :
# * more elegant fix of the two digits month input problem
# * better input handling, now files to process can be specified in @ARGV without the
#   --file switch
# * Added documentation and scripts to graph spamstats output with cricket.
#
#0.4b2 30 May 2003
#Regexp bugfix in exim mailer_in handling
#Regexp bugfix in spamd ("processing message" seems to have changed to "checking
#message") on some setups.
#Updated README into a more english (and less french) syntax
#
#0.4b1 19 May 2003
#This is a very tiny bugfix.
#Fixes parsing mistakes on sendmail setups that relay emails as outputs.
#Emails were undetected on those setups.
#
#0.4b 10 Mar 2003
#WARNING : this release changes the default behaviour of spamstats calculations !!
#From this version on spamstats counts spams and non-spams per recipient, not per
#mailer ID.
#(Until this version, a multirecipient message sent to both "[EMAIL PROTECTED]" and
#"[EMAIL PROTECTED]"
#counted only as one spam. From now on it counts as two.)
#New option : -agglo-recipients uses spamstats "old" mode : one count per mailer ID,
#not per recipient.
#WARNING : FOR NOW EXIM USERS PROBABLY WANT TO USE THIS OPTION, ON SOME EXIM CONFIGS
# THERE ARE RISKS LOG ANALYSIS BE BROKEN IF NOT USED!
#Applied patch from Jim Breton <[EMAIL PROTECTED]> for a better display.
#
#0.4 25 Feb 2003
#[Probably very incomplete] sendmail support
#Only sendmail regexp were added, no code modification !
#This is not a very important release in terms of work. Hopefully it is in terms
#of capabilities :-)
#
#0.3b2 30 Jan 2003
#Fix a problem where script will issue warnings when parsed log file is empty or
#contains no reference to used mailer (only contains spamd messages).
#
#0.3b 04 Jan 2003
#Added a (hopefully) useful time filter specification to be used : duration
#specification.
#
#0.3a 29 Dec 2002
#Date/Time filter now works.
#Some tiny code cleanup.
#HTML output support.
#
#0.3alpha 17 Dec 2002
#Exim support
#Some work on date/time filtering support, far from complete. These options are
#useless for now.
#
#0.2f 26 Nov 2002
#If one input file does not exist, mentions which!
#
#0.2e 26 Nov 2002
#Option "-noabsolute" makes spamstats not complain if argument log file names are not
#absolute.
#Now reports total Volume of Spam and Volume of clean messages in general statistics.
#
#0.2d
#Local recipients were not counted, only relayed ones.
#Regexp was modified to just match both.
#Thanks to Jon Gabrielson for bug report
#
#0.2c
#No more lower/upper case distinction in top recipients classification
#Thanks to Kenneth Nerhood for bug report
#
#0.2b
#Fixes stupid bug from 0.2 where spamd process had to run as user "spamd"
#Thanks to Kenneth Nerhood for bug report
#
#
#Parses a Postfix and spamd log file (or several) and extracts top Spam receivers.
#Also displays spam statistics.
#
#Author : Vincent Deffontaines <[EMAIL PROTECTED]>
#Script Basis, Postfix support Copyright : Vincent Deffontaines
# KDX (www.kdx.fr)
# Council of Europe (www.coe.int)
#
#Exim support Copyright : Vincent Deffontaines.
#Sendmail support Copyright : Vincent Deffontaines.
#
#Please send me contributions/ modifications/ comments that could be useful to this
#script!
#Mailers other than Postfix/Exim shouldn't be hard to support.
#Author will help and include modifications to this script as long as mailers are free
#software.
#
#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation; version 2
#of the License.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
use strict;
use Getopt::Long;
#Only useful for Debugging, useless if you don't hack through this code :-)
#use Data::Dumper;
use Compress::Zlib;
# ---- File-level state: command-line options and their defaults ----

# Set of log files to process (file name => 1).
my %infile = ();

# Boolean switches, all off unless given on the command line.
my ($help, $nogeneral, $debug, $noabsolute, $html, $agglo_rcpt) = (0) x 6;

# Numeric options: size of the "top receivers" list and time-filter duration.
my ($number, $duration) = (0, 0);

# Number of input validation errors found so far.
my $error = 0;

# Time-filter bounds; the string "none" means "not specified".
my ($starttime, $endtime, $startdate, $enddate) = ('none') x 4;

# Flags telling the processing loop to skip the lower/upper time-bound test.
my ($skipstarttest, $skipendtest) = (0, 0);

# Mailer whose log format is in use; stays undefined until one is detected.
my $mailerlogtype;

# Markup fragments printed around the report; every fragment is empty here,
# which yields plain-text output.
my %html_tags = map { $_ => '' } qw(br b i html body endtag starttag);

# Share of spam / clean messages among all analysed messages.
my ($spam_percent, $clean_percent) = (0, 0);
# Parse regexp definitions for each mailer and for spamd.
# The patterns are kept as plain strings and interpolated into matches by the
# processing loop. Capture groups the loop relies on:
#   mailer_in   : $2 = mailer queue id, $3 = message id (for exim: rest of the line)
#   mailer_out  : $1 = mailer queue id, $2 = recipient address
#   spamd_in    : $1 = spamd pid,       $2 = message id
#   spamd_clean / spamd_spam :
#                 $1 = spamd pid, $2 = score, $3 = analysis time (s), $4 = size (bytes)
my %Defs = ();

$Defs{'mailer_in'}{'postfix'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+postfix\/cleanup\[(\d*)\]:\s+([^:]+):\s*message-id=(.*)$';

# Earlier exim variants, kept for reference. Their text reached us with part of
# the pattern replaced by "[EMAIL PROTECTED]" (mail archive redaction).
#$Defs{'mailer_in'}{'exim'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+exim\[\d+\]:\s+\d{4}-\d{2}-\d{2}\s+\d+:\d+:\d+\s+<[EMAIL PROTECTED](.*)$';
#$Defs{'mailer_in'}{'exim'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+exim\[(\d+)\]:\s+\d{4}-\d{2}-\d{2}\s+\d+:\d+:\d+\s+(\S+)\s+<[EMAIL PROTECTED](?:U=\S+|H=.*)\s+P=\S+\s+S=\S+\s+id=(.*)$';

# NOTE(review): the fragment "=\s+\S+@\S+\s+" after "<" was redacted in the copy
# this file was recovered from and is a best-effort reconstruction (exim logs
# arrivals as "<id> <= <sender> ..."). Confirm against a pristine spamstats 0.4b5.
$Defs{'mailer_in'}{'exim'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+exim\[(\d+)\]:\s+\d{4}-\d{2}-\d{2}\s+\d+:\d+:\d+\s+(\S+)\s+<=\s+\S+@\S+\s+(.*)$';

$Defs{'mailer_in'}{'sendmail'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+sendmail\[(\d+)\]:\s+(\S+):\s+from=<[^>]*>,\s+size=\d+,\s+class=\S+,\s+nrcpts=\d+,\s+msgid=<([^>]+)>.*,\s+proto=\S+,\s+daemon=\S+,\s+relay=.*$';

# spamd announces the message it starts to analyse ("processing" or "checking").
$Defs{'spamd_in'}{'postfix'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+spamd\[(\d+)\]:\s+(?:processing|checking)\s+message\s*(.*)\s+for\s+\S+';
$Defs{'spamd_in'}{'exim'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+spamd\[(\d+)\]:\s+(?:processing|checking)\s+message\s*<(.*)>\s+for\s+\S+';
$Defs{'spamd_in'}{'sendmail'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+spamd\[(\d+)\]:\s+(?:processing|checking)\s+message\s*<(.*)>\s+for\s+';

# spamd verdict lines (identical for every mailer).
$Defs{'spamd_clean'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+spamd\[(\d+)\]:\s+clean\s+message\s*\(([^\/]+)\/[^\)]+\)\s+for\s+\S+\d+\s+in\s+(\S+)\s+seconds,\s+(\d+)\s+bytes\.';
$Defs{'spamd_spam'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+spamd\[(\d+)\]:\s+identified\s+spam\s*\(([^\/]+)\/[^\)]+\)\s+for\s+\S+\d+\s+in\s+(\S+)\s+seconds,\s+(\d+)\s+bytes\.';

$Defs{'mailer_out'}{'postfix'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+postfix\/(?:pipe|local)\[\d+\]:\s+([^:]+):\s+to=<([^>]+)>';
#'Mar 10 02:11:24 barrel postfix/smtp[20611]: 5A9BF22E04: to=<obfuscated>,
# relay=127.0.0.1[127.0.0.1], delay=2, status=sent (250 ok 1047280284 qp 20787)'

# NOTE(review): the capture "(\S+@\S+)" was redacted in the recovered copy and
# is a best-effort reconstruction; confirm against a pristine spamstats 0.4b5.
$Defs{'mailer_out'}{'exim'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+exim\[\d+\]:\s+\d{4}-\d{2}-\d{2}\s+\d+:\d+:\d+\s+(\S+)\s+=>\s+(\S+@\S+)\s+';

$Defs{'mailer_out'}{'sendmail'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+sendmail\[\d+\]:\s+(\S+):\s+to=(?:\|.*ctladdr=<|<)([^>]+)>.*,\s+delay=\S+,\s+xdelay=\S+,\s+mailer=\S+,\s+pri=\d+.*,\s+dsn=\S+,\s+stat=\S+';
sub Print_Usage()
# Prints the command-line help on the currently selected output handle.
# Takes no argument; the return value is not meaningful.
# Each message is written on a single source line so that no stray line break
# ends up in the middle of a sentence of the help text.
{
    print "{Exim/Postfix/Sendmail} & spamd logfile analyser. Extracts top N Spam receivers\n";
    print "$0 [-help] [-debug][-file=/path/to/filename] [-file=...] [-number=...] [-nogeneral]\n";
    print " [-startdate=dd-mm] [-starttime=hh:mm:ss] [-enddate=dd-mm] [-endtime=hh:mm:ss]\n";
    print " [-duration=number of seconds] /path/to/file1 [/path/to/file2] [/path/to/file3.gz]\n";
    print "GENERAL OPTIONS\n";
    print "\t-debug\t\t\t: Displays informations that _might_ indicate problems while parsing.\n";
    print "\t-help\t\t\t: Displays this help and exits.\n";
    # The "options," line is terminated with "\n" so the following line starts
    # on its own row like every other continuation line.
    print "\t-file /path/file\t: Analyses mail log file for spam results (as logged by spamd) :\n"
        ."\t\t\t\t Several files can be asked for parsing at a time, including .gz files\n"
        ."\t\t\t\t Default /var/log/mail.log\n"
        ."\t\t\t\t This switch is deprecated, simply specify filenames after all options,\n"
        ."\t\t\t\t without any switch.\n";
    print "\t-number number\t\t: specifies number of top spam receivers to display (default : 0).\n";
    print "\t-nogeneral\t\t: do not display general stats.\n";
    print "\t-noabsolute\t\t: lets non-absolute named logfiles be processed.\n";
    print "\t-html\t\t\t: HTML output\n";
    print "TIME FILTER OPTIONS (no time filter used if no option specified)\n";
    print "\t-startdate dd-mm\t: Process only data logged from that date\n";
    print "\t\t\t\t Default : today if starttime specified, else unused\n";
    print "\t-enddate dd-mm\t: Process only data logged until that date\n";
    print "\t\t\t\t Default : today if endtime specified, else unused\n";
    print "\t-starttime hh:mm:ss\t: Process only data logged from that time (default time : 0:00:00)\n";
    print "\t-endtime hh:mm:ss\t: Process only data logged until that time (default time : current time)\n";
    print "\t-duration seconds\t: Work only on specified duration.\n";
    print "\t\t\t\t To be used with start XOR end{time/date}, obviously not with both.\n";
    print "\t\t\t\t Default : unused\n";
    print "\t\t\t\t Default if no other time switch : process n seconds until current time.\n";
    print "\tWhy no year in dates input? Just because there is no year reported in postfix mail logs\n";
    print "\tThis will obviously cause time filter problems around new year!\n";
    print "\t-agglo-recipients\t: Old spamstats counting. One count by mail ID, not by actual recipient.\n";
    print "\t\t\t\t EXIM users WANT to set this for now!\n";
}
sub unify($$);
sub unify($$)
# Converts (value, unit) into a more human readable expression.
# Input  : a volume and its unit ("bytes", "kbytes", "Mbytes" or "Gbytes").
# Output : the list (volume, unit), where the volume has been divided by 1024
#          and the unit promoted to the next one for as long as the promoted
#          volume stays strictly greater than 5.
# The ladder stops at "Tbytes"; a unit that is not part of the ladder is
# returned untouched together with its volume.
{
    my ($volume, $unit) = @_;
    my %next_unit = (
        'bytes'  => 'kbytes',
        'kbytes' => 'Mbytes',
        'Mbytes' => 'Gbytes',
        'Gbytes' => 'Tbytes',
    );
    while (exists $next_unit{$unit} and ($volume / 1024) > 5)
    {
        $volume = $volume / 1024;
        $unit = $next_unit{$unit};
    }
    return ($volume, $unit);
}
sub check_date($)
#Validates a date given as "d[d]-m[m]"; the literal string "none" is accepted too.
#Returns 0 when the date is acceptable, 1 otherwise.
#Only the ranges are checked (day 1..31, month 1..12), not the actual length
#of the month.
{
    my ($candidate) = @_;
    return 0 if $candidate eq 'none';
    return 1 unless $candidate =~ /^(\d{1,2})-(\d{1,2})$/;
    my ($dd, $mm) = (int($1), int($2));
    my $in_range = ($dd >= 1 and $dd <= 31 and $mm >= 1 and $mm <= 12);
    return $in_range ? 0 : 1;
}
sub check_time($)
#Checks given time is correct (expected format string: "h[h]:mm:ss" or "none").
#Returns 0 if correct, 1 if not.
#Hours run from 0 to 23; "24:00:00" is additionally accepted as "end of day",
#but no other time with hour 24 is.
{
    my $time = shift @_;
    $time eq 'none' and return 0;
    unless ($time =~ /^(\d{1,2}):(\d{2}):(\d{2})$/)
    {
        return 1;
    }
    my $hour = int($1); my $minute = int($2); my $second = int($3);
    unless (($minute < 60) and ($second < 60))
    {
        return 1;
    }
    $hour < 24 and return 0;
    #Hour 24 only makes sense as the very last instant of the day
    (($hour == 24) and ($minute == 0) and ($second == 0)) and return 0;
    return 1;
}
sub convert_date_time_to_epoch($$)
#Input : date "d[d]-m[m]", time "h[h]:mm:ss"
#Output : Pseudo epoch (no year included in input) : number of seconds counted
#         from the start of the year, January 1st being day number 1.
#Returns -1 in case of trouble (unparsable date or time, month outside 1..12).
#This function will ALWAYS be BUGGY around new year days.
#This function also presents a bug in case of leap year (when Feb 29 exists):
#February is always counted as 28 days.
#This is due to year not being logged, which indeed confuses things on such days.
{
    my ($date, $time) = @_;
    unless ($date =~ /^(\d{1,2})-(\d{1,2})$/)
    {
        return -1;
    }
    #int() also removes a leading 0 in day or month if there is one
    my $day = int($1); my $month = int($2);
    unless ($time =~ /^(\d{1,2}):(\d{2}):(\d{2})$/)
    {
        return -1;
    }
    my $hour = int($1); my $minute = int($2); my $second = int($3);
    if (($month < 1) or ($month > 12))
    {
        return -1;
    }
    my @days_in_month = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
    #Days elapsed in the months that precede $month : results must grow with
    #the calendar, so the lengths of all previous months are summed up.
    my $days_before = 0;
    foreach my $index (0 .. $month - 2)
    {
        $days_before += $days_in_month[$index];
    }
    my $result = ($days_before + $day) * 24 * 3600 + $hour * 3600 + $minute * 60 + $second;
    return ($result);
}
sub fill_zeros($)
#Input : a reference to a list of numbers
#Output : same numbers list, each preceded by a zero if originally exactly
#         one digit long
#The referenced list itself is modified in place as well, because the loop
#variable aliases its elements.
{
    my ($list) = @_;
    foreach my $number (@$list)
    {
        $number = "0".$number if $number =~ /^\d$/;
    }
    return @$list;
}
# ---- Command line parsing, input validation and time filter setup ----

my @infiles = ();
# "file=s" is bound to an array reference so that -file can be given several times.
GetOptions( "file=s" => \@infiles,
            "number=i" => \$number,
            "help" => \$help,
            "debug" => \$debug,
            "noabsolute" => \$noabsolute,
            "nogeneral" => \$nogeneral,
            "html" => \$html,
            "startdate=s" => \$startdate,
            "enddate=s" => \$enddate,
            "starttime=s" => \$starttime,
            "endtime=s" => \$endtime,
            "duration=s" => \$duration,
            "agglo-recipients" => \$agglo_rcpt);
#Whatever remains on the command line is a list of log files
push @infiles, @ARGV if (@ARGV);
my $defmaillog = '/var/log/mail.log';
push @infiles, $defmaillog if ($#infiles == -1 && -f $defmaillog);
#Only names of existing plain files are registered for processing
foreach my $fn (@infiles) {
    if (-f $fn) {
        $infile{$fn} = 1;
    }
}
if ($help)
{
    Print_Usage();
    exit 0;
}
#Sanity checks
unless (check_date ($startdate) == 0)
{
    print STDERR "Bad input format start date was entered\n";
    $error++;
}
unless (check_date ($enddate) == 0)
{
    print STDERR "Bad input format end date was entered\n";
    $error++;
}
unless (check_time ($starttime) == 0)
{
    print STDERR "Bad input format start time was entered\n";
    $error++;
}
unless (check_time ($endtime) == 0)
{
    print STDERR "Bad input format end time was entered\n";
    $error++;
}
unless ($duration=~/^\d+$/)
{
    print STDERR "Bad input : duration is supposed to be numeric\n";
    $error++;
}
$html and %html_tags=('br'=>'br>','b'=>'b>','i'=>'i>','html'=>'html>','body'=>'body>','endtag'=>'</','starttag'=>'<');
foreach my $file (sort keys %infile)
{
    #NOTE(review): this pattern is not anchored, so it only rejects names that
    #contain none of the listed characters at all.
    unless ($file =~ /[a-zA-Z\.\/ \\0-9]+/)
    {
        die "Illegal characters read in parameter file name!\n";
    }
    unless (-f $file)
    {
        print STDERR "$file : File does not exist!\n";
        $error ++;
    }
    if (($file !~/^\//) and ($noabsolute == 0))
    {
        print STDERR "$file : Path to file must be absolute, or you must specify the \"-noabsolute\" option\n";
        $error ++;
    }
}
#A bound of the time filter is tested only if its date or its time was given
if (($starttime eq "none") and ($startdate eq "none"))
{
    $skipstarttest = 1;
}
if (($enddate eq "none") and ($endtime eq "none"))
{
    $skipendtest = 1;
}
if (($duration>0) and ($skipstarttest == 0) and ($skipendtest == 0))
{
    print STDERR "Input redundancy : You may not specify starttime, endtime and duration\n";
    $error ++;
}
$error and exit 1;
#Missing dates default to today, missing times to midnight (start) and now (end)
if ($startdate eq 'none')
{
    my ($day,$month) = (localtime)[3,4];
    $month ++;
    $startdate = $day."-".$month;
}
if ($enddate eq 'none')
{
    my ($day,$month) = (localtime)[3,4];
    $month ++;
    $enddate = $day."-".$month;
}
$starttime eq 'none' and $starttime = '00:00:00';
if ($endtime eq 'none')
{
    #(sec, min, hour) are zero padded, then reversed into "hh:mm:ss"
    my @tab = (localtime)[0,1,2];
    @tab = fill_zeros(\@tab);
    $endtime = join (':',reverse(@tab));
}
print $html_tags{'starttag'}.$html_tags{'html'}.$html_tags{'starttag'}.$html_tags{'body'};
#print "Time filter used : From $startdate $starttime to $enddate $endtime\n";
my $epoch_start = convert_date_time_to_epoch($startdate, $starttime);
my $epoch_end = convert_date_time_to_epoch($enddate, $endtime);
if ($duration > 0)
{
    #The duration replaces whichever bound was not given. When neither bound
    #was given, the period ends now and starts $duration seconds earlier.
    if ($skipstarttest == 1)
    {
        $epoch_start = $epoch_end - $duration;
        $duration = 0;
        $skipstarttest = 0;
        $skipendtest = 0;
    }
    elsif ($skipendtest == 1)
    {
        $epoch_end = $epoch_start + $duration;
        $duration = 0;
        $skipstarttest = 0;
        $skipendtest = 0;
    }
}
#print "DEBUG : $epoch_start to $epoch_end\n";
$epoch_start > $epoch_end and print STDERR "WARNING : time filter seems incorrect : it starts after it ends! $epoch_start > $epoch_end\n";
#Month abbreviations as written in the log lines
my %mounths = ("Jan"=>1,"Feb"=>2,"Mar"=>3,"Apr"=>4,"May"=>5,"Jun"=>6,"Jul"=>7,"Aug"=>8,"Sep"=>9,"Oct"=>10,"Nov"=>11,"Dec"=>12);
my $is_gz = 0;
my $gz;
#$gzerrno is deliberately NOT declared here : a lexical of that name would hide
#the variable imported from Compress::Zlib, and gzopen errors would then be
#reported with an empty message.
my %spam = ();          #spam count per recipient
my %clean = ();         #clean message count per recipient
my %mailer_table = ();  #mailer queue id => message id
my %spamd_table = ();   #message id => "spam" or "clean"
my %spamd_pid = ();     #spamd pid => message id under analysis
my $spam_score = 0;
my $clean_score = 0;
my $spam_time = 0;
my $clean_time = 0;
my $basic_spam_nb = 0;
my $basic_clean_nb = 0;
my $spam_volume = 0;
my $clean_volume = 0;
my $incorrect_lines = 0; #Count unparsable lines
my $correct_lines = 0; #Count parsable lines
#Processing
#Every input file is read line by line (through Compress::Zlib for ".gz" names).
#For each line that starts with a syslog time stamp and passes the time filter :
# * a mailer "message in" line records   queue id -> message id   in %mailer_table
# * a spamd "processing" line records    spamd pid -> message id  in %spamd_pid
# * a spamd verdict line records         message id -> verdict    in %spamd_table
#   and feeds the global counters (number, score, time, volume)
# * a mailer "message out" line credits the recipient in %spam or %clean,
#   according to the verdict recorded for its message.
FILELOOP: foreach my $file (keys %infile)
{
    undef $mailerlogtype;
    my $first_date = "";
    my $last_date = "";
    my $line;
    my $linetime = 0;
    my $fh;
    #Stored as message id for Exim messages logged without any "id=" field.
    #Hope this will never be a real id ...
    my $no_exim_id = "I_have_no_id_за:-(";
    $is_gz = 0;
    if ($file=~/\.gz$/) #We have a gz file
    {
        $gz = gzopen($file, "r") or die "Cannot open $file : $gzerrno\n";
        $is_gz = 1;
    }
    else
    {
        open($fh, '<', $file) or die "Unable to open file $file : $!\n";
    }
    LINE: while (1)
    {
        my $have_line;
        if ($is_gz)
        {
            die "File not open!\n" if not defined $gz;
            $have_line = ($gz->gzreadline($line) > 0);
        }
        else
        {
            $have_line = defined($line = <$fh>);
        }
        unless ($have_line)
        {
            #End of file : close it, report the period it covers, next file
            if ($is_gz)
            {
                $gz->gzclose();
            }
            else
            {
                close $fh;
            }
            print $html_tags{'starttag'}.$html_tags{'br'};
            print "File $file : from $first_date to $last_date\n";
            next FILELOOP;
        }
        unless ($line =~ /^\s*([a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+)\s+\S+\s+/)
        {
            $incorrect_lines ++;
            next LINE;
        }
        my $stamp = $1;
        #The first line naming a known mailer decides how this file is parsed
        unless (defined $mailerlogtype)
        {
            if ($line =~ /^\s*([a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+)\s+\S+\s+(exim|postfix|sendmail)/)
            {
                $mailerlogtype = $2;
                #print $mailerlogtype." style log file detected\n";
            }
        }
        $correct_lines ++;
        if ($first_date eq "") {$first_date = $stamp;}
        $last_date = $stamp;
        #Time filter
        if (($skipstarttest * $skipendtest) == 0)
        {
            if ($last_date =~ /^([a-zA-Z]{3})\s+(\d+)\s+(\d+:\d+:\d+)$/)
            {
                my ($mon, $mday, $clock) = ($1, $2, $3);
                #An unknown month name gives -1, as an unparsable date does
                $linetime = defined $mounths{$mon}
                    ? convert_date_time_to_epoch($mday."-".$mounths{$mon}, $clock)
                    : -1;
            }
        }
        next LINE if (($skipstarttest != 1) and ($linetime < $epoch_start));
        next LINE if (($skipendtest != 1) and ($linetime > $epoch_end));
        #Here is Mailer analysis section. Spamd analysis is below.
        #We are not running this code unless we know which mailer we are having:
        if (defined $mailerlogtype)
        {
            #Email IN
            if ($line =~ /$Defs{'mailer_in'}{$mailerlogtype}/)
            {
                my ($queue_id, $rest) = ($2, $3);
                if (defined $mailer_table{$queue_id})
                {
                    delete $mailer_table{$queue_id};
                    if ($debug)
                    {
                        print $html_tags{'starttag'}.$html_tags{'br'};
                        print "INFO: A message \"id\" already existed as $queue_id. Deleted it from mailer_table before renew.\n";
                    }
                }
                #Exim specific :-( the message id has to be dug out of the
                #remainder of the line, and may be missing
                if ($mailerlogtype eq 'exim')
                {
                    if ($rest =~ /^\s*id=(.*)$/)
                    {
                        $mailer_table{$queue_id} = $1;
                    }else{
                        $mailer_table{$queue_id} = $no_exim_id;
                    }
                }else{
                    $mailer_table{$queue_id} = $rest;
                }
                next LINE;
            }
            #EMAIL SENT
            if ($line =~ /$Defs{'mailer_out'}{$mailerlogtype}/)
            {
                my ($queue_id, $recipient) = ($1, $2);
                if (defined $mailer_table{$queue_id})
                {
                    #Exim specific code :-( without message id, fall back on
                    #spamd verdicts whose message id contains the queue id
                    if (($mailerlogtype eq "exim") and ($mailer_table{$queue_id} eq $no_exim_id))
                    {
                        foreach my $key (keys %spamd_table)
                        {
                            #\Q..\E : the queue id is data, not a pattern
                            next unless ($key =~ /\Q$queue_id\E/);
                            if ($spamd_table{$key} eq "spam")
                            {
                                $spam{lc($recipient)} ++;
                            }elsif ($spamd_table{$key} eq "clean")
                            {
                                $clean{lc($recipient)} ++;
                            }
                        }
                    }
                    #End exim specific code
                    my $message_id = $mailer_table{$queue_id};
                    if (defined $spamd_table{$message_id})
                    {
                        if ($spamd_table{$message_id} eq "spam")
                        {
                            $spam{lc($recipient)} ++;
                        }elsif ($spamd_table{$message_id} eq "clean")
                        {
                            $clean{lc($recipient)} ++;
                        }
                        #"Old" counting : forget the message after its first
                        #recipient, so that it is counted only once
                        if ($agglo_rcpt)
                        {
                            delete $spamd_table{$message_id};
                            delete $mailer_table{$queue_id};
                        }
                    }
                }else{
                    if ($debug)
                    {
                        print $html_tags{'starttag'}.$html_tags{'br'}.$html_tags{'starttag'}.$html_tags{'b'};
                        print "CRITICAL : Warning : Mailer delivered a message it never received? id $queue_id";
                        print $html_tags{'endtag'}.$html_tags{'b'}."\n";
                    }
                }
                next LINE;
            }
            #spamd starts working on a message : remember which pid handles it
            if ($line =~ /$Defs{'spamd_in'}{$mailerlogtype}/)
            {
                $spamd_pid{$1} = $2;
                next LINE;
            }
        }
        #Detected as NON spam - Lets delete all its references from the buffer
        if ($line =~ /$Defs{'spamd_clean'}/)
        {
            my ($pid, $score, $seconds, $bytes) = ($1, $2, $3, $4);
            if (defined $spamd_pid{$pid})
            {
                $spamd_table{$spamd_pid{$pid}} = "clean";
                delete ($spamd_pid{$pid});
            }elsif ($debug)
            {
                print $html_tags{'starttag'}.$html_tags{'br'}.$html_tags{'starttag'}.$html_tags{'b'};
                print "CRITICAL : spamd sent an answer for a message it did not receive? pid $pid";
                print $html_tags{'endtag'}.$html_tags{'b'}."\n";
            }
            $basic_clean_nb++;
            $clean_score = $clean_score + $score;
            $clean_time = $clean_time + $seconds;
            $clean_volume = $clean_volume + $bytes;
            next LINE;
        }
        #SPAM FOUND
        if ($line =~ /$Defs{'spamd_spam'}/)
        {
            my ($pid, $score, $seconds, $bytes) = ($1, $2, $3, $4);
            if (defined $spamd_pid{$pid})
            {
                $spamd_table{$spamd_pid{$pid}} = "spam";
                delete ($spamd_pid{$pid});
            }elsif ($debug)
            {
                print $html_tags{'starttag'}.$html_tags{'br'}.$html_tags{'starttag'}.$html_tags{'b'};
                print "CRITICAL : spamd sent an answer for a message it did not receive? pid $pid";
                print $html_tags{'endtag'}.$html_tags{'b'}."\n";
            }
            $basic_spam_nb++;
            $spam_score = $spam_score + $score;
            $spam_time = $spam_time + $seconds;
            $spam_volume = $spam_volume + $bytes;
            next LINE;
        }
    }
    #We are in a non-existent case! The loop above is only left through
    #"next FILELOOP".
    print STDERR "WARNING, a piece of the program that shouldnt be run was reached!\nInvestigate!\n";
}
#Recipients grouped by number of spams received : $stats{count} = [recipients]
my %stats = ();
foreach my $key (keys %spam)
{
    push @{$stats{$spam{$key}}}, $key;
}
#General stats
#All figures below are computed from the spamd verdict counters
#($basic_spam_nb / $basic_clean_nb), so every division is guarded by the very
#counter it divides by; "n/a" is printed when that counter is zero.
unless ($nogeneral)
{
    my $br = $html_tags{'starttag'}.$html_tags{'br'};
    my $bold = $html_tags{'starttag'}.$html_tags{'b'};
    my $unbold = $html_tags{'endtag'}.$html_tags{'b'};
    my $total_nb = $basic_spam_nb + $basic_clean_nb;

    #Prints one bold "label : count (percentage of all messages)" line and
    #returns the percentage (0 when no message was analysed at all).
    my $print_share = sub {
        my ($label, $count) = @_;
        my $percent = 0;
        print $br;
        print $bold;
        if ($total_nb > 0)
        {
            $percent = 100 * $count / $total_nb;
            printf("%-40s:%10d (%6.2f%%)\n", $label, $count, $percent);
        }else{
            printf("%-40s:%10s\n", $label, "n/a");
        }
        print $unbold;
        return $percent;
    };

    #Prints "label:" followed by the bold average $sum / $count, then $tail.
    my $print_average = sub {
        my ($label, $sum, $count, $tail) = @_;
        print $br;
        printf("%-40s:", $label);
        print $bold;
        if ($count > 0)
        {
            printf "%10.2f", $sum / $count;
        }else{
            print "n/a";
        }
        print $unbold.$tail;
    };

    #Prints "label:" followed by the volume in its most readable unit and
    #returns the converted volume.
    my $print_volume = sub {
        my ($label, $bytes) = @_;
        my ($value, $unit) = unify($bytes, "bytes");
        print $br;
        printf("%-40s:", $label);
        print $bold;
        printf "%10d", $value; print " ";
        print $unbold.$unit."\n";
        return $value;
    };

    print $br.$br;
    print "Total number of emails processed by the spam filter : ".$bold.$total_nb.$unbold."\n";
    $spam_percent = $print_share->("Number of spams", $basic_spam_nb);
    $clean_percent = $print_share->("Number of clean messages", $basic_clean_nb);

    $print_average->("Average message analysis time", $spam_time + $clean_time, $total_nb, " seconds\n");
    $print_average->("Average spam analysis time", $spam_time, $basic_spam_nb, " seconds\n");
    $print_average->("Average clean message analysis time", $clean_time, $basic_clean_nb, " seconds\n");

    #Spam with multiple recipients count only as one in the averages...
    $print_average->("Average message score", $spam_score + $clean_score, $total_nb, "\n");
    $print_average->("Average spam score", $spam_score, $basic_spam_nb, "\n");
    $print_average->("Average clean message score", $clean_score, $basic_clean_nb, "\n");

    $spam_volume = $print_volume->("Total spam volume", $spam_volume);
    $clean_volume = $print_volume->("Total clean volume", $clean_volume);
}
#Top spammed addresses
#Recipients are listed by decreasing number of spams; the listing stops as
#soon as $number recipients were printed, even in the middle of a group of
#recipients sharing the same count. Inside such a group recipients are sorted
#alphabetically, so that the output does not depend on hash ordering.
if ($number)
{
    print $html_tags{'starttag'}.$html_tags{'br'};
    print "Recipients with highest number of spams : (top $number)\n";
    my $remaining = $number;
    RANK: foreach my $key (sort {$b <=> $a} keys %stats)
    {
        last RANK if $remaining <= 0;
        print $key." spams : \n";
        foreach my $email (sort @{$stats{$key}})
        {
            last RANK if $remaining <= 0;
            print "\t".$email."\n";
            $remaining--;
        }
    }
}
#More than one unparsable line out of ten parsable ones (or no parsable line
#at all) most likely means that a wrong file was given.
if (($correct_lines == 0) or (($incorrect_lines / $correct_lines) > 0.1))
{
    print $html_tags{'starttag'}.$html_tags{'br'};
    print $html_tags{'starttag'}.$html_tags{'br'};
    print "INFO: It seems at least one input file contains other things that {exim/postfix} or spamd lines!\n";
}
#Close the HTML document (both tags are empty strings in plain text mode)
print $html_tags{'endtag'}.$html_tags{'body'};
print $html_tags{'endtag'}.$html_tags{'html'};