Thanks for the reply, but I think that you are referring to autolearn_points, which is computed in PerMsgStatus.pm and used in AutoLearnThreshold.pm. autolearn_points and learned_points are computed in the same function, but they are not the same. Notice that autolearn_points is what ends up in $score (via get_autolearn_points), whereas learned_points is built up in _get_autolearn_points by $self->{learned_points} += $self->{conf}->{scoreset}->[$orig_scoreset]->{$test}; which is inside a loop and a conditional. I am not very familiar with Perl and was kind of lost in the syntax of the for and the if, but I assume that += means the same as in, say, C/C++, so this is some kind of cumulative sum of something. On one run of sa-learn in debug mode I got the following numbers back:
[28135] dbg: learn: auto-learn: currently using scoreset 3, recomputing score based on scoreset 1 [28135] dbg: learn: auto-learn: message score: 10.955, computed score for autolearn: 12.011 [28135] dbg: learn: auto-learn? ham=12, spam=1, body-points=0, head-points=10.813, learned-points=-2.599 so it is definitely not the same score, but what is it? here's a snippet of AutoLearnThreshold.pm sub autolearn_discriminator { my ($self, $params) = @_; my $scan = $params->{permsgstatus}; my $conf = $scan->{conf}; # Figure out min/max for autolearning. # Default to specified auto_learn_threshold settings my $min = $conf->{bayes_auto_learn_threshold_nonspam}; my $max = $conf->{bayes_auto_learn_threshold_spam}; # Find out what score we should consider this message to have ... my $score = $scan->get_autolearn_points(); my $body_only_points = $scan->get_body_only_points(); my $head_only_points = $scan->get_head_only_points(); my $learned_points = $scan->get_learned_points(); dbg("learn: auto-learn? ham=$min, spam=$max, ". "body-points=".$body_only_points.", ". "head-points=".$head_only_points.", ". "learned-points=".$learned_points); my $isspam; if ($score < $min) { $isspam = 0; } elsif ($score >= $max) { $isspam = 1; } else { dbg("learn: auto-learn? no: inside auto-learn thresholds, not considered ham or spam"); return; } my $learner_said_ham_points = -1.0; my $learner_said_spam_points = 1.0; if ($isspam) { my $required_body_points = 3; my $required_head_points = 3; if ($body_only_points < $required_body_points) { dbg("learn: auto-learn? no: scored as spam but too few body points (". $body_only_points." < ".$required_body_points.")"); return; } if ($head_only_points < $required_head_points) { dbg("learn: auto-learn? no: scored as spam but too few head points (". $head_only_points." < ".$required_head_points.")"); return; } if ($learned_points < $learner_said_ham_points) { dbg("learn: auto-learn? no: scored as spam but learner indicated ham (". $learned_points." 
< ".$learner_said_ham_points.")"); return; } if (!$scan->is_spam()) { dbg("learn: auto-learn? no: scored as ham but autolearn wanted spam"); return; } } else { if ($learned_points > $learner_said_spam_points) { dbg("learn: auto-learn? no: scored as ham but learner indicated spam (". $learned_points." > ".$learner_said_spam_points.")"); return; } if ($scan->is_spam()) { dbg("learn: auto-learn? no: scored as spam but autolearn wanted ham"); return; } } dbg("learn: auto-learn? yes, ".($isspam?"spam ($score > $max)":"ham ($score < $min)")); return $isspam; } **************************** here's a snippet of PerMsgStatus.pm sub _get_autolearn_points { my ($self) = @_; return if (exists $self->{autolearn_points}); # ensure it only gets computed once, even if we return early $self->{autolearn_points} = 0; # This function needs to use use sum($score[scoreset % 2]) not just {score}. # otherwise we shift what we autolearn on and it gets really wierd. - tvd my $orig_scoreset = $self->{conf}->get_score_set(); my $new_scoreset = $orig_scoreset; my $scores = $self->{conf}->{scores}; if (($orig_scoreset & 2) == 0) { # we don't need to recompute dbg("learn: auto-learn: currently using scoreset $orig_scoreset"); } else { $new_scoreset = $orig_scoreset & ~2; dbg("learn: auto-learn: currently using scoreset $orig_scoreset, recomputing score based on scoreset $new_scoreset"); $scores = $self->{conf}->{scoreset}->[$new_scoreset]; } my $tflags = $self->{conf}->{tflags}; my $points = 0; # Just in case this function is called multiple times, clear out the # previous calculated values $self->{learned_points} = 0; $self->{body_only_points} = 0; $self->{head_only_points} = 0; foreach my $test (@{$self->{test_names_hit}}) { # According to the documentation, noautolearn, userconf, and learn # rules are ignored for autolearning. 
if (exists $tflags->{$test}) { next if $tflags->{$test} =~ /\bnoautolearn\b/; next if $tflags->{$test} =~ /\buserconf\b/; # Keep track of the learn points for an additional autolearn check. # Use the original scoreset since it'll be 0 in sets 0 and 1. if ($tflags->{$test} =~ /\blearn\b/) { # we're guaranteed that the score will be defined $self->{learned_points} += $self->{conf}->{scoreset}->[$orig_scoreset]->{$test}; next; } } # ignore tests with 0 score in this scoreset next if ($scores->{$test} == 0); # Go ahead and add points to the proper locations if (!$self->{conf}->maybe_header_only ($test)) { $self->{body_only_points} += $scores->{$test}; } if (!$self->{conf}->maybe_body_only ($test)) { $self->{head_only_points} += $scores->{$test}; } $points += $scores->{$test}; } # Figure out the final value we'll use for autolearning $points = (sprintf "%0.3f", $points) + 0; dbg("learn: auto-learn: message score: ".$self->{score}.", computed score for autolearn: $points"); $self->{autolearn_points} = $points; } -- View this message in context: http://www.nabble.com/auto-learn-learned_points-tf3353775.html#a9335682 Sent from the SpamAssassin - Users mailing list archive at Nabble.com.