On Jan 3, 2012, at 9:50 AM, Guillaume Lelarge wrote:

>> Yeah, I’m a committer on check_postgres.pl, so unless the other check_postgres.pl 
>> folks think it’s not a good place for it for some reason I can’t imagine 
>> right now, you can expect that.
>> 
> 
> Oh, you too? funny :)
> 
> Anyway, I don't recall any thread on that issue on the check_postgres.pl
> list.

Here is the implementation I’m working on. Comments? Does the query look right?

sub check_pgagent_jobs {
    ## Check for failed pgAgent jobs.
    ## Supports: Nagios
    ## Critical and warning are intervals.
    ## Example: --critical="1 hour"
    ## Example: --warning="2 hours"
    ##
    ## Selects job steps with a non-zero result whose job run ended within
    ## the larger of the two intervals. For each database: no such rows is
    ## OK; otherwise the state is critical if any failed step ended within
    ## the critical interval, and warning if none did.
    ## At least one of --warning / --critical must be given.

    my ($warning, $critical) = validate_range({ type => 'time' });

    # At least one threshold is required to bound the search window.
    $critical || $warning or ndie(msg('range-noopt-orboth'));

    # How long ago the job run ended.
    my $age = 'NOW() - (jlog.jlgstart + jlog.jlgduration)';

    # Determine critical column: 1 when the failure falls inside the critical
    # interval, 0 otherwise. Always an integer so that the /^[01]$/ sanity
    # check below also passes when only --warning was supplied.
    # NOTE(review): thresholds are interpolated as interval literals; this
    # assumes validate_range only returns values safe to embed - confirm.
    my $crit_column = $critical
        ? "CASE WHEN $age < '$critical'::interval THEN 1 ELSE 0 END"
        : '0';

    # Determine WHERE clause: look back as far as the larger threshold.
    my $window = $critical && $warning
        ? "GREATEST('$critical'::interval, '$warning'::interval)"
        : "'" . ($critical || $warning) . "'::interval";

    $SQL = qq{
        SELECT jlog.jlgid
             , job.jobname
             , step.jstname
             , slog.jslresult
             , slog.jsloutput
             , $crit_column AS critical
          FROM pgagent.pga_job job
          JOIN pgagent.pga_joblog     jlog ON job.jobid  = jlog.jlgjobid
          JOIN pgagent.pga_jobstep    step ON job.jobid  = step.jstjobid
          JOIN pgagent.pga_jobsteplog slog ON jlog.jlgid = slog.jsljlgid AND step.jstid = slog.jsljstid
         WHERE slog.jslresult <> 0
           AND $age < $window;
    };

    my $info = run_command($SQL);

    for $db (@{$info->{db}}) {
        my @rows = @{ $db->{slurp} };

        # No failed steps inside the window.
        if (!@rows) {
            add_ok(msg('pgagent-jobs-ok'));
            next;
        }

        # Sanity check: the critical column must come back as 0 or 1.
        if ($rows[0]{critical} !~ /^[01]$/) {
            add_unknown(msg('invalid-query', $db->{slurp}));
            next;
        }

        my ($any_critical, @msg);
        for my $step (@rows) {
            # Only substitute the placeholder for missing/empty output, so a
            # literal "0" written by a step is still reported.
            my $output = $step->{jsloutput};
            $output = '(NO OUTPUT)' if !defined $output || !length $output;
            push @msg => "$step->{jslresult} $step->{jobname}/$step->{jstname}: $output";
            $any_critical ||= $step->{critical};
        }

        # Collapse to a single line: one status line per database.
        (my $msg = join '; ' => @msg) =~ s{\r?\n}{ }g;
        if ($any_critical) {
            add_critical($msg);
        } else {
            add_warning($msg);
        }
    }

    return;
}

Thanks,

David

Reply via email to