Wietse Venema:
> The long-term solution is to implement an asynchronous atrace_flush()
> operation, similar to the asynchronous abounce_flush() and
> adefer_flush() operations (another solution, moving work from the
> qmgr to the bounce/defer/flush daemons, would require a structural
> redesign and that is not an option at this time).
>
> This long-term solution would involve:
>
> - Cloning a few lines of existing code in src/global/abounce.[hc].
>
> - Replacing the trace_flush() call in src/qmgr/qmgr_active.c by
> atrace_flush() and a call-back function, similar to the way that
> the abounce_flush() and adefer_flush() operations are used.
>
> That is under a day of work, but I have three deadlines early May.
>
> However, if code can't be changed, then configuration must be
> changed: announce DSN support more selectively.
Actually, it took much less time, because this was indeed a trivial
job of cloning a few lines of already-existing code. There are no
changes to existing protocols or APIs.
In the attached patch, all work is done by less than half a dozen
new lines of code: three lines in abounce.c, and four lines in each
of the two identical qmgr_active_done.c files. The remainder of the
patch is spent on comments and declarations.
You may want to reconsider whether qmgr can/can't be changed because
this change is really very localized. Even if this part of qmgr is
customized (which seems unlikely) this should be easy to paste in
by hand.
Wietse
[This patch applies cleanly to Postfix 2.3 and later]
20110410
Performance: a high load of DSN success notification requests
could stall the queue manager. Solution: make the trace
client asynchronous, just like the bounce and defer clients.
Problem reported by Eduardo M. Stelmaszczyk of terra.com.br,
a very good Postfix customer. Files: global/abounce.[hc],
*qmgr/qmgr_active.c (the qmgr_active.c files are identical).
diff -cr /var/tmp/postfix-old/src/global/abounce.c ./src/global/abounce.c
*** /var/tmp/postfix-old/src/global/abounce.c Tue May 15 16:12:49 2007
--- ./src/global/abounce.c Wed Apr 20 19:43:51 2011
***************
*** 67,75 ****
/* int dsn_ret;
/* void (*callback)(int status, char *context);
/* char *context;
/* DESCRIPTION
/* This module implements an asynchronous interface to the
! /* bounce/defer service for submitting sender notifications
/* without waiting for completion of the request.
/*
/* abounce_flush() bounces the specified message to
--- 67,87 ----
/* int dsn_ret;
/* void (*callback)(int status, char *context);
/* char *context;
+ /*
+ /* void atrace_flush(flags, queue, id, encoding, sender,
+ /* dsn_envid, dsn_ret, callback, context)
+ /* int flags;
+ /* const char *queue;
+ /* const char *id;
+ /* const char *encoding;
+ /* const char *sender;
+ /* const char *dsn_envid;
+ /* int dsn_ret;
+ /* void (*callback)(int status, char *context);
+ /* char *context;
/* DESCRIPTION
/* This module implements an asynchronous interface to the
! /* bounce/defer/trace service for submitting sender notifications
/* without waiting for completion of the request.
/*
/* abounce_flush() bounces the specified message to
***************
*** 92,97 ****
--- 104,113 ----
/* the specified sender, including the defer log that was
/* built with defer_append().
/*
+ /* atrace_flush() returns the specified message to the specified
+ /* sender, including the message delivery record log that was
+ /* built with vtrace_append().
+ /*
/* Arguments:
/* .IP flags
/* The bitwise OR of zero or more of the following (specify
***************
*** 359,361 ****
--- 375,389 ----
flags, queue, id, encoding, sender, dsn_envid, dsn_ret,
callback, context);
}
+
+ /* atrace_flush - asynchronous trace flush */
+
+ void atrace_flush(int flags, const char *queue, const char *id,
+ const char *encoding, const char *sender,
+ const char *dsn_envid, int dsn_ret,
+ ABOUNCE_FN callback, char *context)
+ {
+ abounce_request(MAIL_CLASS_PRIVATE, var_trace_service, BOUNCE_CMD_TRACE,
+ flags, queue, id, encoding, sender, dsn_envid, dsn_ret,
+ callback, context);
+ }
diff -cr /var/tmp/postfix-old/src/global/abounce.h ./src/global/abounce.h
*** /var/tmp/postfix-old/src/global/abounce.h Tue May 17 13:36:13 2005
--- ./src/global/abounce.h Wed Apr 20 17:41:35 2011
***************
*** 24,29 ****
--- 24,30 ----
extern void abounce_flush(int, const char *, const char *, const char *,
const char *, const char *, int, ABOUNCE_FN, char *);
extern void adefer_flush(int, const char *, const char *, const char *, const
char *, const char *, int, ABOUNCE_FN, char *);
extern void adefer_warn(int, const char *, const char *, const char *, const
char *, const char *, int, ABOUNCE_FN, char *);
+ extern void atrace_flush(int, const char *, const char *, const char *, const
char *, const char *, int, ABOUNCE_FN, char *);
extern void abounce_flush_verp(int, const char *, const char *, const char *,
const char *, const char *, int, const char *, ABOUNCE_FN, char *);
extern void adefer_flush_verp(int, const char *, const char *, const char *,
const char *, const char *, int, const char *, ABOUNCE_FN, char *);
diff -cr /var/tmp/postfix-old/src/oqmgr/qmgr_active.c ./src/oqmgr/qmgr_active.c
*** /var/tmp/postfix-old/src/oqmgr/qmgr_active.c Wed Oct 3 20:19:21 2007
--- ./src/oqmgr/qmgr_active.c Wed Apr 20 19:39:54 2011
***************
*** 116,121 ****
--- 116,123 ----
*/
static void qmgr_active_done_2_bounce_flush(int, char *);
static void qmgr_active_done_2_generic(QMGR_MESSAGE *);
+ static void qmgr_active_done_25_trace_flush(int, char *);
+ static void qmgr_active_done_25_generic(QMGR_MESSAGE *);
static void qmgr_active_done_3_defer_flush(int, char *);
static void qmgr_active_done_3_defer_warn(int, char *);
static void qmgr_active_done_3_generic(QMGR_MESSAGE *);
***************
*** 336,345 ****
static void qmgr_active_done_2_generic(QMGR_MESSAGE *message)
{
- const char *myname = "qmgr_active_done_2_generic";
const char *path;
struct stat st;
- int status;
/*
* A delivery agent marks a queue file as corrupt by changing its
--- 338,345 ----
***************
*** 372,381 ****
}
/*
- * As a temporary implementation, synchronously inform the sender of
- * trace information. This will block for 10 seconds when the qmgr FIFO
- * is full.
- *
* XXX With multi-recipient mail, some recipients may have NOTIFY=SUCCESS
* and others not. Depending on what subset of recipients are delivered,
* a trace file may or may not be created. Even when the last partial
--- 372,377 ----
***************
*** 388,406 ****
*/
if ((message->tflags & (DEL_REQ_FLAG_USR_VRFY | DEL_REQ_FLAG_RECORD))
|| (message->rflags & QMGR_READ_FLAG_NOTIFY_SUCCESS)) {
! status = trace_flush(message->tflags,
! message->queue_name,
! message->queue_id,
! message->encoding,
! message->sender,
! message->dsn_envid,
! message->dsn_ret);
! if (status == 0 && message->tflags_offset)
! qmgr_message_kill_record(message, message->tflags_offset);
! message->flags |= status;
}
/*
* If we get to this point we have tried all recipients for this message.
* If the message is too old, try to bounce it.
*
--- 384,429 ----
*/
if ((message->tflags & (DEL_REQ_FLAG_USR_VRFY | DEL_REQ_FLAG_RECORD))
|| (message->rflags & QMGR_READ_FLAG_NOTIFY_SUCCESS)) {
! atrace_flush(message->tflags,
! message->queue_name,
! message->queue_id,
! message->encoding,
! message->sender,
! message->dsn_envid,
! message->dsn_ret,
! qmgr_active_done_25_trace_flush,
! (char *) message);
! return;
}
/*
+ * Asynchronous processing does not reach this point.
+ */
+ qmgr_active_done_25_generic(message);
+ }
+
+ /* qmgr_active_done_25_trace_flush - continue after atrace_flush() completion
*/
+
+ static void qmgr_active_done_25_trace_flush(int status, char *context)
+ {
+ QMGR_MESSAGE *message = (QMGR_MESSAGE *) context;
+
+ /*
+ * Process atrace_flush() status and continue processing.
+ */
+ if (status == 0 && message->tflags_offset)
+ qmgr_message_kill_record(message, message->tflags_offset);
+ message->flags |= status;
+ qmgr_active_done_25_generic(message);
+ }
+
+ /* qmgr_active_done_25_generic - continue processing */
+
+ static void qmgr_active_done_25_generic(QMGR_MESSAGE *message)
+ {
+ const char *myname = "qmgr_active_done_25_generic";
+
+ /*
* If we get to this point we have tried all recipients for this message.
* If the message is too old, try to bounce it.
*
diff -cr /var/tmp/postfix-old/src/qmgr/qmgr_active.c ./src/qmgr/qmgr_active.c
*** /var/tmp/postfix-old/src/qmgr/qmgr_active.c Sun Sep 23 08:24:15 2007
--- ./src/qmgr/qmgr_active.c Wed Apr 20 19:39:54 2011
***************
*** 116,121 ****
--- 116,123 ----
*/
static void qmgr_active_done_2_bounce_flush(int, char *);
static void qmgr_active_done_2_generic(QMGR_MESSAGE *);
+ static void qmgr_active_done_25_trace_flush(int, char *);
+ static void qmgr_active_done_25_generic(QMGR_MESSAGE *);
static void qmgr_active_done_3_defer_flush(int, char *);
static void qmgr_active_done_3_defer_warn(int, char *);
static void qmgr_active_done_3_generic(QMGR_MESSAGE *);
***************
*** 336,345 ****
static void qmgr_active_done_2_generic(QMGR_MESSAGE *message)
{
- const char *myname = "qmgr_active_done_2_generic";
const char *path;
struct stat st;
- int status;
/*
* A delivery agent marks a queue file as corrupt by changing its
--- 338,345 ----
***************
*** 372,381 ****
}
/*
- * As a temporary implementation, synchronously inform the sender of
- * trace information. This will block for 10 seconds when the qmgr FIFO
- * is full.
- *
* XXX With multi-recipient mail, some recipients may have NOTIFY=SUCCESS
* and others not. Depending on what subset of recipients are delivered,
* a trace file may or may not be created. Even when the last partial
--- 372,377 ----
***************
*** 388,406 ****
*/
if ((message->tflags & (DEL_REQ_FLAG_USR_VRFY | DEL_REQ_FLAG_RECORD))
|| (message->rflags & QMGR_READ_FLAG_NOTIFY_SUCCESS)) {
! status = trace_flush(message->tflags,
! message->queue_name,
! message->queue_id,
! message->encoding,
! message->sender,
! message->dsn_envid,
! message->dsn_ret);
! if (status == 0 && message->tflags_offset)
! qmgr_message_kill_record(message, message->tflags_offset);
! message->flags |= status;
}
/*
* If we get to this point we have tried all recipients for this message.
* If the message is too old, try to bounce it.
*
--- 384,429 ----
*/
if ((message->tflags & (DEL_REQ_FLAG_USR_VRFY | DEL_REQ_FLAG_RECORD))
|| (message->rflags & QMGR_READ_FLAG_NOTIFY_SUCCESS)) {
! atrace_flush(message->tflags,
! message->queue_name,
! message->queue_id,
! message->encoding,
! message->sender,
! message->dsn_envid,
! message->dsn_ret,
! qmgr_active_done_25_trace_flush,
! (char *) message);
! return;
}
/*
+ * Asynchronous processing does not reach this point.
+ */
+ qmgr_active_done_25_generic(message);
+ }
+
+ /* qmgr_active_done_25_trace_flush - continue after atrace_flush() completion
*/
+
+ static void qmgr_active_done_25_trace_flush(int status, char *context)
+ {
+ QMGR_MESSAGE *message = (QMGR_MESSAGE *) context;
+
+ /*
+ * Process atrace_flush() status and continue processing.
+ */
+ if (status == 0 && message->tflags_offset)
+ qmgr_message_kill_record(message, message->tflags_offset);
+ message->flags |= status;
+ qmgr_active_done_25_generic(message);
+ }
+
+ /* qmgr_active_done_25_generic - continue processing */
+
+ static void qmgr_active_done_25_generic(QMGR_MESSAGE *message)
+ {
+ const char *myname = "qmgr_active_done_25_generic";
+
+ /*
* If we get to this point we have tried all recipients for this message.
* If the message is too old, try to bounce it.
*