Hi,

I've just started using dbmail... I'm moving to OS X and I like Apple's Mail app, so I'm abandoning my much-loved MH for the brave new world of IMAP et al. The reason I liked MH was that it was easy to grep (either with grep or with glimpse) through all of my email while still organizing things by directories. These days, though, I get so much spam that any grepping takes a significant amount of time. This is where I hoped dbmail would help, but now I'm not so sure, since it seems difficult to scan the bodies of all my messages for some word or phrase via SQL... but that wasn't really the point of this email...

I'm in the process of importing all of my email (major pain), and the biggest pain was that the internal_date is set to the current time by dbmail-smtp, which causes Apple's Mail app to say that everything was just delivered. I scanned the archive and found that others had this same problem. The general approach seemed to be either to write a special script to hack the internal_date after the fact or to write a special script/program to do the insertion. Neither solution was completely satisfactory, so I added a "-i" option to dbmail-smtp which will cause dbmail-smtp to pull the date out of the Received header entries and then use that for the "internal_date". This seems to solve my problems (though I haven't yet imported all of my email). The change was straightforward, but I thought I'd share the changes in case someone wants to use them.

Darrell

PS: My use of dbmail is probably atypical: I'm running Postgres & dbmail on a PowerBook, which will be dedicated to my email. The email will be retrieved with fetchmail, which will feed the mail to mailagent; mailagent will then use SpamAssassin et al. to sort and filter the mail. Based on this process, mailagent will then insert the email into dbmail (directed to the appropriate mailbox) via dbmail-smtp.

diff -ru ../../orig/dbmail-2.0rc7/db.c ./db.c
--- ../../orig/dbmail-2.0rc7/db.c       Fri Jun 18 08:16:34 2004
+++ ./db.c      Tue Aug 10 14:05:32 2004
@@ -773,10 +773,10 @@
        return 1;
 }
 
-int db_insert_message(u64_t user_idnr,
-                     const char *mailbox,
+int db_insert_message(u64_t user_idnr, const char *mailbox,
                      int create_or_error_mailbox,
-                     const char *unique_id, u64_t * message_idnr)
+                     const char *unique_id, u64_t * message_idnr,
+                     timestring_t internal_time)
 {
        u64_t mailboxid;
        u64_t physmessage_id;
@@ -813,7 +813,9 @@
        }
 
        /* insert a new physmessage entry */
-       if (db_insert_physmessage(&physmessage_id) == -1) {
+       if ((internal_time && *internal_time ?
+            
db_insert_physmessage_with_internal_date(internal_time,&physmessage_id) :
+            db_insert_physmessage(&physmessage_id)) == -1) {
                trace(TRACE_ERROR, "%s,%s: error inserting physmessage",
                      __FILE__, __func__);
                return -1;
diff -ru ../../orig/dbmail-2.0rc7/db.h ./db.h
--- ../../orig/dbmail-2.0rc7/db.h       Thu Jun  3 05:22:05 2004
+++ ./db.h      Tue Aug 10 14:05:32 2004
@@ -558,7 +558,8 @@
 int db_insert_message(u64_t user_idnr,
                      const char *deliver_to,
                      int create_or_error_mailbox,
-                     const char *unique_id, u64_t * message_idnr);
+                     const char *unique_id, u64_t * message_idnr,
+                     timestring_t internal_time);
 
 #define CREATE_IF_MBOX_NOT_FOUND 1
 #define ERROR_IF_MBOX_NOT_FOUND -1
diff -ru ../../orig/dbmail-2.0rc7/lmtp.c ./lmtp.c
--- ../../orig/dbmail-2.0rc7/lmtp.c     Thu Jul  1 11:27:32 2004
+++ ./lmtp.c    Tue Aug 10 14:05:32 2004
@@ -706,7 +706,7 @@
                                                    headersize, headerrfcsize,
                                                    body_size, body_rfcsize,
                                                    &headerfields, &rcpt,
-                                                   &fromlist) == -1) {
+                                                   &fromlist,0) == -1) {
                                                ci_write((FILE *) stream,
                                                        "503 Message not 
received\r\n");
                                        } else {
diff -ru ../../orig/dbmail-2.0rc7/main.c ./main.c
--- ../../orig/dbmail-2.0rc7/main.c     Fri Jun 18 08:16:34 2004
+++ ./main.c    Tue Aug 10 14:05:32 2004
@@ -160,7 +160,7 @@
 int main(int argc, char *argv[])
 {
        int exitcode = 0;
-       int c, c_prev = 0, usage_error = 0;
+       int c, c_prev = 0, usage_error = 0, set_internal_date = 0;
        u64_t dummyidx = 0, dummysize = 0;
        char *whole_message = NULL;
        u64_t whole_message_size;
@@ -168,6 +168,7 @@
        u64_t body_size;
        u64_t body_rfcsize;
        char *header = NULL;
+       timestring_t header_time;
        
        openlog(PNAME, LOG_PID, LOG_MAIL);
 
@@ -175,13 +176,14 @@
        list_init(&dsnusers);
        list_init(&mimelist);
        list_init(&returnpath);
+       header_time[0] = '\0';
 
        /* Check for commandline options.
         * The initial '-' means that arguments which are not associated
         * with an immediately preceding option are return with option 
         * value '1'. We will use this to allow for multiple values to
         * follow after each of the supported options. */
-       while ((c = getopt(argc, argv, "-n::m:u:d:f:r:")) != EOF) {
+       while ((c = getopt(argc, argv, "-n:::m:u:d:f:i")) != EOF) {
                /* Received an n-th value following the last option,
                 * so recall the last known option to be used in the switch. */
                if (c == 1)
@@ -276,6 +278,12 @@
                        }
 
                        break;
+               case 'i':
+                       trace(TRACE_INFO,
+                             "main(): pulling internal_date from header");
+
+                       set_internal_date = 1;
+                       break;
                default:
                        usage_error = 1;
                        break;
@@ -399,6 +407,17 @@
                }
        }
 
+       /* Pull date from header and inject into 'internal_date'... */
+       if ( set_internal_date ) {
+               trace(TRACE_DEBUG, "main(): pulling internal_date from header" 
);
+               if (mail_header_date( header_time, &mimelist) != 0) {
+                       trace(TRACE_STOP,
+                             "main(): scanner found no header date, defaulting 
to current date");
+                       set_internal_date = 0;
+                       header_time[0] = '\0';
+               }
+       }
+
        /* If the MAILBOX delivery mode has been selected... */
        if (deliver_to_mailbox != NULL) {
                trace(TRACE_DEBUG, "main(): setting mailbox for all deliveries 
to [%s]",
@@ -420,7 +439,8 @@
        /* inserting messages into the database */
        if (insert_messages(header, body, headersize, headerrfcsize,
                            body_size, body_rfcsize,
-                           &mimelist, &dsnusers, &returnpath) == -1) {
+                           &mimelist, &dsnusers,
+                           &returnpath, header_time) == -1) {
                trace(TRACE_ERROR, "main(): insert_messages failed");
                /* Most likely a random failure... */
                exitcode = EX_TEMPFAIL;
diff -ru ../../orig/dbmail-2.0rc7/mime.c ./mime.c
--- ../../orig/dbmail-2.0rc7/mime.c     Wed Apr 14 09:20:31 2004
+++ ./mime.c    Tue Aug 10 14:05:39 2004
@@ -340,6 +340,217 @@
        *mr = NULL;
 }
 
+/**
+ * \brief number of days in a month of the Gregorian calendar
+ * \param year full year, e.g. 2004
+ * \param month month number, 1 (January) to 12 (December)
+ * \return 28, 29, 30 or 31
+ */
+static int header_date_month_length(int year, int month)
+{
+       static const int lengths[12] = { 31, 28, 31, 30, 31, 30,
+                                        31, 31, 30, 31, 30, 31 };
+
+       if (month == 2 && year % 4 == 0 &&
+           (year % 100 != 0 || year % 400 == 0))
+               return 29;
+       return lengths[month - 1];
+}
+
+/**
+ * \brief read an unsigned decimal number of at most max_digits digits
+ * \param pp in: start of the text, out: first character after the number
+ * \param max_digits maximum number of digits to consume
+ * \return the value read, or -1 (*pp unchanged) if *pp is not a digit
+ */
+static int header_date_read_number(const char **pp, int max_digits)
+{
+       const char *p = *pp;
+       int value = 0;
+
+       while (max_digits-- > 0 && isdigit((unsigned char) *p))
+               value = value * 10 + (*p++ - '0');
+       if (p == *pp)
+               return -1;
+       *pp = p;
+       return value;
+}
+
+/** \brief advance *pp past any white space */
+static void header_date_skip_space(const char **pp)
+{
+       while (isspace((unsigned char) **pp))
+               ++*pp;
+}
+
+/**
+ * \brief build an internal_date from the last "Received" header entry
+ *
+ * Takes the last entry of mimelist whose field name is "received"
+ * (compared case-insensitively), parses the date that follows the final
+ * ';' of its value, e.g. "...; Sun, 1 Apr 2001 21:34:55 -0400", and
+ * writes it, converted to GMT, as "YYYY-MM-DD HH:MM:SS".
+ *
+ * \param header_time output buffer, set to "" whenever -1 is returned.
+ *        Assumed to hold at least 20 bytes (TODO: confirm against the
+ *        definition of timestring_t).
+ * \param mimelist list whose elements carry struct mime_record data
+ * \return
+ *     - 0 if a complete, valid date was written to header_time
+ *     - -1 on a NULL argument or when no usable date was found
+ */
+int mail_header_date(timestring_t header_time, struct list *mimelist)
+{
+       static const char month_names[12][4] = {
+               "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+       };
+       enum { HEADER_DATE_SIZE = 20 }; /* "YYYY-MM-DD HH:MM:SS" + NUL */
+       struct element *raw;
+       struct mime_record *mr, *last_received = NULL;
+       const char *p;
+       size_t zone_digits;
+       int received_count = 0;
+       int year, month, day, hour, minute, second;
+       int zone_sign = 0, zone_hours = 0, zone_minutes = 0;
+       int minute_of_day;
+
+       if (!header_time || !mimelist) {
+               trace(TRACE_ERROR,
+                     "mail_header_date(): received a NULL argument\n");
+               return -1;
+       }
+
+       /* Never hand an indeterminate buffer back to the caller. */
+       header_time[0] = '\0';
+
+       trace(TRACE_INFO, "mail_header_date(): mail date parser starting");
+       trace(TRACE_DEBUG, "mail_header_date(): total fields in header %ld",
+             mimelist->total_nodes);
+
+       /* Use the last "received" entry in list order, as before; whether
+        * list order equals header order is not visible here (TODO). */
+       for (raw = list_getstart(mimelist); raw != NULL;
+            raw = raw->nextnode) {
+               mr = (struct mime_record *) raw->data;
+               if (strcasecmp(mr->field, "received") == 0) {
+                       last_received = mr;
+                       ++received_count;
+               }
+       }
+       trace(TRACE_DEBUG,
+             "mail_header_date(): there are %d received header entries",
+             received_count);
+       if (last_received == NULL)
+               return -1;
+
+       /* The value looks something like:
+        *   from host.domain.com (actualhost.domain. [192.168.100.2])
+        *     by localhost.domain.com (8.9.3/domain/CV-2.0)
+        *     with ESMTP id VAA14687
+        *     for <user@domain.com>; Sun, 1 Apr 2001 21:34:55 -0400
+        * so the date is whatever follows the last ';'. */
+       p = strrchr(last_received->value, ';');
+       if (p == NULL)
+               return -1;
+       ++p;
+
+       /* skip an optional day name such as "Sun," */
+       header_date_skip_space(&p);
+       while (isalpha((unsigned char) *p))
+               ++p;
+       header_date_skip_space(&p);
+       if (*p == ',')
+               ++p;
+       header_date_skip_space(&p);
+
+       day = header_date_read_number(&p, 2);
+       if (day < 0)
+               return -1;
+       header_date_skip_space(&p);
+
+       /* Month name: step over it only when it really is one, so p can
+        * never move beyond the terminating NUL. */
+       for (month = 0; month < 12; ++month)
+               if (strncasecmp(p, month_names[month], 3) == 0)
+                       break;
+       if (month == 12)
+               return -1;
+       ++month;
+       p += 3;
+       header_date_skip_space(&p);
+
+       /* as before, only four-digit years are accepted */
+       if (strspn(p, "0123456789") < 4)
+               return -1;
+       year = header_date_read_number(&p, 4);
+       header_date_skip_space(&p);
+
+       /* HH:MM:SS, all three parts are required */
+       hour = header_date_read_number(&p, 2);
+       if (hour < 0 || *p != ':')
+               return -1;
+       ++p;
+       minute = header_date_read_number(&p, 2);
+       if (minute < 0 || *p != ':')
+               return -1;
+       ++p;
+       second = header_date_read_number(&p, 2);
+       if (second < 0)
+               return -1;
+
+       if (day < 1 || day > header_date_month_length(year, month) ||
+           hour > 23 || minute > 59 || second > 60)
+               return -1;
+       if (second == 60)
+               second = 59;    /* fold a leap second into :59 */
+
+       /* optional numeric zone: "+HHMM" / "-HHMM" (or just "+HH") */
+       header_date_skip_space(&p);
+       if (*p == '+' || *p == '-') {
+               zone_sign = (*p++ == '-') ? -1 : 1;
+               zone_digits = strspn(p, "0123456789");
+               if (zone_digits >= 2)
+                       zone_hours = header_date_read_number(&p, 2);
+               if (zone_digits >= 4)
+                       zone_minutes = header_date_read_number(&p, 2);
+       }
+
+       /* GMT = local time - zone offset. Work in minutes of the day and
+        * carry any under/overflow into day, month and year. */
+       minute_of_day = hour * 60 + minute -
+           zone_sign * (zone_hours * 60 + zone_minutes);
+       while (minute_of_day < 0) {
+               minute_of_day += 24 * 60;
+               if (--day < 1) {
+                       if (--month < 1) {
+                               month = 12;
+                               --year;
+                       }
+                       day = header_date_month_length(year, month);
+               }
+       }
+       while (minute_of_day >= 24 * 60) {
+               minute_of_day -= 24 * 60;
+               if (++day > header_date_month_length(year, month)) {
+                       day = 1;
+                       if (++month > 12) {
+                               month = 1;
+                               ++year;
+                       }
+               }
+       }
+       if (year < 1 || year > 9999)
+               return -1;
+
+       snprintf(header_time, HEADER_DATE_SIZE,
+                "%04d-%02d-%02d %02d:%02d:%02d", year, month, day,
+                minute_of_day / 60, minute_of_day % 60, second);
+       trace(TRACE_DEBUG, "mail_header_date(): found date (%s)",
+             header_time);
+       trace(TRACE_INFO, "mail_header_date(): mail date parser finished");
+       return 0;
+}
 
 int mail_adr_list(char *scan_for_field, struct list *targetlist,
                  struct list *mimelist)
diff -ru ../../orig/dbmail-2.0rc7/mime.h ./mime.h
--- ../../orig/dbmail-2.0rc7/mime.h     Wed Apr 14 09:20:31 2004
+++ ./mime.h    Tue Aug 10 14:05:32 2004
@@ -45,6 +45,7 @@
                    struct mime_record **mr);
 int mail_adr_list(char *scan_for_field, struct list *targetlist,
                  struct list *mimelist);
+int mail_header_date(timestring_t header_time, struct list *mimelist);
 int mime_readheader(const char *datablock, u64_t * blkidx, 
                    struct list *mimelist, u64_t * headersize);
 
diff -ru ../../orig/dbmail-2.0rc7/pipe.c ./pipe.c
--- ../../orig/dbmail-2.0rc7/pipe.c     Thu Jun  3 08:41:54 2004
+++ ./pipe.c    Tue Aug 10 14:05:32 2004
@@ -379,7 +379,8 @@
 static int store_message_temp(const char *header, const char *body, 
                              u64_t headersize, u64_t headerrfcsize,
                              u64_t bodysize, u64_t bodyrfcsize,
-                             /[EMAIL PROTECTED]@*/ u64_t * temp_message_idnr)
+                             /[EMAIL PROTECTED]@*/ u64_t * temp_message_idnr,
+                             timestring_t internal_time)
 {
        int result;
        u64_t user_idnr;
@@ -405,7 +406,7 @@
        /* create a message record */
        switch (db_insert_message(user_idnr, DBMAIL_TEMPMBOX,
                                  CREATE_IF_MBOX_NOT_FOUND, unique_id,
-                                 &msgidnr)) {
+                                 &msgidnr, internal_time)) {
        case -1:
                trace(TRACE_ERROR,
                      "store_message_temp(): returned -1, aborting");
@@ -515,8 +516,8 @@
  */
 int insert_messages(const char *header, const char* body, u64_t headersize,
                    u64_t headerrfcsize, u64_t bodysize, u64_t bodyrfcsize,
-                   struct list *headerfields,
-                   struct list *dsnusers, struct list *returnpath)
+                   struct list *headerfields, struct list *dsnusers,
+                   struct list *returnpath, timestring_t internal_time )
 {
        struct element *element, *ret_path;
        u64_t msgsize, rfcsize, tmpmsgidnr;
@@ -527,7 +528,7 @@
        /* Read in the rest of the stream and store it into a temporary message 
*/
        switch (store_message_temp
                (header, body, headersize, headerrfcsize, 
-                bodysize, bodyrfcsize, &tmpmsgidnr)) {
+                bodysize, bodyrfcsize, &tmpmsgidnr, internal_time)) {
        case -1:
                /* Major trouble. Bail out immediately. */
                trace(TRACE_ERROR,
diff -ru ../../orig/dbmail-2.0rc7/pipe.h ./pipe.h
--- ../../orig/dbmail-2.0rc7/pipe.h     Fri May 14 09:48:56 2004
+++ ./pipe.h    Tue Aug 10 14:05:32 2004
@@ -48,7 +48,7 @@
 int insert_messages(const char *header, const char *body, u64_t headersize, 
                    u64_t headerrfcsize, u64_t bodysize, u64_t bodyrfcsize,
                    struct list *headerfields, struct list *dsnusers,
-                   struct list *returnpath);
+                   struct list *returnpath, timestring_t internal_time );
 
 /**
  * \brief discards all input coming from instream

Reply via email to