Eelco, thanks, that's a lot faster :)
Conversion started @ Sun Jul 28 17:24:37 2002 Conversion finished @ Sun Jul 28 17:28:20 2002 I had to edit the file a little bit so that it imports the files into their respective mailboxes under one user. I also had to change: *mbox_delimiter_pattern = "^From .*  "; to *mbox_delimiter_pattern = "^From .* "; (one space less). However, I think this confuses imapd, because the imported folders where the above change actually made a difference now die on: Jul 28 17:53:45 java dbmail/imap4[18953]: imap_process(): Executing command fetch... Jul 28 17:53:48 java dbmail/imap4[18953]: mime_readheader(): no valid mime headers found Jul 28 17:53:48 java dbmail/imap4[18953]: db_fetch_headers(): error fetching message, ID: 7518 Jul 28 17:53:48 java dbmail/imap4[18953]: db_fetch_headers(): got error at level 0 Jul 28 17:53:48 java dbmail/imap4[18953]: Received SIGSEGV Any ideas on this? Maybe I should just change the delimiters in the mailbox? On Sat, Jul 27, 2002 at 11:15:26PM +0200, Eelco van Beek - IC&S wrote: > You should take a look at the uni-one converter. It was especially > written for a company called uni-hone here in the Netherlands to convert > 6,5 Gbyte of mboxes. All boxes were converted in 51 minutes. > >In preparation for migrating our office setup to dbmail, I've decided > >to convert my personal mail setup first. I have about 165Mb of mboxes > >I want to convert to dbmail with the mbox2dbmail tool. For convenience > >I edited mbox2dbmail so that I can give it a mailbox as second argument. > >However the mbox import seems to be a bit slow; it takes about half an > >hour with a 5mb/240 mail mbox. Can anyone shed some light on this? > >The hardware used is a Via C3/800 (about the same as a Celeron) with 128 MB RAM, > >all actions are done on localhost. Regards, -- Frido
/* * this program traverses a directory tree and executes * dbmail conversion on each file.A * * slightly edited to parse this kind of directory * * /home/user/mail/box * /home/user/mail/lists/mailinglist * * create mailbox for each mboxfile (box, lists/mailinglist) * use constant for user * * did not change printf statements, maybe confusing */ #include <stdio.h> #include <string.h> #include <stdlib.h> #include <sys/types.h> #include <dirent.h> #include <time.h> #include <unistd.h> #include "db.h" #include "auth.h" #include "dbmailtypes.h" #include "debug.h" #include <regex.h> #define MAX_LINESIZE 1024 #define UID_SIZE 70 const char *mbox_delimiter_pattern = "^From .* "; char blk[READ_BLOCK_SIZE + MAX_LINESIZE + 1]; /* syslog */ #define PNAME "dbmail/uni-one-convertor" char *getusername (char *path); int traverse (char *path); int process_mboxfile(char *file, u64_t userid); int main (int argc, char* argv[]) { time_t start; time_t stop; int result; if (argc < 2) { printf ("Error, traverse need a directory as argument\n"); return -1; } openlog(PNAME, LOG_PID, LOG_MAIL); /* open connection to syslog */ configure_debug(TRACE_ERROR, 1, 0); /* open dbase connections */ if (db_connect() != 0 || auth_connect() != 0) { printf("Error opening dbase connections\n"); return -1; } time (&start); /* mark the starting time */ result = traverse (argv[1]); time (&stop); /* mark the ending time */ printf ("Conversion started @ %s", ctime(&start)); printf ("Conversion finished @ %s", ctime(&stop)); return result; } char *getusername (char *path) { int i; char *tmp; i = strlen (path); tmp = path+i; while ( (tmp!=path) && (*tmp!='/')) tmp--; return tmp+1; } int traverse (char *path) { char newpath [1024]; char *username; struct dirent **namelist; int n; u64_t userid; n = scandir (path, &namelist, 0, alphasort); if (n < 0) { printf ("file %s\n",path); username = getusername(path); printf ("username %s\n", username); printf("creating user..."); userid = auth_user_exists("frido"); if 
(userid != -1 && userid != 0) { printf("Ok id [%llu]\n", userid); printf("converting mailbox..."); fflush(stdout); n = process_mboxfile(path, userid); if (n != 0) printf("Warning: error converting mailbox\n"); else printf ("done :)\n"); } else { printf("user already exists. Skipping\n"); } } else { while (n--) { if ((strcmp(namelist[n]->d_name,"..")!=0) && (strcmp(namelist[n]->d_name,".")!=0)) { sprintf (newpath,"%s/%s",path, namelist[n]->d_name); traverse (newpath); } free (namelist[n]); } free(namelist); } return 0; } int process_mboxfile(char *file, u64_t userid) { regex_t preg; int result; FILE *infile; int in_msg, header_passed; char newunique[UID_SIZE]; unsigned cnt,len,newlines; u64_t msgid=0, size; char saved; char *a, *b; int len2; // need to make a proper mailboxname from path // no idea how to do this properly, never coded c before :) /* does the actual searching */ if ((a = strstr(file, "./") ) != 0) { /* blindly chop out the unwanted string */ /* don't forget the null! */ len2 = strlen("./"); b = a+len2; memcpy(a, b, strlen(b)+1); } db_createmailbox(file, userid); if ((result = regcomp(&preg, mbox_delimiter_pattern, REG_NOSUB)) != 0) { trace(TRACE_ERROR,"Regex compilation failed."); return -1; } if ( (infile = fopen(file, "r")) == 0) { trace(TRACE_ERROR,"Could not open file [%s]", infile); return -1; } in_msg = 0; cnt = 0; size = 0; newlines = 0; while (!feof(infile) && !ferror(infile)) { if (fgets(&blk[cnt], MAX_LINESIZE, infile) == 0) break; /* check if this is an mbox delimiter */ if (regexec(&preg, &blk[cnt], 0, NULL, 0) == 0) { if (!in_msg) in_msg = 1; /* ok start of a new msg */ else { /* update & end message */ db_insert_message_block(blk, cnt, msgid); snprintf(newunique, UID_SIZE, "%lluA%lu", userid, time(NULL)); db_update_message(msgid, newunique, size+cnt, size+cnt+newlines); trace(TRACE_ERROR, "message [%llu] inserted, [%u] bytes", msgid, size+cnt); } /* start new message */ msgid = db_insert_message(userid, file, 0); header_passed = 0; cnt 
= 0; size = 0; newlines = 0; } else { newlines++; if (header_passed == 0) { /* we're still reading the header */ len = strlen(&blk[cnt]); if (strcmp(&blk[cnt], "\n") == 0) { db_insert_message_block(blk, cnt+len, msgid); header_passed = 1; size += (cnt+len); cnt = 0; } else cnt += len; } else { /* this is body data */ len = strlen(&blk[cnt]); cnt += len; if (cnt >= READ_BLOCK_SIZE) { /* write block */ saved = blk[READ_BLOCK_SIZE]; blk[READ_BLOCK_SIZE] = '\0'; db_insert_message_block(blk, READ_BLOCK_SIZE, msgid); blk[READ_BLOCK_SIZE] = saved; memmove(blk, &blk[READ_BLOCK_SIZE], cnt - (READ_BLOCK_SIZE)); size += READ_BLOCK_SIZE; cnt -= READ_BLOCK_SIZE; } } } } /* update & end message */ if (msgid > 0) { db_insert_message_block(blk, cnt, msgid); snprintf(newunique, UID_SIZE, "%lluA%lu", userid, time(NULL)); db_update_message(msgid, newunique, size+cnt, size+cnt+newlines); trace(TRACE_ERROR, "message [%llu] inserted, [%u] bytes", msgid, size+cnt); } fclose(infile); return 0; }