Eelco,

Thanks, that's a lot faster :)

Conversion started @  Sun Jul 28 17:24:37 2002
Conversion finished @ Sun Jul 28 17:28:20 2002

I had to edit the file a little bit so that it inputs files 
into their respective mailboxes under one user. Also I had
to edit:

*mbox_delimiter_pattern = "^From .*  ";
to
*mbox_delimiter_pattern = "^From .* "; (one less space)

However this seems to confuse imapd I think, because the imported
folders where the above actually made a difference now die on:

Jul 28 17:53:45 java dbmail/imap4[18953]: imap_process(): Executing command 
fetch... 
Jul 28 17:53:48 java dbmail/imap4[18953]: mime_readheader(): no valid mime 
headers found 
Jul 28 17:53:48 java dbmail/imap4[18953]: db_fetch_headers(): error fetching 
message, ID: 7518 
Jul 28 17:53:48 java dbmail/imap4[18953]: db_fetch_headers(): got error at 
level 0 
Jul 28 17:53:48 java dbmail/imap4[18953]: Received SIGSEGV 

Any ideas on this? Maybe I should just change the delimiters in the mailbox?

On Sat, Jul 27, 2002 at 11:15:26PM +0200, Eelco van Beek - IC&S wrote:

> You should take a look at the uni-one converter. It was especially 
> written for a company called uni-hone here in the netherlands to convert 
> 6,5 Gbyte of mboxes. All boxes were converted in 51 minutes.

> >In preparation of migrating our office setup to dbmail, I've decided
> >to convert my personal mailsetup first. I have about 165Mb of mboxes
> >I want to convert to dbmail with the mbox2dbmail tool. For convenience
> >I edited mbox2dbmail so that I can give it a mailbox as second argument.
> >However the mboximport seems to be a bit slow, it takes about half an
> >hour with a 5mb/240 mail mbox. Can anyone shed some light on this?
> >Used hardware is a Via C3/800 (about the same celeron) with 128 RAM,
> >all actions are done on localhost.


Regards,


                -- Frido
/* 
 * this program traverses a directory tree and executes
 * dbmail conversion on each file.
 *
 * slightly edited to parse this kind of directory
 *
 * /home/user/mail/box
 * /home/user/mail/lists/mailinglist
 * 
 * create mailbox for each mboxfile (box, lists/mailinglist)
 * use constant for user
 *
 * did not change printf statements, maybe confusing
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <dirent.h>
#include <time.h>
#include <unistd.h>
#include "db.h"
#include "auth.h"
#include "dbmailtypes.h"
#include "debug.h"
#include <regex.h>

/* maximum number of bytes handed to fgets() for a single input line */
#define MAX_LINESIZE 1024
/* size of the buffer that holds a generated unique message id */
#define UID_SIZE 70

/*
 * POSIX regular expression that marks the start of a new message in an
 * mbox file; it is compiled with regcomp() and matched against every
 * line read from the mbox.
 */
const char *mbox_delimiter_pattern = "^From .* ";
/*
 * Shared read buffer for the message currently being converted.
 * READ_BLOCK_SIZE is not defined in this file (presumably it comes from
 * one of the project headers); the extra MAX_LINESIZE + 1 bytes leave
 * room for one more line past a full block plus a terminator.
 */
char blk[READ_BLOCK_SIZE + MAX_LINESIZE + 1];

/* program name passed to openlog() for syslog messages */
#define PNAME "dbmail/uni-one-convertor"

/* forward declarations of the functions defined further down */
char *getusername (char *path);
int traverse (char *path);
int process_mboxfile(char *file, u64_t userid);



/*
 * Entry point: expects the directory (or mbox file) to convert as the
 * first argument, opens the syslog and database connections, runs the
 * conversion and prints the wall-clock start and end times.
 *
 * Returns -1 when the argument is missing or a database connection
 * cannot be opened, otherwise whatever traverse() returns.
 */
int main (int argc, char* argv[])
{
  time_t t_begin;
  time_t t_end;
  int rc;

  /* a path to walk is mandatory */
  if (argc < 2)
    {
      printf ("Error, traverse need a directory as argument\n");
      return -1;
    }

  /* set up logging before anything can fail */
  openlog(PNAME, LOG_PID, LOG_MAIL);
  configure_debug(TRACE_ERROR, 1, 0);

  /* both connections are required; auth_connect() is only tried
   * once db_connect() has succeeded */
  if (!(db_connect() == 0 && auth_connect() == 0))
    {
      printf("Error opening dbase connections\n");
      return -1;
    }

  /* time the whole conversion run */
  time (&t_begin);
  rc = traverse (argv[1]);
  time (&t_end);

  /* ctime() output already ends in a newline */
  printf ("Conversion started @  %s", ctime(&t_begin));
  printf ("Conversion finished @ %s", ctime(&t_end));

  return rc;
}



/*
 * Return the last component of a path, i.e. everything after the final
 * '/' in path.
 *
 * path: NUL-terminated path string (must not be NULL).
 *
 * Returns a pointer into path itself (nothing is allocated):
 *   - the text following the last '/', which is the empty string when
 *     path ends in '/';
 *   - path unchanged when it contains no '/' at all.
 */
char *getusername (char *path)
{
  char *slash = strrchr (path, '/');

  /* without a separator the whole string is the name; stepping one
   * past the start here would drop the first character */
  return (slash != NULL) ? slash + 1 : path;
}


int traverse (char *path)
{
  char newpath [1024];
  char *username;
  struct dirent **namelist;
  int n;
  u64_t userid;

  n = scandir (path, &namelist, 0, alphasort);

  if (n < 0)
    {
      printf ("file %s\n",path);
      username = getusername(path);
      printf ("username %s\n", username);
           
      printf("creating user...");
      userid = auth_user_exists("frido");
      if (userid != -1 && userid != 0)
        {
          printf("Ok id [%llu]\n", userid);
          printf("converting mailbox...");
          fflush(stdout);
          n = process_mboxfile(path, userid);
          if (n != 0)
            printf("Warning: error converting mailbox\n");
          else
            printf ("done :)\n");
        }  
      else
        {
          printf("user already exists. Skipping\n");
        }
        
    }
  else
    {
      while (n--)
        {
          if ((strcmp(namelist[n]->d_name,"..")!=0) &&
              (strcmp(namelist[n]->d_name,".")!=0))
            {
              sprintf (newpath,"%s/%s",path, namelist[n]->d_name);
              traverse (newpath);
            }
          free (namelist[n]);
        }
      free(namelist);
    }
  return 0;
}


/*
 * Store the remaining cnt bytes of blk as the last block of message
 * msgid, then record the message's unique id and final sizes.
 *
 * size is the number of bytes already written in earlier blocks and
 * newlines the number of lines read for this message.
 *
 * NOTE(review): the unique id is "<userid>A<seconds since epoch>", so
 * messages finished within the same second get the same id - confirm
 * that this is acceptable for the consumers of that field.
 */
static void finish_message(u64_t msgid, u64_t userid, unsigned cnt,
                           u64_t size, unsigned newlines)
{
  char newunique[UID_SIZE];

  db_insert_message_block(blk, cnt, msgid);

  snprintf(newunique, UID_SIZE, "%lluA%lu", userid,
           (unsigned long) time(NULL));
  db_update_message(msgid, newunique, size+cnt, size+cnt+newlines);
  trace(TRACE_ERROR, "message [%llu] inserted, [%llu] bytes", msgid,
        size+cnt);
}


/*
 * Import one mbox file into a mailbox of user userid.
 *
 * file:   path of the mbox file.  The first "./" found in it is removed
 *         IN PLACE (the caller's buffer is modified); the resulting
 *         string is used both as the mailbox name and as the file name
 *         to open.
 * userid: id of the user that owns the new mailbox and messages.
 *
 * The file is read line by line.  Every line matching
 * mbox_delimiter_pattern starts a new message; the header (up to and
 * including the first empty line) is stored as one block and the body
 * in blocks of READ_BLOCK_SIZE bytes.  Lines in front of the first
 * delimiter are ignored.
 *
 * Returns 0 on success, -1 when the pattern cannot be compiled, the
 * file cannot be opened or a read error occurs.
 */
int process_mboxfile(char *file, u64_t userid)
{
  regex_t preg;
  FILE *infile;
  int in_msg = 0;
  int header_passed = 0;
  int rc = 0;
  unsigned cnt = 0, len, newlines = 0;
  u64_t msgid = 0, size = 0;
  char saved;
  char *dotslash;

  /* derive the mailbox name from the path: cut out the first "./".
   * Source and destination overlap, so memmove() is required; the
   * +1 copies the terminating NUL as well. */
  if ((dotslash = strstr(file, "./")) != NULL)
    memmove(dotslash, dotslash + 2, strlen(dotslash + 2) + 1);

  db_createmailbox(file, userid);

  if (regcomp(&preg, mbox_delimiter_pattern, REG_NOSUB) != 0)
    {
      trace(TRACE_ERROR,"Regex compilation failed.");
      return -1;
    }

  if ((infile = fopen(file, "r")) == NULL)
    {
      trace(TRACE_ERROR,"Could not open file [%s]", file);
      regfree(&preg);
      return -1;
    }

  /* each line is appended to blk right after the data collected so far */
  while (fgets(&blk[cnt], MAX_LINESIZE, infile) != NULL)
    {
      /* check if this is an mbox delimiter */
      if (regexec(&preg, &blk[cnt], 0, NULL, 0) == 0)
        {
          /* a delimiter closes the message in progress, if any */
          if (in_msg)
            finish_message(msgid, userid, cnt, size, newlines);
          in_msg = 1;

          /* start new message */
          msgid = db_insert_message(userid, file, 0);
          header_passed = 0;
          cnt = 0;
          size = 0;
          newlines = 0;
          continue;
        }

      /* nothing to attach data to before the first delimiter;
       * cnt is still 0, so the next line overwrites this one */
      if (!in_msg)
        continue;

      newlines++;
      len = strlen(&blk[cnt]);

      if (!header_passed && strcmp(&blk[cnt], "\n") == 0)
        {
          /* empty line: the header is complete, store it as one block */
          db_insert_message_block(blk, cnt+len, msgid);
          header_passed = 1;
          size += (cnt+len);
          cnt = 0;
          continue;
        }

      cnt += len;

      /* A full block is written out for body data and also for an
       * oversized header, so that blk can never be overrun. */
      if (cnt >= READ_BLOCK_SIZE)
        {
          /* temporarily NUL-terminate the block at READ_BLOCK_SIZE for
           * the insert call, then restore the overwritten byte */
          saved = blk[READ_BLOCK_SIZE];

          blk[READ_BLOCK_SIZE] = '\0';
          db_insert_message_block(blk, READ_BLOCK_SIZE, msgid);
          blk[READ_BLOCK_SIZE] = saved;

          /* move the bytes past the block to the front of the buffer */
          memmove(blk, &blk[READ_BLOCK_SIZE], cnt - (READ_BLOCK_SIZE));
          size += READ_BLOCK_SIZE;
          cnt  -= READ_BLOCK_SIZE;
        }
    }

  if (ferror(infile))
    {
      trace(TRACE_ERROR,"Error reading file [%s]", file);
      rc = -1;
    }

  /* update & end the last message */
  if (msgid > 0)
    finish_message(msgid, userid, cnt, size, newlines);

  regfree(&preg);
  fclose(infile);
  return rc;
}

Reply via email to