On Mon, 2012-10-08 at 03:11 +0300, Timo Sirainen wrote:
> The 3. part is what I like changing the least. An alternative solution
> would be to just not map the entire cache file into memory all at
> once. The code was actually originally designed to do just that, but
> munmap()ing + mmap()ing again wasn't very efficient. But for LMTP
> there's really no need to map the whole file. All it really wants is
> to read a couple of header records and then append to the file. Maybe
> it could use an alternative code path that would simply do that
> instead of mmap()ing anything. It wouldn't solve it for IMAP though.
Attached patch changes LMTP/LDA to not mmap() the target mailbox's cache file. I did a few quick tests and it seems to work. I'll probably commit it to 2.1 hg after some more tests.
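The idea in the new code path, for anyone reading along: instead of mmap()ing the whole cache file, pread() just the byte range the caller asks for into a small reusable buffer and hand back a pointer into it. Below is a simplified standalone sketch of that strategy (plain POSIX, hypothetical range_reader type and names, no Dovecot APIs); the patch itself does this in mail_cache_map_with_read() using lib/buffer:

#include <sys/types.h>
#include <stdlib.h>
#include <unistd.h>

/* Hypothetical helper, not from the patch: satisfies range requests
   with pread() into one reusable buffer instead of mapping the file. */
struct range_reader {
	int fd;
	char *buf;
	size_t buf_size;	/* bytes currently valid in buf */
	off_t buf_offset;	/* file offset that buf starts at */
};

/* Return a pointer to file bytes [offset, offset+size), or NULL on
   read error / short read (range extends past end of file). */
static const void *
range_read(struct range_reader *r, off_t offset, size_t size)
{
	ssize_t ret;
	char *nbuf;

	if (offset >= r->buf_offset &&
	    offset + (off_t)size <= r->buf_offset + (off_t)r->buf_size) {
		/* requested range is already buffered */
		return r->buf + (offset - r->buf_offset);
	}

	nbuf = realloc(r->buf, size);
	if (nbuf == NULL)
		return NULL;
	r->buf = nbuf;

	ret = pread(r->fd, r->buf, size, offset);
	if (ret < 0 || (size_t)ret < size)
		return NULL;
	r->buf_offset = offset;
	r->buf_size = size;
	return r->buf;
}

For LDA/LMTP that means a couple of small reads (the main header plus the header-fields block it appends after) instead of faulting in the whole file. The patch deliberately turns the read path back off for compression (cache->map_with_read = FALSE), since rewriting the file via lots of tiny read()s wouldn't be efficient.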
diff -r d96313048f17 src/lib-index/mail-cache-compress.c
--- a/src/lib-index/mail-cache-compress.c	Mon Oct 08 03:14:12 2012 +0300
+++ b/src/lib-index/mail-cache-compress.c	Mon Oct 08 04:40:15 2012 +0300
@@ -353,6 +353,7 @@
 	uint32_t file_seq, old_offset;
 	ARRAY_TYPE(uint32_t) ext_offsets;
 	const uint32_t *offsets;
+	const void *data;
 	unsigned int i, count;
 	int fd, ret;
 
@@ -440,7 +441,7 @@
 	if (cache->file_cache != NULL)
 		file_cache_set_fd(cache->file_cache, cache->fd);
 
-	if (mail_cache_map(cache, 0, 0) < 0)
+	if (mail_cache_map(cache, 0, 0, &data) < 0)
 		return -1;
 	if (mail_cache_header_fields_read(cache) < 0)
 		return -1;
@@ -460,6 +461,9 @@
 	if (MAIL_INDEX_IS_IN_MEMORY(cache->index) || cache->index->readonly)
 		return 0;
 
+	/* compression isn't very efficient with small read()s */
+	cache->map_with_read = FALSE;
+
 	if (cache->index->lock_method == FILE_LOCK_METHOD_DOTLOCK) {
 		/* we're using dotlocking, cache file creation itself creates
 		   the dotlock file we need. */
@@ -494,5 +498,6 @@
 bool mail_cache_need_compress(struct mail_cache *cache)
 {
 	return cache->need_compress_file_seq != 0 &&
+		(cache->index->flags & MAIL_INDEX_OPEN_FLAG_SAVEONLY) == 0 &&
 		!cache->index->readonly;
 }
diff -r d96313048f17 src/lib-index/mail-cache-fields.c
--- a/src/lib-index/mail-cache-fields.c	Mon Oct 08 03:14:12 2012 +0300
+++ b/src/lib-index/mail-cache-fields.c	Mon Oct 08 04:40:15 2012 +0300
@@ -198,11 +198,14 @@
 	return list;
 }
 
-static int mail_cache_header_fields_get_offset(struct mail_cache *cache,
-					       uint32_t *offset_r, bool map)
+static int
+mail_cache_header_fields_get_offset(struct mail_cache *cache,
+				    uint32_t *offset_r,
+				    const struct mail_cache_header_fields **field_hdr_r)
 {
 	const struct mail_cache_header_fields *field_hdr;
 	struct mail_cache_header_fields tmp_field_hdr;
+	const void *data;
 	uint32_t offset = 0, next_offset;
 	unsigned int next_count = 0;
 	bool invalidate = FALSE;
@@ -210,6 +213,8 @@
 
 	if (MAIL_CACHE_IS_UNUSABLE(cache)) {
 		*offset_r = 0;
+		if (field_hdr_r != NULL)
+			*field_hdr_r = NULL;
 		return 0;
 	}
 
@@ -228,16 +233,16 @@
 			invalidate = TRUE;
 
 		if (cache->mmap_base != NULL) {
-			if (mail_cache_map(cache, offset,
-					   sizeof(*field_hdr)) < 0)
-				return -1;
-			if (offset >= cache->mmap_length) {
+			ret = mail_cache_map(cache, offset, sizeof(*field_hdr),
+					     &data);
+			if (ret <= 0) {
+				if (ret < 0)
+					return -1;
 				mail_cache_set_corrupted(cache,
					"header field next_offset points outside file");
 				return -1;
 			}
-
-			field_hdr = CONST_PTR_OFFSET(cache->data, offset);
+			field_hdr = data;
 		} else {
 			/* if we need to follow multiple offsets to get to
 			   the last one, it's faster to just pread() the file
@@ -270,7 +275,7 @@
 	if (next_count > MAIL_CACHE_HEADER_FIELD_CONTINUE_COUNT)
 		cache->need_compress_file_seq = cache->hdr->file_seq;
 
-	if (map) {
+	if (field_hdr_r != NULL) {
 		if (cache->file_cache != NULL && invalidate) {
 			/* if this isn't the first header in file and we hadn't
 			   read this before, we can't trust that the cached
@@ -278,17 +283,23 @@
 			file_cache_invalidate(cache->file_cache, offset,
 					      field_hdr->size);
 		}
-		if (mail_cache_map(cache, offset, field_hdr->size) < 0)
+		if ((ret = mail_cache_map(cache, offset, field_hdr->size,
+					  &data)) < 0)
 			return -1;
+		if (ret == 0) {
+			mail_cache_set_corrupted(cache,
+				"header field size outside file");
+			return -1;
+		}
+		*field_hdr_r = data;
 	}
-
 	*offset_r = offset;
 	return 0;
 }
 
 int mail_cache_header_fields_read(struct mail_cache *cache)
 {
-	const struct mail_cache_header_fields *field_hdr = NULL;
+	const struct mail_cache_header_fields *field_hdr;
 	struct mail_cache_field field;
 	const uint32_t *last_used, *sizes;
 	const uint8_t *types, *decisions;
@@ -299,7 +310,7 @@
 	time_t max_drop_time;
 	uint32_t offset, i;
 
-	if (mail_cache_header_fields_get_offset(cache, &offset, TRUE) < 0)
+	if (mail_cache_header_fields_get_offset(cache, &offset, &field_hdr) < 0)
 		return -1;
 
 	if (offset == 0) {
@@ -307,13 +318,6 @@
 		return 0;
 	}
 
-	field_hdr = CONST_PTR_OFFSET(cache->data, offset);
-	if (offset + field_hdr->size > cache->mmap_length) {
-		mail_cache_set_corrupted(cache,
-			"field header points outside file");
-		return -1;
-	}
-
 	/* check the fixed size of the header. name[] has to be checked
 	   separately */
 	if (field_hdr->size < sizeof(*field_hdr) +
@@ -322,9 +326,7 @@
 		return -1;
 	}
 
-	field_hdr = CONST_PTR_OFFSET(cache->data, offset);
 	new_fields_count = field_hdr->fields_count;
-
 	if (new_fields_count != 0) {
 		cache->file_field_map =
 			i_realloc(cache->file_field_map,
diff -r d96313048f17 src/lib-index/mail-cache-lookup.c
--- a/src/lib-index/mail-cache-lookup.c	Mon Oct 08 03:14:12 2012 +0300
+++ b/src/lib-index/mail-cache-lookup.c	Mon Oct 08 04:40:15 2012 +0300
@@ -14,6 +14,7 @@
 		      const struct mail_cache_record **rec_r)
 {
 	const struct mail_cache_record *rec;
+	const void *data;
 
 	i_assert(offset != 0);
 
@@ -24,14 +25,15 @@
 	}
 
 	/* we don't know yet how large the record is, so just guess */
-	if (mail_cache_map(cache, offset, sizeof(*rec) + CACHE_PREFETCH) < 0)
+	if (mail_cache_map(cache, offset, sizeof(*rec) + CACHE_PREFETCH,
+			   &data) < 0)
 		return -1;
 
 	if (offset + sizeof(*rec) > cache->mmap_length) {
 		mail_cache_set_corrupted(cache, "record points outside file");
 		return -1;
 	}
-	rec = CACHE_RECORD(cache, offset);
+	rec = data;
 
 	if (rec->size < sizeof(*rec)) {
 		mail_cache_set_corrupted(cache, "invalid record size");
@@ -39,9 +41,9 @@
 	}
 	if (rec->size > CACHE_PREFETCH) {
 		/* larger than we guessed. map the rest of the record. */
-		if (mail_cache_map(cache, offset, rec->size) < 0)
+		if (mail_cache_map(cache, offset, rec->size, &data) < 0)
 			return -1;
-		rec = CACHE_RECORD(cache, offset);
+		rec = data;
 	}
 
 	if (rec->size > cache->mmap_length ||
@@ -270,6 +272,7 @@
 	field_r->field_idx = field_idx;
 	field_r->data = CONST_PTR_OFFSET(ctx->rec, ctx->pos);
 	field_r->size = data_size;
+	field_r->offset = ctx->offset + ctx->pos;
 
 	/* each record begins from 32bit aligned position */
 	ctx->pos += (data_size + sizeof(uint32_t)-1) & ~(sizeof(uint32_t)-1);
@@ -444,8 +447,7 @@
 	lines_count = i;
 
 	hdr_data = t_new(struct header_lookup_data, 1);
-	hdr_data->offset = (const char *)&lines[lines_count+1] -
-		(const char *)ctx->view->cache->data;
+	hdr_data->offset = field->offset + (lines_count+1) * sizeof(uint32_t);
 	hdr_data->data_size = data_size;
 
 	for (i = 0; i < lines_count; i++) {
@@ -471,6 +473,7 @@
 	struct mail_cache_iterate_field field;
 	struct header_lookup_context ctx;
 	struct header_lookup_line *lines;
+	const void *data;
 	const unsigned char *p, *start, *end;
 	uint8_t *field_state;
 	unsigned int i, count, max_field = 0;
@@ -535,7 +538,10 @@
 
 	/* then start filling dest buffer from the headers */
 	for (i = 0; i < count; i++) {
-		start = CONST_PTR_OFFSET(cache->data, lines[i].data->offset);
+		if (mail_cache_map(cache, lines[i].data->offset,
+				   lines[i].data->data_size, &data) <= 0)
+			return -1;
+		start = data;
 		end = start + lines[i].data->data_size;
 
 		/* find the end of the (multiline) header */
diff -r d96313048f17 src/lib-index/mail-cache-private.h
--- a/src/lib-index/mail-cache-private.h	Mon Oct 08 03:14:12 2012 +0300
+++ b/src/lib-index/mail-cache-private.h	Mon Oct 08 04:40:15 2012 +0300
@@ -134,10 +134,14 @@
 	ino_t st_ino;
 	dev_t st_dev;
 
+	size_t mmap_length;
+	/* a) mmaping the whole file */
 	void *mmap_base;
-	const void *data;
-	size_t mmap_length;
+	/* b) using file cache */
 	struct file_cache *file_cache;
+	/* c) using small read() calls with MAIL_INDEX_OPEN_FLAG_SAVEONLY */
+	uoff_t read_offset;
+	buffer_t *read_buf;
 
 	/* mail_cache_map() increases this always. */
 	unsigned int remap_counter;
@@ -174,6 +178,7 @@
 	unsigned int hdr_modified:1;
 	unsigned int field_header_write_pending:1;
 	unsigned int compressing:1;
+	unsigned int map_with_read:1;
 };
 
 struct mail_cache_loop_track {
@@ -205,6 +210,7 @@
 	unsigned int field_idx;
 	const void *data;
 	unsigned int size;
+	uoff_t offset;
 };
 
 struct mail_cache_lookup_iterate_ctx {
@@ -256,7 +262,8 @@
 int mail_cache_lookup_iter_next(struct mail_cache_lookup_iterate_ctx *ctx,
 				struct mail_cache_iterate_field *field_r);
 
-int mail_cache_map(struct mail_cache *cache, size_t offset, size_t size);
+int mail_cache_map(struct mail_cache *cache, size_t offset, size_t size,
+		   const void **data_r);
 void mail_cache_file_close(struct mail_cache *cache);
 int mail_cache_reopen(struct mail_cache *cache);
 
diff -r d96313048f17 src/lib-index/mail-cache-transaction.c
--- a/src/lib-index/mail-cache-transaction.c	Mon Oct 08 03:14:12 2012 +0300
+++ b/src/lib-index/mail-cache-transaction.c	Mon Oct 08 04:40:15 2012 +0300
@@ -1091,6 +1091,8 @@
 			 uint32_t new_offset)
 {
 	const struct mail_cache_record *rec;
+	const void *data;
+	int ret;
 
 	i_assert(cache->locked);
 
@@ -1107,15 +1109,16 @@
 	   records at the same time. we'd rather not lose those additions, so
 	   force the linking order to be new_offset -> old_offset if it isn't
 	   already. */
-	if (mail_cache_map(cache, new_offset, sizeof(*rec)) < 0)
-		return -1;
-	if (new_offset + sizeof(*rec) > cache->mmap_length) {
+	if ((ret = mail_cache_map(cache, new_offset, sizeof(*rec),
+				  &data)) <= 0) {
+		if (ret < 0)
+			return -1;
 		mail_cache_set_corrupted(cache,
 			"Cache record offset %u points outside file",
 			new_offset);
 		return -1;
 	}
-	rec = CACHE_RECORD(cache, new_offset);
+	rec = data;
 	if (rec->prev_offset == old_offset) {
 		/* link is already correct */
 		return 0;
diff -r d96313048f17 src/lib-index/mail-cache.c
--- a/src/lib-index/mail-cache.c	Mon Oct 08 03:14:12 2012 +0300
+++ b/src/lib-index/mail-cache.c	Mon Oct 08 04:40:15 2012 +0300
@@ -7,6 +7,7 @@
 #include "nfs-workarounds.h"
 #include "file-cache.h"
 #include "mmap-util.h"
+#include "read-full.h"
 #include "write-full.h"
 #include "mail-cache-private.h"
 
@@ -59,7 +60,6 @@
 		file_cache_set_fd(cache->file_cache, -1);
 
 	cache->mmap_base = NULL;
-	cache->data = NULL;
 	cache->hdr = NULL;
 	cache->mmap_length = 0;
 	cache->last_field_header_offset = 0;
@@ -145,6 +145,7 @@
 {
 	struct mail_index_view *view;
 	const struct mail_index_ext *ext;
+	const void *data;
 
 	i_assert(!cache->locked);
 
@@ -167,7 +168,7 @@
 
 	mail_cache_init_file_cache(cache);
 
-	if (mail_cache_map(cache, 0, 0) < 0)
+	if (mail_cache_map(cache, 0, 0, &data) < 0)
 		return -1;
 
 	if (mail_cache_header_fields_read(cache) < 0)
@@ -213,10 +214,9 @@
 	cache->need_compress_file_seq = hdr->file_seq;
 }
 
-static bool mail_cache_verify_header(struct mail_cache *cache)
+static bool mail_cache_verify_header(struct mail_cache *cache,
+				     const struct mail_cache_header *hdr)
 {
-	const struct mail_cache_header *hdr = cache->data;
-
 	/* check that the header is still ok */
 	if (cache->mmap_length < sizeof(struct mail_cache_header)) {
 		mail_cache_set_corrupted(cache, "File too small");
@@ -265,17 +265,88 @@
 	return TRUE;
 }
 
-int mail_cache_map(struct mail_cache *cache, size_t offset, size_t size)
+static int
+mail_cache_map_finish(struct mail_cache *cache, uoff_t offset, size_t size,
+		      const void *data, bool copy_hdr)
 {
+	if (offset == 0) {
+		const struct mail_cache_header *hdr = data;
+
+		if (!mail_cache_verify_header(cache, hdr)) {
+			cache->need_compress_file_seq =
+				!MAIL_CACHE_IS_UNUSABLE(cache) &&
+				cache->hdr->file_seq != 0 ?
+				cache->hdr->file_seq : 0;
+			return -1;
+		}
+		if (!copy_hdr)
+			cache->hdr = data;
+		else {
+			memcpy(&cache->hdr_ro_copy, data,
+			       sizeof(cache->hdr_ro_copy));
+			cache->hdr = &cache->hdr_ro_copy;
+		}
+		mail_cache_update_need_compress(cache);
+	}
+
+	if (offset + size > cache->mmap_length)
+		return 0;
+	return 1;
+}
+
+static int
+mail_cache_map_with_read(struct mail_cache *cache, size_t offset, size_t size,
+			 const void **data_r)
+{
+	void *data;
 	ssize_t ret;
 
-	cache->remap_counter++;
+	if (cache->read_buf == NULL) {
+		cache->read_buf =
+			buffer_create_dynamic(default_pool, size);
+	} else if (cache->read_offset <= offset &&
+		   cache->read_offset + cache->read_buf->used >= offset+size) {
+		/* already mapped */
+		*data_r = CONST_PTR_OFFSET(cache->mmap_base,
+					   offset - cache->read_offset);
+		return mail_cache_map_finish(cache, offset, size, *data_r, TRUE);
+	} else {
+		buffer_set_used_size(cache->read_buf, 0);
+	}
+	data = buffer_append_space_unsafe(cache->read_buf, size);
+	ret = pread(cache->fd, data, size, offset);
+	if (ret < 0) {
+		if (errno != ESTALE)
+			mail_cache_set_syscall_error(cache, "read()");
+
+		buffer_set_used_size(cache->read_buf, 0);
+		cache->hdr = NULL;
+		cache->mmap_length = 0;
+		return -1;
+	}
+	buffer_set_used_size(cache->read_buf, ret);
+
+	cache->read_offset = offset;
+	cache->mmap_length = offset + size;
+
+	*data_r = data;
+	return mail_cache_map_finish(cache, offset, size, data, TRUE);
+}
+
+int mail_cache_map(struct mail_cache *cache, size_t offset, size_t size,
+		   const void **data_r)
+{
+	const void *data;
+	ssize_t ret;
 
 	if (size == 0)
 		size = sizeof(struct mail_cache_header);
 
+	cache->remap_counter++;
+	if (cache->map_with_read)
+		return mail_cache_map_with_read(cache, offset, size, data_r);
+
 	if (cache->file_cache != NULL) {
-		cache->data = NULL;
 		cache->hdr = NULL;
 
 		ret = file_cache_read(cache->file_cache, offset, size);
@@ -292,30 +363,17 @@
 			return -1;
 		}
 
-		cache->data = file_cache_get_map(cache->file_cache,
-						 &cache->mmap_length);
-
-		if (offset == 0) {
-			if (!mail_cache_verify_header(cache)) {
-				cache->need_compress_file_seq =
-					!MAIL_CACHE_IS_UNUSABLE(cache) &&
-					cache->hdr->file_seq != 0 ?
-					cache->hdr->file_seq : 0;
-				return -1;
-			}
-			memcpy(&cache->hdr_ro_copy, cache->data,
-			       sizeof(cache->hdr_ro_copy));
-		}
-		cache->hdr = &cache->hdr_ro_copy;
-		if (offset == 0)
-			mail_cache_update_need_compress(cache);
-		return 0;
+		data = file_cache_get_map(cache->file_cache,
+					  &cache->mmap_length);
+		*data_r = CONST_PTR_OFFSET(data, offset);
+		return mail_cache_map_finish(cache, offset, size, *data_r, TRUE);
 	}
 
 	if (offset < cache->mmap_length &&
 	    size <= cache->mmap_length - offset) {
 		/* already mapped */
-		return 0;
+		*data_r = CONST_PTR_OFFSET(cache->mmap_base, offset);
+		return 1;
 	}
 
 	if (cache->mmap_base != NULL) {
@@ -338,28 +396,17 @@
 	cache->mmap_base = mmap_ro_file(cache->fd, &cache->mmap_length);
 	if (cache->mmap_base == MAP_FAILED) {
 		cache->mmap_base = NULL;
-		cache->data = NULL;
 		mail_cache_set_syscall_error(cache, "mmap()");
 		return -1;
 	}
-	cache->data = cache->mmap_base;
-
-	if (!mail_cache_verify_header(cache)) {
-		cache->need_compress_file_seq =
-			!MAIL_CACHE_IS_UNUSABLE(cache) &&
-			cache->hdr->file_seq != 0 ?
-			cache->hdr->file_seq : 0;
-		return -1;
-	}
-
-	cache->hdr = cache->data;
-	if (offset == 0)
-		mail_cache_update_need_compress(cache);
-	return 0;
+	*data_r = CONST_PTR_OFFSET(cache->mmap_base, offset);
+	return mail_cache_map_finish(cache, offset, size, *data_r, FALSE);
 }
 
 static int mail_cache_try_open(struct mail_cache *cache)
 {
+	const void *data;
+
 	cache->opened = TRUE;
 
 	if (MAIL_INDEX_IS_IN_MEMORY(cache->index))
@@ -379,9 +426,9 @@
 
 	mail_cache_init_file_cache(cache);
 
-	if (mail_cache_map(cache, 0, sizeof(struct mail_cache_header)) < 0)
+	if (mail_cache_map(cache, 0, sizeof(struct mail_cache_header),
+			   &data) < 0)
 		return -1;
-
 	return 1;
 }
 
@@ -425,6 +472,8 @@
 	if (!MAIL_INDEX_IS_IN_MEMORY(index) &&
 	    (index->flags & MAIL_INDEX_OPEN_FLAG_MMAP_DISABLE) != 0)
 		cache->file_cache = file_cache_new(-1);
+	cache->map_with_read =
+		(cache->index->flags & MAIL_INDEX_OPEN_FLAG_SAVEONLY) != 0;
 
 	cache->ext_id =
 		mail_index_ext_register(index, "cache", 0,
@@ -550,6 +599,7 @@
 			       bool nonblock)
 {
 	const struct mail_index_ext *ext;
+	const void *data;
 	struct mail_index_view *iview;
 	uint32_t reset_id;
 	int i, ret;
@@ -613,7 +663,7 @@
 			file_cache_invalidate(cache->file_cache, 0,
 					      sizeof(struct mail_cache_header));
 		}
-		if (mail_cache_map(cache, 0, 0) == 0)
+		if (mail_cache_map(cache, 0, 0, &data) == 0)
 			cache->hdr_copy = *cache->hdr;
 		else {
 			(void)mail_cache_unlock(cache);
@@ -678,13 +728,8 @@
 		return -1;
 	}
 
-	if (cache->file_cache != NULL) {
+	if (cache->file_cache != NULL)
 		file_cache_write(cache->file_cache, data, size, offset);
-
-		/* data pointer may change if file cache was grown */
-		cache->data = file_cache_get_map(cache->file_cache,
-						 &cache->mmap_length);
-	}
 	return 0;
 }
 
diff -r d96313048f17 src/lib-index/mail-index.h
--- a/src/lib-index/mail-index.h	Mon Oct 08 03:14:12 2012 +0300
+++ b/src/lib-index/mail-index.h	Mon Oct 08 04:40:15 2012 +0300
@@ -27,7 +27,10 @@
 	MAIL_INDEX_OPEN_FLAG_KEEP_BACKUPS	= 0x100,
 	/* If we run out of disk space, fail modifications instead of
 	   moving indexes to memory. */
-	MAIL_INDEX_OPEN_FLAG_NEVER_IN_MEMORY	= 0x200
+	MAIL_INDEX_OPEN_FLAG_NEVER_IN_MEMORY	= 0x200,
+	/* We're only going to save new messages to the index.
+	   Avoid unnecessary reads. */
+	MAIL_INDEX_OPEN_FLAG_SAVEONLY		= 0x400
 };
 
 enum mail_index_header_compat_flags {
diff -r d96313048f17 src/lib-storage/index/index-storage.c
--- a/src/lib-storage/index/index-storage.c	Mon Oct 08 03:14:12 2012 +0300
+++ b/src/lib-storage/index/index-storage.c	Mon Oct 08 04:40:15 2012 +0300
@@ -304,6 +304,8 @@
 	ibox->list_index_sync_ext_id = (uint32_t)-1;
 	ibox->index_flags = MAIL_INDEX_OPEN_FLAG_CREATE |
 		mail_storage_settings_to_index_flags(box->storage->set);
+	if ((box->flags & MAILBOX_FLAG_SAVEONLY) != 0)
+		ibox->index_flags |= MAIL_INDEX_OPEN_FLAG_SAVEONLY;
 	ibox->next_lock_notify = time(NULL) + LOCK_NOTIFY_INTERVAL;
 	MODULE_CONTEXT_SET(box, index_storage_module, ibox);
 }
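For the new flag to kick in, the mailbox has to be opened with MAILBOX_FLAG_SAVEONLY, which the deliver code already does; index-storage then translates it to MAIL_INDEX_OPEN_FLAG_SAVEONLY (last hunk above) and mail_cache_open_or_create() turns that into map_with_read. A rough caller-side sketch, only to show the path the flag takes (hypothetical helper, error handling trimmed, not part of the patch):

static struct mailbox *
open_for_delivery(struct mail_namespace *ns, const char *name)
{
	struct mailbox *box;

	/* MAILBOX_FLAG_SAVEONLY -> MAIL_INDEX_OPEN_FLAG_SAVEONLY
	   (index-storage.c hunk) -> cache->map_with_read=TRUE
	   (mail-cache.c hunk), so the cache file gets pread() on
	   demand instead of being mmap()ed. */
	box = mailbox_alloc(ns->list, name, MAILBOX_FLAG_SAVEONLY);
	if (mailbox_open(box) < 0) {
		mailbox_free(&box);
		return NULL;
	}
	return box;
}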