comments below

On 01/05/14 08:27, Qiao Nuohan wrote:
> functions are used to write 1st and 2nd dump_bitmap of kdump-compressed
> format,
> which is used to indicate whether the corresponded page is existed in vmcore.
> 1st and 2nd dump_bitmap are same, because dump level is specified to 1 here.
>
> Signed-off-by: Qiao Nuohan <qiaonuo...@cn.fujitsu.com>
> ---
>  dump.c                |  116
> +++++++++++++++++++++++++++++++++++++++++++++++++
>  include/sysemu/dump.h |    7 +++
>  2 files changed, 123 insertions(+), 0 deletions(-)
>
> diff --git a/dump.c b/dump.c
> index e3623b9..1fae152 100644
> --- a/dump.c
> +++ b/dump.c
> @@ -80,12 +80,14 @@ typedef struct DumpState {
>      bool flag_flatten;
>      uint32_t nr_cpus;
>      size_t page_size;
> +    uint32_t page_shift;
>      uint64_t max_mapnr;
>      size_t len_dump_bitmap;
>      void *note_buf;
>      size_t note_buf_offset;
>      off_t offset_dump_bitmap;
>      off_t offset_page;
> +    size_t num_dumpable;
>      uint32_t flag_compress;
>  } DumpState;
>
> @@ -972,6 +974,120 @@ static int write_dump_header(DumpState *s)
>      }
>  }
>
> +/* set dump_bitmap sequencely. bitmap is not allowed to be rewritten. */
> +static int set_dump_bitmap(int64_t last_pfn, int64_t pfn, uint32_t value,
> +                           void *buf, DumpState *s)
> +{

I'd recommend
- "uint8_t *buf", and
- "bool value", and
- maybe an assert() that neither of pfn and last_pfn is negative.

> +    off_t old_offset, new_offset;
> +    off_t offset_bitmap1, offset_bitmap2;
> +    uint32_t byte, bit;
> +
> +    /* should not set the previous place */
> +    if (last_pfn > pfn) {
> +        return -1;
> +    }
> +
> +    /*
> +     * if the bit needed to be set is not cached in buf, flush the data in buf
> +     * to vmcore firstly.
> +     * making new_offset be bigger than old_offset can also sync remained data
> +     * into vmcore.
> +     */
> +    old_offset = BUFSIZE_BITMAP * (last_pfn / PFN_BUFBITMAP);
> +    new_offset = BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);
> +
> +    while (old_offset < new_offset) {
> +        /* calculate the offset and write dump_bitmap */
> +        offset_bitmap1 = s->offset_dump_bitmap + old_offset;
> +        if (write_buffer(s->fd, s->flag_flatten, offset_bitmap1, buf,
> +                         BUFSIZE_BITMAP) < 0) {
> +            return -1;
> +        }
> +
> +        /* dump level 1 is chosen, so 1st and 2nd bitmap are same */
> +        offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap +
> +                         old_offset;
> +        if (write_buffer(s->fd, s->flag_flatten, offset_bitmap2, buf,
> +                         BUFSIZE_BITMAP) < 0) {
> +            return -1;
> +        }
> +
> +        memset(buf, 0, BUFSIZE_BITMAP);
> +        old_offset += BUFSIZE_BITMAP;
> +    }

Seems sane to me.

> +
> +    /* get the exact place of the bit in the buf, and set it */
> +    byte = (pfn % PFN_BUFBITMAP) >> 3;

(dividing by CHAR_BIT would be more consistent with the connection
between PFN_BUFBITMAP and BUFSIZE_BITMAP)

> +    bit = (pfn % PFN_BUFBITMAP) & 7;
> +    if (value) {
> +        ((char *)buf)[byte] |= 1<<bit;
> +    } else {
> +        ((char *)buf)[byte] &= ~(1<<bit);
> +    }

Shudder. I would much prefer (with "uint8_t *buf" included):

    if (value) {
        buf[byte] |= 1u << bit;
    } else {
        buf[byte] &= ~(1u << bit);
    }

When you interpret the expressions in question against the C standard,
my proposal is simpler to understand, and relies on fewer
platform-specifics. The current code looks simple, but it's actually
quite complex. In any case, I think it will work in practice.
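Putting those suggestions together, here is a minimal standalone sketch
of what I mean -- the helper name is made up, the buffer size just
mirrors the PAGE_SIZE value from the header hunk below, and only the
bit bookkeeping is shown, none of the flushing:

    #include <assert.h>
    #include <limits.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BUFSIZE_BITMAP 4096                  /* mirrors PAGE_SIZE below */
    #define PFN_BUFBITMAP  (CHAR_BIT * BUFSIZE_BITMAP)

    /* only the bit update from set_dump_bitmap(), with the suggested types */
    static void set_bitmap_bit(uint8_t *buf, int64_t pfn, bool value)
    {
        uint32_t byte, bit;

        assert(pfn >= 0);
        byte = (pfn % PFN_BUFBITMAP) / CHAR_BIT;
        bit  = (pfn % PFN_BUFBITMAP) % CHAR_BIT;

        if (value) {
            buf[byte] |= 1u << bit;
        } else {
            buf[byte] &= ~(1u << bit);
        }
    }

    int main(void)
    {
        uint8_t buf[BUFSIZE_BITMAP] = { 0 };

        set_bitmap_bit(buf, 10, true);
        printf("buf[1] = 0x%02x\n", buf[1]);     /* 0x04: pfn 10 -> byte 1, bit 2 */

        set_bitmap_bit(buf, 10, false);
        printf("buf[1] = 0x%02x\n", buf[1]);     /* 0x00 again */
        return 0;
    }

With uint8_t there is no signedness question left in the |= / &= lines,
and the CHAR_BIT form keeps the byte/bit split visibly tied to how
PFN_BUFBITMAP is derived from BUFSIZE_BITMAP.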
> +
> +    return 0;
> +}
> +
> +static int write_dump_bitmap(DumpState *s)
> +{
> +    int ret = 0;
> +    int64_t pfn_start, pfn_end, pfn;
> +    int64_t last_pfn;
> +    void *dump_bitmap_buf;
> +    size_t num_dumpable;
> +    MemoryMapping *memory_mapping;
> +
> +    /* dump_bitmap_buf is used to store dump_bitmap temporarily */
> +    dump_bitmap_buf = g_malloc0(BUFSIZE_BITMAP);
> +
> +    num_dumpable = 0;
> +    last_pfn = -1;

This would trip up the assert() that I proposed above. It also exploits
that (last_pfn == -1) will result in (old_offset = 0) in
set_dump_bitmap(), due to the division. I think it's fairly ugly, but
should work in practice.

> +
> +    /*
> +     * exam memory page by page, and set the bit in dump_bitmap corresponded
> +     * to the existing page
> +     */
> +    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
> +        pfn_start = paddr_to_pfn(memory_mapping->phys_addr, s->page_shift);
> +        pfn_end = paddr_to_pfn(memory_mapping->phys_addr +
> +                               memory_mapping->length, s->page_shift);

OK, the intent seems to be to make "pfn_end" exclusive (see the loop
below). That however depends on "memory_mapping->length" being an
integral multiple of the target page size. I propose an assert() here
for that reason.

Looking at patch v6 10/11, for a compressed dump both paging and
filtering are rejected. This implies that in dump_init(),
- qemu_get_guest_simple_memory_mapping() is called -- ie. the memory
  mapping list will directly reflect the guest-phys blocks,
- memory_mapping_filter() will *not* be called.

These should ensure that pfn_end is exclusive here (and that
"phys_addr" falls to the beginning of a page), but an assert() with a
comment would help.
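To make that concrete, a tiny standalone illustration -- block address
and size are made up, and it just reuses the paddr_to_pfn() macro from
the header hunk below -- of the two assumptions I'd like to see
asserted:

    #include <assert.h>
    #include <stdio.h>

    #define ARCH_PFN_OFFSET (0)
    #define paddr_to_pfn(X, page_shift) \
        (((unsigned long long)(X) >> (page_shift)) - ARCH_PFN_OFFSET)

    int main(void)
    {
        unsigned page_shift = 12;                    /* 4 KiB pages */
        unsigned long long page_size = 1ull << page_shift;
        unsigned long long phys_addr = 0x100000;     /* made-up guest-phys block */
        unsigned long long length    = 0x800000;     /* an integral number of pages */

        /* the two assumptions discussed above, spelled out */
        assert(phys_addr % page_size == 0);
        assert(length % page_size == 0);

        /* with those guaranteed, pfn_end is exactly one past the block's last page */
        printf("pfn_start=%llu pfn_end=%llu\n",
               paddr_to_pfn(phys_addr, page_shift),
               paddr_to_pfn(phys_addr + length, page_shift));
        return 0;
    }

If length were not a page multiple, the shift would simply round the
tail down, and the partial last page would never be marked in the
bitmap -- which is why I'd like the assert plus a comment here.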
> +
> +        for (pfn = pfn_start; pfn < pfn_end; pfn++) {
> +            ret = set_dump_bitmap(last_pfn, pfn, 1, dump_bitmap_buf, s);

(1 --> "true" after replacing that param type with bool)

> +            if (ret < 0) {
> +                dump_error(s, "dump: failed to set dump_bitmap.\n");

Alas, dump_error() ignores the reason, but maybe we can fix that at a
different time.

> +                ret = -1;
> +                goto out;
> +            }
> +
> +            last_pfn = pfn;
> +            num_dumpable++;
> +        }
> +    }
> +
> +    /*
> +     * last_pfn > -1 means bitmap is set, then remained data in buf should be
> +     * synchronized into vmcore
> +     */

As far as I can see, (num_dumpable > 0) could work just as well, and it
would eliminate the super-ugly (last_pfn == -1) case.

> +    if (last_pfn > -1) {
> +        ret = set_dump_bitmap(last_pfn, last_pfn + PFN_BUFBITMAP, 0,
> +                              dump_bitmap_buf, s);

Results in new_offset == old_offset + BUFSIZE_BITMAP in
set_dump_bitmap(), that is, one iteration of the loop. It seems correct
to call this if we have set at least one bit, because set_dump_bitmap()
*always* leaves the most recently set bit un-synced.

> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to sync dump_bitmap.\n");
> +            ret = -1;
> +            goto out;
> +        }
> +    }
> +
> +    /* number of dumpable pages that will be dumped later */
> +    s->num_dumpable = num_dumpable;
> +
> +out:
> +    g_free(dump_bitmap_buf);
> +
> +    return ret;
> +}
> +
>  static ram_addr_t get_start_block(DumpState *s)
>  {
>      GuestPhysBlock *block;
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index 9e47b4c..b5eaf8d 100644
> --- a/include/sysemu/dump.h
> +++ b/include/sysemu/dump.h
> @@ -27,11 +27,18 @@
>  #define DUMP_DH_COMPRESSED_LZO      (0x2)
>  #define DUMP_DH_COMPRESSED_SNAPPY   (0x4)
>
> +#define PAGE_SIZE                   (4096)
>  #define KDUMP_SIGNATURE             "KDUMP "
>  #define SIG_LEN                     (sizeof(KDUMP_SIGNATURE) - 1)
>  #define PHYS_BASE                   (0)
>  #define DUMP_LEVEL                  (1)
>  #define DISKDUMP_HEADER_BLOCKS      (1)
> +#define BUFSIZE_BITMAP              (PAGE_SIZE)
> +#define PFN_BUFBITMAP               (CHAR_BIT * BUFSIZE_BITMAP)
> +#define ARCH_PFN_OFFSET             (0)
> +
> +#define paddr_to_pfn(X, page_shift) \
> +    (((unsigned long long)(X) >> (page_shift)) - ARCH_PFN_OFFSET)
>
>  typedef struct ArchDumpInfo {
>      int d_machine;  /* Architecture */
>

I think these magic constants are somewhat tied to x86, and therefore
should be in an arch-specific file rather than a common file, but
whoever wants to extend this to another architecture can do that.

I think I haven't found anything that I'd call a bug.

Reviewed-by: Laszlo Ersek <ler...@redhat.com>