On Tue, May 16, 2017 at 11:36:15AM +0900, Sergey Senozhatsky wrote:
> > > > @@ -794,7 +801,15 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
> > > >  	entry = zram_dedup_find(zram, page, &checksum);
> > > >  	if (entry) {
> > > >  		comp_len = entry->len;
> > > > -		goto found_dup;
> > > > +		zram_slot_lock(zram, index);
> > > > +		zram_free_page(zram, index);
> > > > +		zram_set_flag(zram, index, ZRAM_DUP);
> > > > +		zram_set_entry(zram, index, entry);
> > > > +		zram_set_obj_size(zram, index, comp_len);
> > > > +		zram_slot_unlock(zram, index);
> > > > +		atomic64_add(comp_len, &zram->stats.dup_data_size);
> > > > +		atomic64_inc(&zram->stats.pages_stored);
> > > > +		return 0;
> > >
> > > hm. that's a somewhat big code duplication. isn't it?
> >
> > Yub. 3 parts. above part, zram_same_page_write and tail of
> > __zram_bvec_write.
>
> hmm... good question... hardly can think of anything significantly
> better, zram object handling is now a mix of flags, entries,
> ref_counters, etc. etc. may be we can merge some of those ops, if we
> would keep slot locked through the entire __zram_bvec_write(), but
> that does not look attractive.
>
> something ABSOLUTELY untested and incomplete. not even compile tested (!).
> 99% broken and stupid (!). but there is one thing that it has revealed, so
> thus incomplete. see below:
>
> ---
>
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index 372602c7da49..b31543c40d54 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -509,11 +509,8 @@ static bool zram_same_page_write(struct zram *zram, u32 index,
>  	if (page_same_filled(mem, &element)) {
>  		kunmap_atomic(mem);
>  		/* Free memory associated with this sector now. */
> -		zram_slot_lock(zram, index);
> -		zram_free_page(zram, index);
>  		zram_set_flag(zram, index, ZRAM_SAME);
>  		zram_set_element(zram, index, element);
> -		zram_slot_unlock(zram, index);
>
>  		atomic64_inc(&zram->stats.same_pages);
>  		return true;
> @@ -778,7 +775,7 @@ static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
>
>  static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
>  {
> -	int ret;
> +	int ret = 0;
>  	struct zram_entry *entry;
>  	unsigned int comp_len;
>  	void *src, *dst;
> @@ -786,12 +783,20 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
>  	struct page *page = bvec->bv_page;
>  	u32 checksum;
>
> +	/*
> +	 * Free memory associated with this sector
> +	 * before overwriting unused sectors.
> +	 */
> +	zram_slot_lock(zram, index);
> +	zram_free_page(zram, index);

Hmm, zram_free_page() should happen only if the write completes
successfully. Otherwise, we lose the valid old data even though the
write IO failed.

> +
>  	if (zram_same_page_write(zram, index, page))
> -		return 0;
> +		goto out_unlock;
>
>  	entry = zram_dedup_find(zram, page, &checksum);
>  	if (entry) {
>  		comp_len = entry->len;
> +		zram_set_flag(zram, index, ZRAM_DUP);

In case of a dedup hit, we shouldn't increase compr_data_size.

If we fix the above two problems, do you think it's still cleaner?
(I don't mean to resist your suggestion; it's a genuine question to
hear your thoughts. :)

>  		goto found_dup;
>  	}
>
> @@ -799,7 +804,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
>  	ret = zram_compress(zram, &zstrm, page, &entry, &comp_len);
>  	if (ret) {
>  		zcomp_stream_put(zram->comp);
> -		return ret;
> +		goto out_unlock;
>  	}
>
>  	dst = zs_map_object(zram->mem_pool,
> @@ -817,20 +822,16 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
>  	zram_dedup_insert(zram, entry, checksum);
>
>  found_dup:
> -	/*
> -	 * Free memory associated with this sector
> -	 * before overwriting unused sectors.
> -	 */
> -	zram_slot_lock(zram, index);
> -	zram_free_page(zram, index);
>  	zram_set_entry(zram, index, entry);
>  	zram_set_obj_size(zram, index, comp_len);
> -	zram_slot_unlock(zram, index);
>
>  	/* Update stats */
>  	atomic64_add(comp_len, &zram->stats.compr_data_size);
>  	atomic64_inc(&zram->stats.pages_stored);
> -	return 0;
> +
> +out_unlock:
> +	zram_slot_unlock(zram, index);
> +	return ret;
>  }
>
> ---
>
>
> namely,
> that zram_compress() error return path from __zram_bvec_write().
>
> currently, we touch the existing compressed object and overwrite it only
> when we successfully compress a new object. when zram_compress() fails
> we propagate the error, but never touch the old object. so all reads that
> could hit that index later will read stale data. and probably it would
> make more sense to fail those reads as well; IOW, to free the old page
> regardless of zram_compress() progress.
>
> what do you think?
>
> -ss
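
For illustration, here is a rough, untested sketch of the shape being
asked for above: the old slot is freed only after the new entry is
fully prepared, so a zram_compress() failure leaves the old data
readable, and a dedup hit is accounted under dup_data_size rather than
compr_data_size. The helpers are the ones used in the quoted patches;
the zs_map_object()/memcpy() copy step is elided, and the
zcomp_stream_get() placement is assumed from the surrounding zram code
rather than taken from the series.

---

static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
{
	int ret;
	bool dup = false;
	struct zram_entry *entry;
	unsigned int comp_len;
	struct zcomp_strm *zstrm;
	struct page *page = bvec->bv_page;
	u32 checksum;

	/* same-filled pages keep freeing the old slot themselves */
	if (zram_same_page_write(zram, index, page))
		return 0;

	entry = zram_dedup_find(zram, page, &checksum);
	if (entry) {
		comp_len = entry->len;
		dup = true;
		goto found;
	}

	zstrm = zcomp_stream_get(zram->comp);
	ret = zram_compress(zram, &zstrm, page, &entry, &comp_len);
	if (ret) {
		/* the old object was never freed, so reads stay valid */
		zcomp_stream_put(zram->comp);
		return ret;
	}

	/*
	 * Copy the compressed data into the new object as in the
	 * current code (zs_map_object()/memcpy()/zs_unmap_object()
	 * elided here).
	 */
	zcomp_stream_put(zram->comp);
	zram_dedup_insert(zram, entry, checksum);

found:
	/*
	 * Only now, with the new entry fully prepared and nothing
	 * left that can fail, drop the old object and install the
	 * new one.
	 */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);
	if (dup)
		zram_set_flag(zram, index, ZRAM_DUP);
	zram_set_entry(zram, index, entry);
	zram_set_obj_size(zram, index, comp_len);
	zram_slot_unlock(zram, index);

	/* dedup hits are accounted separately from compressed data */
	if (dup)
		atomic64_add(comp_len, &zram->stats.dup_data_size);
	else
		atomic64_add(comp_len, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);
	return 0;
}

---

Whether the slot should instead be freed unconditionally, so that a
failed write also fails subsequent reads of that index, is exactly the
open question at the end of the mail.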