With POWER10, the architecture is adding new pmem flush and sync instructions. The kernel should prevent the use of MAP_SYNC if applications are not using the new instructions on newer hardware.
This patch adds a dax attribute (/sys/bus/nd/devices/region0/pfn0.1/block/pmem0/dax/sync_fault) which can be used to control this flag. If the device supports synchronous flush then userspace can update this attribute to enable/disable the synchronous fault. The attribute is only visible if the write cache is enabled on the device. In a follow-up patch, ppc64 devices with the compat string "ibm,pmemory-v2" will disable the sync fault feature. Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.ibm.com> --- drivers/dax/bus.c | 2 +- drivers/dax/super.c | 73 ++++++++++++++++++++++++++++++++++++ drivers/nvdimm/pmem.c | 4 ++ drivers/nvdimm/region_devs.c | 16 ++++++++ include/linux/dax.h | 16 ++++++++ include/linux/libnvdimm.h | 4 ++ mm/Kconfig | 3 ++ 7 files changed, 117 insertions(+), 1 deletion(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index df238c8b6ef2..8a825ecff49b 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -420,7 +420,7 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id, * No 'host' or dax_operations since there is no access to this * device outside of mmap of the resulting character device. */ - dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC); + dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC | DAXDEV_F_SYNC_ENABLED); if (IS_ERR(dax_dev)) { rc = PTR_ERR(dax_dev); goto err; diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 8e32345be0f7..f93e6649d452 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -198,6 +198,12 @@ enum dax_device_flags { DAXDEV_WRITE_CACHE, /* flag to check if device supports synchronous flush */ DAXDEV_SYNC, + /* + * flag to indicate whether synchronous flush is enabled. + * Some platform may want to disable synchronous flush support + * even though device supports the same. 
+ */ + DAXDEV_SYNC_ENABLED, }; /** @@ -254,6 +260,63 @@ static ssize_t write_cache_store(struct device *dev, } static DEVICE_ATTR_RW(write_cache); +bool __dax_synchronous_enabled(struct dax_device *dax_dev) +{ + return test_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags); +} +EXPORT_SYMBOL_GPL(__dax_synchronous_enabled); + +static void set_dax_synchronous_enable(struct dax_device *dax_dev, bool enable) +{ + if (!test_bit(DAXDEV_SYNC, &dax_dev->flags)) + return; + + if (enable) + set_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags); + else + clear_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags); +} + + +static ssize_t sync_fault_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int enabled; + struct dax_device *dax_dev = dax_get_by_host(dev_name(dev)); + ssize_t rc; + + WARN_ON_ONCE(!dax_dev); + if (!dax_dev) + return -ENXIO; + + enabled = (dax_synchronous(dax_dev) && dax_synchronous_enabled(dax_dev)); + rc = sprintf(buf, "%d\n", enabled); + put_dax(dax_dev); + return rc; +} + +static ssize_t sync_fault_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + bool enable_sync; + int rc = strtobool(buf, &enable_sync); + struct dax_device *dax_dev = dax_get_by_host(dev_name(dev)); + + WARN_ON_ONCE(!dax_dev); + if (!dax_dev) + return -ENXIO; + + if (rc) + len = rc; + else + set_dax_synchronous_enable(dax_dev, enable_sync); + + put_dax(dax_dev); + return len; +} + +static DEVICE_ATTR_RW(sync_fault); + static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = container_of(kobj, typeof(*dev), kobj); @@ -267,11 +330,18 @@ static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n) if (a == &dev_attr_write_cache.attr) return 0; #endif + if (a == &dev_attr_sync_fault.attr) { + if (dax_write_cache_enabled(dax_dev)) + return a->mode; + return 0; + } + return a->mode; } static struct attribute *dax_attributes[] = { &dev_attr_write_cache.attr, + &dev_attr_sync_fault.attr, NULL, }; 
@@ -594,6 +664,9 @@ struct dax_device *alloc_dax(void *private, const char *__host, if (flags & DAXDEV_F_SYNC) set_dax_synchronous(dax_dev); + if (flags & DAXDEV_F_SYNC_ENABLED) + set_dax_synchronous_enable(dax_dev, true); + return dax_dev; err_dev: diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 97f948f8f4e6..a738b237a3ff 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -487,6 +487,10 @@ static int pmem_attach_disk(struct device *dev, if (is_nvdimm_sync(nd_region)) flags = DAXDEV_F_SYNC; + + if (is_nvdimm_sync_enabled(nd_region)) + flags |= DAXDEV_F_SYNC_ENABLED; + dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags); if (IS_ERR(dax_dev)) { put_disk(disk); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 88ea34a9c7fd..d60d159b812b 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1283,6 +1283,22 @@ bool is_nvdimm_sync(struct nd_region *nd_region) } EXPORT_SYMBOL_GPL(is_nvdimm_sync); +bool is_nvdimm_sync_enabled(struct nd_region *nd_region) +{ +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE + if (is_nd_volatile(&nd_region->dev)) + return true; + + return is_nd_pmem(&nd_region->dev) && + test_bit(ND_REGION_SYNC_ENABLED, &nd_region->flags); +#else + return true; +#endif + +} +EXPORT_SYMBOL_GPL(is_nvdimm_sync_enabled); + + struct conflict_context { struct nd_region *nd_region; resource_size_t start, size; diff --git a/include/linux/dax.h b/include/linux/dax.h index d7af5d243f24..c4a3551557de 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -10,6 +10,9 @@ /* Flag for synchronous flush */ #define DAXDEV_F_SYNC (1UL << 0) +/* flag for platform forcing synchronous flush disable */ +#define DAXDEV_F_SYNC_ENABLED (1UL << 1) + typedef unsigned long dax_entry_t; struct iomap_ops; @@ -59,6 +62,13 @@ static inline void set_dax_synchronous(struct dax_device *dax_dev) { __set_dax_synchronous(dax_dev); } + +bool __dax_synchronous_enabled(struct dax_device *dax_dev); +static 
inline bool dax_synchronous_enabled(struct dax_device *dax_dev) +{ + return __dax_synchronous_enabled(dax_dev); +} + /* * Check if given mapping is supported by the file / underlying device. */ @@ -69,6 +79,12 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, return true; if (!IS_DAX(file_inode(vma->vm_file))) return false; + /* + * check MAP_SYNC is disabled by platform for this device. + */ + if (!dax_synchronous_enabled(dax_dev)) + return false; + return dax_synchronous(dax_dev); } #else diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 66f6c65bd789..53d4d4b8b6b9 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -63,6 +63,9 @@ enum { /* Platform provides asynchronous flush mechanism */ ND_REGION_ASYNC = 3, + /* Platform wants to disable synchronous flush mechanism */ + ND_REGION_SYNC_ENABLED= 4, + /* mark newly adjusted resources as requiring a label update */ DPA_RESOURCE_ADJUSTED = 1 << 0, }; @@ -262,6 +265,7 @@ int nvdimm_has_flush(struct nd_region *nd_region); int nvdimm_has_cache(struct nd_region *nd_region); int nvdimm_in_overwrite(struct nvdimm *nvdimm); bool is_nvdimm_sync(struct nd_region *nd_region); +bool is_nvdimm_sync_enabled(struct nd_region *nd_region); static inline int nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) diff --git a/mm/Kconfig b/mm/Kconfig index cffc276fa19c..aebe7e9c079d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -868,4 +868,7 @@ config ARCH_HAS_HUGEPD config MAPPING_DIRTY_HELPERS bool +config ARCH_MAP_SYNC_DISABLE + bool + endmenu -- 2.26.2