Hi Dan / Mpe, I have sent out a v6 of this patch set that addresses your review comments so far. Also I have added a new doc patch in the patchset that adds documentation for PAPR_SCM_HEALTH hcall specification.
Requesting you to please review the new patchset at https://lore.kernel.org/linux-nvdimm/20200420070711.223545-1-vaib...@linux.ibm.com Thanks, ~ Vaibhav Jain Dan Williams <dan.j.willi...@intel.com> writes: > On Tue, Mar 31, 2020 at 7:33 AM Vaibhav Jain <vaib...@linux.ibm.com> wrote: >> >> This patch implements support for papr_scm command >> 'DSM_PAPR_SCM_HEALTH' that returns a newly introduced 'struct >> nd_papr_scm_dimm_health_stat' instance containing dimm health >> information back to user space in response to ND_CMD_CALL. This >> functionality is implemented in newly introduced papr_scm_get_health() >> that queries the scm-dimm health information and then copies these bitmaps >> to the package payload whose layout is defined by 'struct >> papr_scm_ndctl_health'. >> >> The patch also introduces a new member a new member 'struct >> papr_scm_priv.health' thats an instance of 'struct >> nd_papr_scm_dimm_health_stat' to cache the health information of a >> scm-dimm. As a result functions drc_pmem_query_health() and >> papr_flags_show() are updated to populate and use this new struct >> instead of two be64 integers that we earlier used. > > Link to HCALL specification? > >> >> Signed-off-by: Vaibhav Jain <vaib...@linux.ibm.com> >> --- >> Changelog: >> >> v4..v5: None >> >> v3..v4: Call the DSM_PAPR_SCM_HEALTH service function from >> papr_scm_service_dsm() instead of papr_scm_ndctl(). [Aneesh] >> >> v2..v3: Updated struct nd_papr_scm_dimm_health_stat_v1 to use '__xx' >> types as its exported to the userspace [Aneesh] >> Changed the constants DSM_PAPR_SCM_DIMM_XX indicating dimm >> health from enum to #defines [Aneesh] >> >> v1..v2: New patch in the series >> --- >> arch/powerpc/include/uapi/asm/papr_scm_dsm.h | 40 +++++++ >> arch/powerpc/platforms/pseries/papr_scm.c | 109 ++++++++++++++++--- >> 2 files changed, 132 insertions(+), 17 deletions(-) >> >> diff --git a/arch/powerpc/include/uapi/asm/papr_scm_dsm.h >> b/arch/powerpc/include/uapi/asm/papr_scm_dsm.h >> index c039a49b41b4..8265125304ca 100644 >> --- a/arch/powerpc/include/uapi/asm/papr_scm_dsm.h >> +++ b/arch/powerpc/include/uapi/asm/papr_scm_dsm.h >> @@ -132,6 +132,7 @@ struct nd_papr_scm_cmd_pkg { >> */ >> enum dsm_papr_scm { >> DSM_PAPR_SCM_MIN = 0x10000, >> + DSM_PAPR_SCM_HEALTH, >> DSM_PAPR_SCM_MAX, >> }; >> >> @@ -158,4 +159,43 @@ static void *papr_scm_pcmd_to_payload(struct >> nd_papr_scm_cmd_pkg *pcmd) >> else >> return (void *)((__u8 *) pcmd + pcmd->payload_offset); >> } >> + >> +/* Various scm-dimm health indicators */ >> +#define DSM_PAPR_SCM_DIMM_HEALTHY 0 >> +#define DSM_PAPR_SCM_DIMM_UNHEALTHY 1 >> +#define DSM_PAPR_SCM_DIMM_CRITICAL 2 >> +#define DSM_PAPR_SCM_DIMM_FATAL 3 >> + >> +/* >> + * Struct exchanged between kernel & ndctl in for PAPR_DSM_PAPR_SMART_HEALTH >> + * Various bitflags indicate the health status of the dimm. >> + * >> + * dimm_unarmed : Dimm not armed. So contents wont persist. >> + * dimm_bad_shutdown : Previous shutdown did not persist contents. >> + * dimm_bad_restore : Contents from previous shutdown werent restored. >> + * dimm_scrubbed : Contents of the dimm have been scrubbed. >> + * dimm_locked : Contents of the dimm cant be modified until CEC >> reboot >> + * dimm_encrypted : Contents of dimm are encrypted. >> + * dimm_health : Dimm health indicator. >> + */ >> +struct nd_papr_scm_dimm_health_stat_v1 { >> + __u8 dimm_unarmed; >> + __u8 dimm_bad_shutdown; >> + __u8 dimm_bad_restore; >> + __u8 dimm_scrubbed; >> + __u8 dimm_locked; >> + __u8 dimm_encrypted; >> + __u16 dimm_health; >> +}; > > Does the structure pack the same across different compilers and > configurations? > >> + >> +/* >> + * Typedef the current struct for dimm_health so that any application >> + * or kernel recompiled after introducing a new version automatically >> + * supports the new version. >> + */ >> +#define nd_papr_scm_dimm_health_stat nd_papr_scm_dimm_health_stat_v1 >> + >> +/* Current version number for the dimm health struct */ >> +#define ND_PAPR_SCM_DIMM_HEALTH_VERSION 1 >> + >> #endif /* _UAPI_ASM_POWERPC_PAPR_SCM_DSM_H_ */ >> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c >> b/arch/powerpc/platforms/pseries/papr_scm.c >> index e8ce96d2249e..ce94762954e0 100644 >> --- a/arch/powerpc/platforms/pseries/papr_scm.c >> +++ b/arch/powerpc/platforms/pseries/papr_scm.c >> @@ -47,8 +47,7 @@ struct papr_scm_priv { >> struct mutex dimm_mutex; >> >> /* Health information for the dimm */ >> - __be64 health_bitmap; >> - __be64 health_bitmap_valid; >> + struct nd_papr_scm_dimm_health_stat health; >> }; >> >> static int drc_pmem_bind(struct papr_scm_priv *p) >> @@ -158,6 +157,7 @@ static int drc_pmem_query_health(struct papr_scm_priv *p) >> { >> unsigned long ret[PLPAR_HCALL_BUFSIZE]; >> int64_t rc; >> + __be64 health; >> >> rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); >> if (rc != H_SUCCESS) { >> @@ -172,13 +172,41 @@ static int drc_pmem_query_health(struct papr_scm_priv >> *p) >> return rc; >> >> /* Store the retrieved health information in dimm platform data */ >> - p->health_bitmap = ret[0]; >> - p->health_bitmap_valid = ret[1]; >> + health = ret[0] & ret[1]; >> >> dev_dbg(&p->pdev->dev, >> "Queried dimm health info. Bitmap:0x%016llx >> Mask:0x%016llx\n", >> - be64_to_cpu(p->health_bitmap), >> - be64_to_cpu(p->health_bitmap_valid)); >> + be64_to_cpu(ret[0]), >> + be64_to_cpu(ret[1])); >> + >> + memset(&p->health, 0, sizeof(p->health)); >> + >> + /* Check for various masks in bitmap and set the buffer */ >> + if (health & PAPR_SCM_DIMM_UNARMED_MASK) >> + p->health.dimm_unarmed = true; >> + >> + if (health & PAPR_SCM_DIMM_BAD_SHUTDOWN_MASK) >> + p->health.dimm_bad_shutdown = true; >> + >> + if (health & PAPR_SCM_DIMM_BAD_RESTORE_MASK) >> + p->health.dimm_bad_restore = true; >> + >> + if (health & PAPR_SCM_DIMM_ENCRYPTED) >> + p->health.dimm_encrypted = true; >> + >> + if (health & PAPR_SCM_DIMM_SCRUBBED_AND_LOCKED) { >> + p->health.dimm_locked = true; >> + p->health.dimm_scrubbed = true; >> + } > > I don't think bool is suitable for ioctl ABI. For example the true > value may be positive, or negative depending on the arch. This should > be using explicit integer values. > > I assume the reason you are translating this rather than transmitting > that raw 64-bit health value is for future compatibility if the HCALL > format changes / is extended? > >> + >> + if (health & PAPR_SCM_DIMM_HEALTH_UNHEALTHY) >> + p->health.dimm_health = DSM_PAPR_SCM_DIMM_UNHEALTHY; >> + >> + if (health & PAPR_SCM_DIMM_HEALTH_CRITICAL) >> + p->health.dimm_health = DSM_PAPR_SCM_DIMM_CRITICAL; >> + >> + if (health & PAPR_SCM_DIMM_HEALTH_FATAL) >> + p->health.dimm_health = DSM_PAPR_SCM_DIMM_FATAL; > >> >> mutex_unlock(&p->dimm_mutex); >> return 0; >> @@ -331,6 +359,51 @@ static int is_cmd_valid(struct nvdimm *nvdimm, unsigned >> int cmd, void *buf, >> return 0; >> } >> >> +/* Fetch the DIMM health info and populate it in provided package. */ >> +static int papr_scm_get_health(struct papr_scm_priv *p, >> + struct nd_papr_scm_cmd_pkg *pkg) >> +{ >> + int rc; >> + size_t copysize = sizeof(p->health); >> + >> + rc = drc_pmem_query_health(p); >> + if (rc) >> + goto out; >> + /* >> + * If the requested payload version is greater than one we know >> + * about, return the payload version we know about and let >> + * caller/userspace handle. >> + */ >> + if (pkg->payload_version > ND_PAPR_SCM_DIMM_HEALTH_VERSION) >> + pkg->payload_version = ND_PAPR_SCM_DIMM_HEALTH_VERSION; >> + >> + if (pkg->hdr.nd_size_out < copysize) { >> + dev_dbg(&p->pdev->dev, "%s Payload not large enough\n", >> + __func__); >> + dev_dbg(&p->pdev->dev, "%s Expected %lu, available %u\n", >> + __func__, copysize, pkg->hdr.nd_size_out); >> + rc = -ENOSPC; >> + goto out; >> + } >> + >> + dev_dbg(&p->pdev->dev, "%s Copying payload size=%lu version=0x%x\n", >> + __func__, copysize, pkg->payload_version); >> + >> + /* Copy a subset of health struct based on copysize */ >> + memcpy(papr_scm_pcmd_to_payload(pkg), &p->health, copysize); >> + pkg->hdr.nd_fw_size = copysize; >> + >> +out: >> + /* >> + * Put the error in out package and return success from function >> + * so that errors if any are propogated back to userspace. >> + */ >> + pkg->cmd_status = rc; >> + dev_dbg(&p->pdev->dev, "%s completion code = %d\n", __func__, rc); >> + >> + return 0; >> +} >> + >> static int papr_scm_service_dsm(struct papr_scm_priv *p, >> struct nd_papr_scm_cmd_pkg *call_pkg) >> { >> @@ -345,6 +418,9 @@ static int papr_scm_service_dsm(struct papr_scm_priv *p, >> >> /* Depending on the DSM command call appropriate service routine */ >> switch (call_pkg->hdr.nd_command) { >> + case DSM_PAPR_SCM_HEALTH: >> + return papr_scm_get_health(p, call_pkg); >> + >> default: >> pr_debug("Unsupported DSM command 0x%llx\n", >> call_pkg->hdr.nd_command); >> @@ -431,7 +507,6 @@ static ssize_t papr_flags_show(struct device *dev, >> { >> struct nvdimm *dimm = to_nvdimm(dev); >> struct papr_scm_priv *p = nvdimm_provider_data(dimm); >> - __be64 health; >> int rc; >> >> rc = drc_pmem_query_health(p); >> @@ -443,26 +518,26 @@ static ssize_t papr_flags_show(struct device *dev, >> if (rc) >> return rc; >> >> - health = p->health_bitmap & p->health_bitmap_valid; >> - >> - /* Check for various masks in bitmap and set the buffer */ >> - if (health & PAPR_SCM_DIMM_UNARMED_MASK) >> + if (p->health.dimm_unarmed) >> rc += sprintf(buf, "not_armed "); >> >> - if (health & PAPR_SCM_DIMM_BAD_SHUTDOWN_MASK) >> + if (p->health.dimm_bad_shutdown) >> rc += sprintf(buf + rc, "save_fail "); > > Per my patch1 comment is this "save_fail" or "flush_fail"? > >> >> - if (health & PAPR_SCM_DIMM_BAD_RESTORE_MASK) >> + if (p->health.dimm_bad_restore) >> rc += sprintf(buf + rc, "restore_fail "); >> >> - if (health & PAPR_SCM_DIMM_ENCRYPTED) >> + if (p->health.dimm_encrypted) >> rc += sprintf(buf + rc, "encrypted "); >> >> - if (health & PAPR_SCM_DIMM_SMART_EVENT_MASK) >> + if (p->health.dimm_health) >> rc += sprintf(buf + rc, "smart_notify "); >> >> - if (health & PAPR_SCM_DIMM_SCRUBBED_AND_LOCKED) >> - rc += sprintf(buf + rc, "scrubbed locked "); >> + if (p->health.dimm_scrubbed) >> + rc += sprintf(buf + rc, "scrubbed "); >> + >> + if (p->health.dimm_locked) >> + rc += sprintf(buf + rc, "locked "); >> >> if (rc > 0) >> rc += sprintf(buf + rc, "\n"); >> -- >> 2.25.1 >> --