On Tue, 2017-06-13 at 23:26 +0530, Shilpasri G Bhat wrote:
> In P9, OCC (On-Chip-Controller) supports shared memory based
> commad-response interface. Within the shared memory there is an OPAL
> command buffer and OCC response buffer that can be used to send
> inband commands to OCC. This patch adds a platform driver to support
> the command/response interface between OCC and the host.
> 

Hi Shilpasri,

I have another question about printing of errors, see below. Otherwise
looks better.

When you're dropping the locks I wonder why you couldn't just use
atomic_set().

Finally, as you say below, if a user does a read() without having done
a write(), there isn't really any guarantee as to what they get. They
will get the previous response if there had been a write() call, what
about if there had never been a write() call? 
Could this be considered a security problem? Imagine that one program
is using the file descriptor and a malicious program keeps trying to
get the file descriptor and then the program which has the file
descriptor crashes? What information might the malicious program get?

More inline,

Cyril

> Signed-off-by: Shilpasri G Bhat <shilpa.b...@linux.vnet.ibm.com>
> ---
> Changes from V2:
> - Remove spinlock and use atomic_t for setting and clearing flags
> - Fix endian swapping
> - Use pa() and va() before and after opal call for accessing buffer
>   data
> - Replace (u8 *) with __be64 for buffer pointers
> - User reads the previous OCC response if the user does a read()
>   before a write(). Is this wrong?
> - Add WARN_ON check for nr_occs > 254
> 
>  arch/powerpc/include/asm/opal-api.h            |  41 +++-
>  arch/powerpc/include/asm/opal.h                |   3 +
>  arch/powerpc/platforms/powernv/Makefile        |   2 +-
>  arch/powerpc/platforms/powernv/opal-occ.c      | 314 
> +++++++++++++++++++++++++
>  arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
>  arch/powerpc/platforms/powernv/opal.c          |   8 +
>  6 files changed, 367 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h 
> b/arch/powerpc/include/asm/opal-api.h
> index cb3e624..011d86c 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -42,6 +42,10 @@
>  #define OPAL_I2C_STOP_ERR    -24
>  #define OPAL_XIVE_PROVISIONING       -31
>  #define OPAL_XIVE_FREE_ACTIVE        -32
> +#define OPAL_OCC_INVALID_STATE       -33
> +#define OPAL_OCC_BUSY                -34
> +#define OPAL_OCC_CMD_TIMEOUT -35
> +#define OPAL_OCC_RSP_MISMATCH        -36
>  
>  /* API Tokens (in r0) */
>  #define OPAL_INVALID_CALL                   -1
> @@ -190,7 +194,8 @@
>  #define OPAL_NPU_INIT_CONTEXT                        146
>  #define OPAL_NPU_DESTROY_CONTEXT             147
>  #define OPAL_NPU_MAP_LPAR                    148
> -#define OPAL_LAST                            148
> +#define OPAL_OCC_COMMAND                     149
> +#define OPAL_LAST                            149
>  
>  /* Device tree flags */
>  
> @@ -829,6 +834,40 @@ struct opal_prd_msg_header {
>  
>  struct opal_prd_msg;
>  
> +enum occ_cmd {
> +     OCC_CMD_AMESTER_PASS_THRU = 0,
> +     OCC_CMD_CLEAR_SENSOR_DATA,
> +     OCC_CMD_SET_POWER_CAP,
> +     OCC_CMD_SET_POWER_SHIFTING_RATIO,
> +     OCC_CMD_SELECT_SENSOR_GROUPS,
> +     OCC_CMD_LAST
> +};
> +
> +struct opal_occ_cmd_rsp_msg {
> +     __be64 cdata;
> +     __be64 rdata;
> +     __be16 cdata_size;
> +     __be16 rdata_size;
> +     u8 cmd;
> +     u8 request_id;
> +     u8 status;
> +};
> +
> +struct opal_occ_cmd_data {
> +     __be16 size;
> +     u8 cmd;
> +     u8 data[];
> +};
> +
> +struct opal_occ_rsp_data {
> +     __be16 size;
> +     u8 status;
> +     u8 data[];
> +};
> +
> +#define MAX_OPAL_CMD_DATA_LENGTH        4090
> +#define MAX_OCC_RSP_DATA_LENGTH         8698
> +
>  #define OCC_RESET                       0
>  #define OCC_LOAD                        1
>  #define OCC_THROTTLE                    2
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 03ed493..e55ed79 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -346,6 +346,9 @@ static inline int opal_get_async_rc(struct opal_msg msg)
>  
>  void opal_wake_poller(void);
>  
> +int64_t opal_occ_command(int chip_id, struct opal_occ_cmd_rsp_msg *msg,
> +                      bool retry);
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #endif /* _ASM_POWERPC_OPAL_H */
> diff --git a/arch/powerpc/platforms/powernv/Makefile 
> b/arch/powerpc/platforms/powernv/Makefile
> index b5d98cb..f5f0902 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -2,7 +2,7 @@ obj-y                 += setup.o opal-wrappers.o opal.o 
> opal-async.o idle.o
>  obj-y                        += opal-rtc.o opal-nvram.o opal-lpc.o 
> opal-flash.o
>  obj-y                        += rng.o opal-elog.o opal-dump.o 
> opal-sysparam.o opal-sensor.o
>  obj-y                        += opal-msglog.o opal-hmi.o opal-power.o 
> opal-irqchip.o
> -obj-y                        += opal-kmsg.o
> +obj-y                        += opal-kmsg.o opal-occ.o
>  
>  obj-$(CONFIG_SMP)    += smp.o subcore.o subcore-asm.o
>  obj-$(CONFIG_PCI)    += pci.o pci-ioda.o npu-dma.o
> diff --git a/arch/powerpc/platforms/powernv/opal-occ.c 
> b/arch/powerpc/platforms/powernv/opal-occ.c
> new file mode 100644
> index 0000000..912cdc4
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-occ.c
> @@ -0,0 +1,314 @@
> +/*
> + * Copyright IBM Corporation 2017
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#define pr_fmt(fmt) "opal-occ: " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/miscdevice.h>
> +#include <linux/highmem.h>
> +#include <linux/slab.h>
> +#include <asm/opal.h>
> +
> +struct occ {
> +     struct miscdevice dev;
> +     struct opal_occ_rsp_data *rsp;
> +     atomic_t session;
> +     atomic_t cmd_in_progress;
> +     int id;
> +     u8 last_token;
> +} *occs;
> +static int nr_occs;
> +
> +static int __send_occ_command(struct opal_occ_cmd_rsp_msg *msg,
> +                           int chip_id, bool retry)
> +{
> +     struct opal_msg async_msg;
> +     int rc;
> +
> +     if (!retry) {
> +             msg->cdata = cpu_to_be64(__pa(msg->cdata));
> +             msg->cdata_size = cpu_to_be16(msg->cdata_size);
> +             msg->rdata = cpu_to_be64(__pa(msg->rdata));
> +     }
> +
> +     rc = opal_occ_command(chip_id, msg, retry);
> +     if (rc == OPAL_ASYNC_COMPLETION) {
> +             rc = opal_async_wait_response(msg->request_id, &async_msg);
> +             if (rc) {
> +                     pr_info("Failed to wait for async response %d\n", rc);
> +                     return rc;
> +             }
> +     } else if (rc) {
> +             pr_info("Failed to send opal_occ_command %d\n", rc);
> +             return rc;
> +     }
> +
> +     rc = opal_get_async_rc(async_msg);
> +     if (rc) {
> +             pr_info("opal_occ_command failed with %d\n", rc);
> +             return rc;
> +     }

If you get a timeout or a response mismatch you're going to retry. Is
this because you expect this to happen generally in the course of using
this interface - if so, should you print a message or would it be best
to say nothing? Also is pr_info() a good level for these?

> +
> +     msg->rdata_size = be16_to_cpu(msg->rdata_size);
> +     msg->rdata = be64_to_cpu(__va(msg->rdata));
> +     msg->cdata_size = be16_to_cpu(msg->cdata_size);
> +     msg->cdata = be64_to_cpu(__va(msg->cdata));
> +
> +     if (msg->rdata_size > MAX_OCC_RSP_DATA_LENGTH) {
> +             pr_info("Opal sent bigger data, clipping to the max response 
> size\n");
> +             msg->rdata_size = MAX_OCC_RSP_DATA_LENGTH;
> +     }
> +
> +     return rc;
> +}
> +
> +static int send_occ_command(struct opal_occ_cmd_rsp_msg *msg, struct occ 
> *occ)
> +{
> +     int token, rc;
> +
> +     token = opal_async_get_unique_token_interruptible(occ->last_token);
> +     if (token < 0) {
> +             pr_info("Failed to get the request_id/token for command %d 
> (%d)\n",
> +                     msg->cmd, token);
> +             return token;
> +     }
> +
> +     msg->request_id = token;
> +     rc = __send_occ_command(msg, occ->id, false);
> +
> +     switch (rc) {
> +     case OPAL_OCC_CMD_TIMEOUT:
> +     case OPAL_OCC_RSP_MISMATCH:
> +             occ->last_token = token;
> +             opal_async_release_token(token);
> +             token = opal_async_get_unique_token_interruptible(token);
> +             if (token < 0) {
> +                     pr_info("Failed to get the request_id/token for retry 
> command %d (%d)\n",
> +                             msg->cmd, token);
> +
> +                     return opal_error_code(rc);
> +             }
> +
> +             msg->request_id = token;
> +             rc = __send_occ_command(msg, occ->id, true);
> +             break;
> +     default:
> +             break;
> +     }
> +
> +     occ->last_token = token;
> +     opal_async_release_token(token);
> +     return opal_error_code(rc);
> +}
> +
> +static int opal_occ_cmd_prepare(struct opal_occ_cmd_data *cmd, struct occ 
> *occ)
> +{
> +     struct opal_occ_cmd_rsp_msg msg;
> +     int rc;
> +
> +     msg.cmd = cmd->cmd;
> +     msg.cdata_size = cmd->size;
> +     msg.rdata = (u64)occ->rsp->data;
> +     msg.cdata = (u64)cmd->data;

I'm surprised this doesn't throw (gcc) warnings, I certainly expect it
will throw sparse warnings.

You've done the right thing and annotated your struct
opal_occ_cmd_rsp_msg with __be64 which is what skiboot expects,
however, at this point it is still in CPU endian and it can get
confusing. If a type is marked as a specific endian, it should *always*
be in that endian. In this case it may (depending on what the CPU
endian is) be in the wrong endian until we get to __send_occ_command()
where you do the conversion. Is there any reason you can't convert them
here?

> +     rc = send_occ_command(&msg, occ);
> +     if (rc)
> +             return rc;
> +
> +     occ->rsp->size = msg.rdata_size;
> +     occ->rsp->status = msg.status;
> +
> +     return rc;
> +}
> +
> +static ssize_t opal_occ_write(struct file *file, const char __user *buf,
> +                           size_t count, loff_t *ppos)
> +{
> +     struct miscdevice *dev = file->private_data;
> +     struct occ *occ = container_of(dev, struct occ, dev);
> +     struct opal_occ_cmd_data *cmd;
> +     int rc;
> +
> +     if (count < sizeof(*cmd))
> +             return -EINVAL;
> +
> +     if (atomic_xchg(&occ->cmd_in_progress, 1) == 1)
> +             return -EBUSY;
> +
> +     cmd = kmalloc(count, GFP_KERNEL);
> +     if (!cmd) {
> +             rc = -ENOMEM;
> +             goto out;
> +     }
> +
> +     rc = copy_from_user(cmd, buf, count);
> +     if (rc) {
> +             pr_err("Failed to copy OCC command request message\n");
> +             rc = -EFAULT;
> +             goto free_cmd;
> +     }
> +
> +     if (cmd->size > MAX_OPAL_CMD_DATA_LENGTH) {
> +             rc = -EINVAL;
> +             goto free_cmd;
> +     }
> +
> +     rc = opal_occ_cmd_prepare(cmd, occ);
> +     if (!rc)
> +             rc = count;
> +
> +free_cmd:
> +     kfree(cmd);
> +out:
> +     atomic_xchg(&occ->cmd_in_progress, 0);
> +     return rc;
> +}
> +
> +static ssize_t opal_occ_read(struct file *file, char __user *buf,
> +                          size_t count, loff_t *ppos)
> +{
> +     struct miscdevice *dev = file->private_data;
> +     struct occ *occ = container_of(dev, struct occ, dev);
> +     int rc;
> +
> +     if (count < sizeof(*occ->rsp) + occ->rsp->size)
> +             return -EINVAL;
> +
> +     rc = copy_to_user((void __user *)buf, occ->rsp,
> +                       sizeof(occ->rsp) + occ->rsp->size);
> +     if (rc) {
> +             pr_err("Failed to copy OCC response data to user\n");
> +             return rc;
> +     }
> +
> +     return sizeof(*occ->rsp) + occ->rsp->size;
> +}
> +
> +static int opal_occ_open(struct inode *inode, struct file *file)
> +{
> +     struct miscdevice *dev = file->private_data;
> +     struct occ *occ = container_of(dev, struct occ, dev);
> +
> +     if (atomic_xchg(&occ->session, 1) == 1)
> +             return -EBUSY;
> +
> +     return 0;
> +}
> +
> +static int opal_occ_release(struct inode *inode, struct file *file)
> +{
> +     struct miscdevice *dev = file->private_data;
> +     struct occ *occ = container_of(dev, struct occ, dev);
> +
> +     atomic_xchg(&occ->session, 0);
> +
> +     return 0;
> +}
> +
> +static const struct file_operations opal_occ_fops = {
> +     .open           = opal_occ_open,
> +     .read           = opal_occ_read,
> +     .write          = opal_occ_write,
> +     .release        = opal_occ_release,
> +     .owner          = THIS_MODULE,
> +};
> +
> +static int opal_occ_probe(struct platform_device *pdev)
> +{
> +     unsigned int chip[256];
> +     unsigned int cpu;
> +     unsigned int prev_chip_id = UINT_MAX;
> +     int i, rc;
> +
> +     for_each_possible_cpu(cpu) {
> +             unsigned int id = cpu_to_chip_id(cpu);
> +
> +             if (prev_chip_id != id) {
> +                     prev_chip_id = id;
> +                     chip[nr_occs++] = id;
> +                     WARN_ON(nr_occs > 254);

Perhaps have some MAX_POSSIBLE_CHIPS or something? Also might I suggest
WARN_ON_ONCE just because if there's no point printing the warning more
than once but this code could print it a bunch of times...

> +             }
> +     }
> +
> +     occs = kcalloc(nr_occs, sizeof(*occs), GFP_KERNEL);
> +     if (!occs)
> +             return -ENOMEM;
> +
> +     for (i = 0; i < nr_occs; i++) {
> +             char name[10];
> +
> +             occs[i].id = chip[i];
> +             occs[i].dev.minor = MISC_DYNAMIC_MINOR;
> +             snprintf(name, 10, "occ%d", chip[i]);
> +             occs[i].dev.name = name;
> +             occs[i].dev.fops = &opal_occ_fops;
> +             occs[i].last_token = -1;
> +             occs[i].rsp = kmalloc(sizeof(occs[i].rsp) +
> +                                   MAX_OCC_RSP_DATA_LENGTH,
> +                                   GFP_KERNEL);
> +             if (!occs[i].rsp) {
> +                     rc = -ENOMEM;
> +                     goto free_occs;
> +             }
> +
> +             rc = misc_register(&occs[i].dev);
> +             if (rc)
> +                     goto free_occ_rsp_data;
> +     }
> +
> +     return 0;
> +
> +free_occ_rsp_data:
> +     kfree(occs[i].rsp);
> +free_occs:
> +     while (--i >= 0) {
> +             kfree(occs[i].rsp);
> +             misc_deregister(&occs[i].dev);
> +     }
> +     kfree(occs);
> +
> +     return rc;
> +}
> +
> +static int opal_occ_remove(struct platform_device *pdev)
> +{
> +     int i;
> +
> +     for (i = 0; i < nr_occs; i++) {
> +             kfree(occs[i].rsp);
> +             misc_deregister(&occs[i].dev);
> +     }
> +
> +     kfree(occs);
> +     return 0;
> +}
> +
> +static const struct of_device_id opal_occ_match[] = {
> +     { .compatible = "ibm,opal-occ-cmd-rsp-interface" },
> +     { },
> +};
> +
> +static struct platform_driver opal_occ_driver = {
> +     .driver = {
> +             .name           = "opal-occ",
> +             .of_match_table = opal_occ_match,
> +     },
> +     .probe  = opal_occ_probe,
> +     .remove = opal_occ_remove,
> +};
> +
> +module_platform_driver(opal_occ_driver);
> +
> +MODULE_DESCRIPTION("PowerNV OPAL-OCC driver");
> +MODULE_LICENSE("GPL");
> diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
> b/arch/powerpc/platforms/powernv/opal-wrappers.S
> index f620572..e6bf18d 100644
> --- a/arch/powerpc/platforms/powernv/opal-wrappers.S
> +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
> @@ -310,3 +310,4 @@ OPAL_CALL(opal_xive_dump,                 OPAL_XIVE_DUMP);
>  OPAL_CALL(opal_npu_init_context,             OPAL_NPU_INIT_CONTEXT);
>  OPAL_CALL(opal_npu_destroy_context,          OPAL_NPU_DESTROY_CONTEXT);
>  OPAL_CALL(opal_npu_map_lpar,                 OPAL_NPU_MAP_LPAR);
> +OPAL_CALL(opal_occ_command,                  OPAL_OCC_COMMAND);
> diff --git a/arch/powerpc/platforms/powernv/opal.c 
> b/arch/powerpc/platforms/powernv/opal.c
> index 59684b4..d87c61b 100644
> --- a/arch/powerpc/platforms/powernv/opal.c
> +++ b/arch/powerpc/platforms/powernv/opal.c
> @@ -815,6 +815,9 @@ static int __init opal_init(void)
>       opal_pdev_init("ibm,opal-flash");
>       opal_pdev_init("ibm,opal-prd");
>  
> +     /* Initialize platform device: OCC_OPAL command-response interface */
> +     opal_pdev_init("ibm,opal-occ-cmd-rsp-interface");
> +
>       /* Initialise platform device: oppanel interface */
>       opal_pdev_init("ibm,opal-oppanel");
>  
> @@ -859,6 +862,7 @@ void opal_shutdown(void)
>  EXPORT_SYMBOL_GPL(opal_flash_write);
>  EXPORT_SYMBOL_GPL(opal_flash_erase);
>  EXPORT_SYMBOL_GPL(opal_prd_msg);
> +EXPORT_SYMBOL_GPL(opal_occ_command);
>  
>  /* Convert a region of vmalloc memory to an opal sg list */
>  struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
> @@ -937,6 +941,10 @@ int opal_error_code(int rc)
>       case OPAL_UNSUPPORTED:          return -EIO;
>       case OPAL_HARDWARE:             return -EIO;
>       case OPAL_INTERNAL_ERROR:       return -EIO;
> +     case OPAL_OCC_BUSY:             return -EBUSY;
> +     case OPAL_OCC_INVALID_STATE:
> +     case OPAL_OCC_CMD_TIMEOUT:
> +     case OPAL_OCC_RSP_MISMATCH:     return -EIO;
>       default:
>               pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
>               return -EIO;

Reply via email to