On 2020/08/27 22:50, Niklas Cassel wrote:
> Add support for user space to set a max open zone and a max active zone
> limit via configfs. By default, the default values are 0 == no limit.
> 
> Call the block layer API functions used for exposing the configured
> limits to sysfs.
> 
> Add accounting in null_blk_zoned so that these new limits are respected.
> Performing an operating that would exceed these limits results in a

Performing a write operation that would result in exceeding these...

> standard I/O error.
> 
> A max open zone limit exists in the ZBC standard.
> While null_blk_zoned is used to test the Zoned Block Device model in
> Linux, when it comes to differences between ZBC and ZNS, null_blk_zoned
> mostly follows ZBC.
> 
> Therefore, implement the manage open zone resources function from ZBC,
> but additionally add support for max active zones.
> This enables user space not only to test against a device with an open
> zone limit, but also to test against a device with an active zone limit.
> 
> Signed-off-by: Niklas Cassel <niklas.cas...@wdc.com>
> ---
> Changes since v1:
> -Fixed review comments by Damien Le Moal.
> 
>  drivers/block/null_blk.h       |   5 +
>  drivers/block/null_blk_main.c  |  16 +-
>  drivers/block/null_blk_zoned.c | 319 +++++++++++++++++++++++++++------
>  3 files changed, 282 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
> index daed4a9c34367..d2e7db43a52a7 100644
> --- a/drivers/block/null_blk.h
> +++ b/drivers/block/null_blk.h
> @@ -42,6 +42,9 @@ struct nullb_device {
>       struct badblocks badblocks;
>  
>       unsigned int nr_zones;
> +     unsigned int nr_zones_imp_open;
> +     unsigned int nr_zones_exp_open;
> +     unsigned int nr_zones_closed;
>       struct blk_zone *zones;
>       sector_t zone_size_sects;
>  
> @@ -51,6 +54,8 @@ struct nullb_device {
>       unsigned long zone_size; /* zone size in MB if device is zoned */
>       unsigned long zone_capacity; /* zone capacity in MB if device is zoned 
> */
>       unsigned int zone_nr_conv; /* number of conventional zones */
> +     unsigned int zone_max_open; /* max number of open zones */
> +     unsigned int zone_max_active; /* max number of active zones */
>       unsigned int submit_queues; /* number of submission queues */
>       unsigned int home_node; /* home node for the device */
>       unsigned int queue_mode; /* block interface */
> diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
> index d74443a9c8fa2..53161a418611b 100644
> --- a/drivers/block/null_blk_main.c
> +++ b/drivers/block/null_blk_main.c
> @@ -208,6 +208,14 @@ static unsigned int g_zone_nr_conv;
>  module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
>  MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block 
> device is zoned. Default: 0");
>  
> +static unsigned int g_zone_max_open;
> +module_param_named(zone_max_open, g_zone_max_open, uint, 0444);
> +MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones when block 
> device is zoned. Default: 0 (no limit)");
> +
> +static unsigned int g_zone_max_active;
> +module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
> +MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block 
> device is zoned. Default: 0 (no limit)");
> +
>  static struct nullb_device *null_alloc_dev(void);
>  static void null_free_dev(struct nullb_device *dev);
>  static void null_del_dev(struct nullb *nullb);
> @@ -347,6 +355,8 @@ NULLB_DEVICE_ATTR(zoned, bool, NULL);
>  NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
>  NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
>  NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
> +NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
> +NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
>  
>  static ssize_t nullb_device_power_show(struct config_item *item, char *page)
>  {
> @@ -464,6 +474,8 @@ static struct configfs_attribute *nullb_device_attrs[] = {
>       &nullb_device_attr_zone_size,
>       &nullb_device_attr_zone_capacity,
>       &nullb_device_attr_zone_nr_conv,
> +     &nullb_device_attr_zone_max_open,
> +     &nullb_device_attr_zone_max_active,
>       NULL,
>  };
>  
> @@ -517,7 +529,7 @@ nullb_group_drop_item(struct config_group *group, struct 
> config_item *item)
>  static ssize_t memb_group_features_show(struct config_item *item, char *page)
>  {
>       return snprintf(page, PAGE_SIZE,
> -                     
> "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv\n");
> +                     
> "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active\n");
>  }
>  
>  CONFIGFS_ATTR_RO(memb_group_, features);
> @@ -580,6 +592,8 @@ static struct nullb_device *null_alloc_dev(void)
>       dev->zone_size = g_zone_size;
>       dev->zone_capacity = g_zone_capacity;
>       dev->zone_nr_conv = g_zone_nr_conv;
> +     dev->zone_max_open = g_zone_max_open;
> +     dev->zone_max_active = g_zone_max_active;
>       return dev;
>  }
>  
> diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
> index 3d25c9ad23831..e7e341e811fbf 100644
> --- a/drivers/block/null_blk_zoned.c
> +++ b/drivers/block/null_blk_zoned.c
> @@ -51,6 +51,22 @@ int null_init_zoned_dev(struct nullb_device *dev, struct 
> request_queue *q)
>                       dev->zone_nr_conv);
>       }
>  
> +     /* Max active zones has to be < nbr of seq zones in order to be 
> enforceable */
> +     if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) {
> +             dev->zone_max_active = 0;
> +             pr_info("zone_max_active limit disabled, limit >= zone 
> count\n");
> +     }
> +
> +     /* Max open zones has to be <= max active zones */
> +     if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) {
> +             dev->zone_max_open = dev->zone_max_active;
> +             pr_info("changed the maximum number of open zones to %u\n",
> +                     dev->nr_zones);
> +     } else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) {
> +             dev->zone_max_open = 0;
> +             pr_info("zone_max_open limit disabled, limit >= zone count\n");
> +     }
> +
>       for (i = 0; i <  dev->zone_nr_conv; i++) {
>               struct blk_zone *zone = &dev->zones[i];
>  
> @@ -99,6 +115,8 @@ int null_register_zoned_dev(struct nullb *nullb)
>       }
>  
>       blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
> +     blk_queue_max_open_zones(q, dev->zone_max_open);
> +     blk_queue_max_active_zones(q, dev->zone_max_active);
>  
>       return 0;
>  }
> @@ -159,6 +177,103 @@ size_t null_zone_valid_read_len(struct nullb *nullb,
>       return (zone->wp - sector) << SECTOR_SHIFT;
>  }
>  
> +static blk_status_t null_close_zone(struct nullb_device *dev, struct 
> blk_zone *zone)
> +{
> +     if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> +             return BLK_STS_IOERR;
> +
> +     switch (zone->cond) {
> +     case BLK_ZONE_COND_CLOSED:
> +             /* close operation on closed is not an error */
> +             return BLK_STS_OK;
> +     case BLK_ZONE_COND_IMP_OPEN:
> +             dev->nr_zones_imp_open--;
> +             break;
> +     case BLK_ZONE_COND_EXP_OPEN:
> +             dev->nr_zones_exp_open--;
> +             break;
> +     case BLK_ZONE_COND_EMPTY:
> +     case BLK_ZONE_COND_FULL:
> +     default:
> +             return BLK_STS_IOERR;
> +     }
> +
> +     if (zone->wp == zone->start) {
> +             zone->cond = BLK_ZONE_COND_EMPTY;
> +     } else {
> +             zone->cond = BLK_ZONE_COND_CLOSED;
> +             dev->nr_zones_closed++;
> +     }
> +
> +     return BLK_STS_OK;
> +}
> +
> +static void null_close_first_imp_zone(struct nullb_device *dev)
> +{
> +     unsigned int i;
> +
> +     for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
> +             if (dev->zones[i].cond == BLK_ZONE_COND_IMP_OPEN) {
> +                     null_close_zone(dev, &dev->zones[i]);
> +                     return;
> +             }
> +     }
> +}
> +
> +static bool null_can_set_active(struct nullb_device *dev)
> +{
> +     if (!dev->zone_max_active)
> +             return true;
> +
> +     return dev->nr_zones_exp_open + dev->nr_zones_imp_open +
> +            dev->nr_zones_closed < dev->zone_max_active;
> +}
> +
> +static bool null_can_open(struct nullb_device *dev)
> +{
> +     if (!dev->zone_max_open)
> +             return true;
> +
> +     if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < 
> dev->zone_max_open)
> +             return true;
> +
> +     if (dev->nr_zones_imp_open && null_can_set_active(dev)) {
> +             null_close_first_imp_zone(dev);
> +             return true;
> +     }
> +
> +     return false;
> +}
> +
> +/*
> + * This function matches the manage open zone resources function in the ZBC 
> standard,
> + * with the addition of max active zones support (added in the ZNS standard).
> + *
> + * The function determines if a zone can transition to implicit open or 
> explicit open,
> + * while maintaining the max open zone (and max active zone) limit(s). It 
> may close an
> + * implicit open zone in order to make additional zone resources available.
> + *
> + * ZBC states that an implicit open zone shall be closed only if there is not
> + * room within the open limit. However, with the addition of an active limit,
> + * it is not certain that closing an implicit open zone will allow a new zone
> + * to be opened, since we might already be at the active limit capacity.
> + */
> +static bool null_manage_zone_resources(struct nullb_device *dev, struct 
> blk_zone *zone)

I still do not like the name. Since this return a bool, what about
null_has_zone_resources() ?

> +{
> +     switch (zone->cond) {
> +     case BLK_ZONE_COND_EMPTY:
> +             if (!null_can_set_active(dev))
> +                     return false;
> +             fallthrough;
> +     case BLK_ZONE_COND_CLOSED:
> +             return null_can_open(dev);
> +     default:
> +             /* Should never be called for other states */
> +             WARN_ON(1);
> +             return false;
> +     }
> +}
> +
>  static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>                                   unsigned int nr_sectors, bool append)
>  {
> @@ -177,43 +292,155 @@ static blk_status_t null_zone_write(struct nullb_cmd 
> *cmd, sector_t sector,
>               /* Cannot write to a full zone */
>               return BLK_STS_IOERR;
>       case BLK_ZONE_COND_EMPTY:
> +     case BLK_ZONE_COND_CLOSED:
> +             if (!null_manage_zone_resources(dev, zone))
> +                     return BLK_STS_IOERR;
> +             break;
>       case BLK_ZONE_COND_IMP_OPEN:
>       case BLK_ZONE_COND_EXP_OPEN:
> +             break;
> +     default:
> +             /* Invalid zone condition */
> +             return BLK_STS_IOERR;
> +     }
> +
> +     /*
> +      * Regular writes must be at the write pointer position.
> +      * Zone append writes are automatically issued at the write
> +      * pointer and the position returned using the request or BIO
> +      * sector.
> +      */
> +     if (append) {
> +             sector = zone->wp;
> +             if (cmd->bio)
> +                     cmd->bio->bi_iter.bi_sector = sector;
> +             else
> +                     cmd->rq->__sector = sector;
> +     } else if (sector != zone->wp) {
> +             return BLK_STS_IOERR;
> +     }
> +
> +     if (zone->wp + nr_sectors > zone->start + zone->capacity)
> +             return BLK_STS_IOERR;
> +
> +     if (zone->cond == BLK_ZONE_COND_CLOSED) {
> +             dev->nr_zones_closed--;
> +             dev->nr_zones_imp_open++;
> +     } else if (zone->cond == BLK_ZONE_COND_EMPTY) {
> +             dev->nr_zones_imp_open++;
> +     }
> +     if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
> +             zone->cond = BLK_ZONE_COND_IMP_OPEN;
> +
> +     ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
> +     if (ret != BLK_STS_OK)
> +             return ret;
> +
> +     zone->wp += nr_sectors;
> +     if (zone->wp == zone->start + zone->capacity) {
> +             if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
> +                     dev->nr_zones_exp_open--;
> +             else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
> +                     dev->nr_zones_imp_open--;
> +             zone->cond = BLK_ZONE_COND_FULL;
> +     }
> +     return BLK_STS_OK;
> +}
> +
> +static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone 
> *zone)
> +{
> +     if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> +             return BLK_STS_IOERR;
> +
> +     switch (zone->cond) {
> +     case BLK_ZONE_COND_EXP_OPEN:
> +             /* open operation on exp open is not an error */
> +             return BLK_STS_OK;
> +     case BLK_ZONE_COND_EMPTY:
> +             if (!null_manage_zone_resources(dev, zone))
> +                     return BLK_STS_IOERR;
> +             break;
> +     case BLK_ZONE_COND_IMP_OPEN:
> +             dev->nr_zones_imp_open--;
> +             break;
>       case BLK_ZONE_COND_CLOSED:
> -             /*
> -              * Regular writes must be at the write pointer position.
> -              * Zone append writes are automatically issued at the write
> -              * pointer and the position returned using the request or BIO
> -              * sector.
> -              */
> -             if (append) {
> -                     sector = zone->wp;
> -                     if (cmd->bio)
> -                             cmd->bio->bi_iter.bi_sector = sector;
> -                     else
> -                             cmd->rq->__sector = sector;
> -             } else if (sector != zone->wp) {
> +             if (!null_manage_zone_resources(dev, zone))
>                       return BLK_STS_IOERR;
> -             }
> +             dev->nr_zones_closed--;
> +             break;
> +     case BLK_ZONE_COND_FULL:
> +     default:
> +             return BLK_STS_IOERR;
> +     }
> +
> +     zone->cond = BLK_ZONE_COND_EXP_OPEN;
> +     dev->nr_zones_exp_open++;
>  
> -             if (zone->wp + nr_sectors > zone->start + zone->capacity)
> +     return BLK_STS_OK;
> +}
> +
> +static blk_status_t null_finish_zone(struct nullb_device *dev, struct 
> blk_zone *zone)
> +{
> +     if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> +             return BLK_STS_IOERR;
> +
> +     switch (zone->cond) {
> +     case BLK_ZONE_COND_FULL:
> +             /* finish operation on full is not an error */
> +             return BLK_STS_OK;
> +     case BLK_ZONE_COND_EMPTY:
> +             if (!null_manage_zone_resources(dev, zone))

OK. So you are hitting a fuzzy case here that is not actually well described in
the standards. That is, does finishing an empty zone necessarilly imply a
temporary transition through imp open ? Which you are assuming is a yes here.
Personally, I would say that is not necessary, but no strong feeling either way.

>                       return BLK_STS_IOERR;
> +             break;
> +     case BLK_ZONE_COND_IMP_OPEN:
> +             dev->nr_zones_imp_open--;
> +             break;
> +     case BLK_ZONE_COND_EXP_OPEN:
> +             dev->nr_zones_exp_open--;
> +             break;
> +     case BLK_ZONE_COND_CLOSED:
> +             if (!null_manage_zone_resources(dev, zone))
> +                     return BLK_STS_IOERR;
> +             dev->nr_zones_closed--;
> +             break;
> +     default:
> +             return BLK_STS_IOERR;
> +     }
>  
> -             if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
> -                     zone->cond = BLK_ZONE_COND_IMP_OPEN;
> +     zone->cond = BLK_ZONE_COND_FULL;
> +     zone->wp = zone->start + zone->len;
>  
> -             ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
> -             if (ret != BLK_STS_OK)
> -                     return ret;
> +     return BLK_STS_OK;
> +}
> +
> +static blk_status_t null_reset_zone(struct nullb_device *dev, struct 
> blk_zone *zone)
> +{
> +     if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> +             return BLK_STS_IOERR;
>  
> -             zone->wp += nr_sectors;
> -             if (zone->wp == zone->start + zone->capacity)
> -                     zone->cond = BLK_ZONE_COND_FULL;
> +     switch (zone->cond) {
> +     case BLK_ZONE_COND_EMPTY:
> +             /* reset operation on empty is not an error */
>               return BLK_STS_OK;
> +     case BLK_ZONE_COND_IMP_OPEN:
> +             dev->nr_zones_imp_open--;
> +             break;
> +     case BLK_ZONE_COND_EXP_OPEN:
> +             dev->nr_zones_exp_open--;
> +             break;
> +     case BLK_ZONE_COND_CLOSED:
> +             dev->nr_zones_closed--;
> +             break;
> +     case BLK_ZONE_COND_FULL:
> +             break;
>       default:
> -             /* Invalid zone condition */
>               return BLK_STS_IOERR;
>       }
> +
> +     zone->cond = BLK_ZONE_COND_EMPTY;
> +     zone->wp = zone->start;
> +
> +     return BLK_STS_OK;
>  }
>  
>  static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
> @@ -222,56 +449,34 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd 
> *cmd, enum req_opf op,
>       struct nullb_device *dev = cmd->nq->dev;
>       unsigned int zone_no = null_zone_no(dev, sector);
>       struct blk_zone *zone = &dev->zones[zone_no];
> +     blk_status_t ret = BLK_STS_OK;
>       size_t i;
>  
>       switch (op) {
>       case REQ_OP_ZONE_RESET_ALL:
> -             for (i = 0; i < dev->nr_zones; i++) {
> -                     if (zone[i].type == BLK_ZONE_TYPE_CONVENTIONAL)
> -                             continue;
> -                     zone[i].cond = BLK_ZONE_COND_EMPTY;
> -                     zone[i].wp = zone[i].start;
> -             }
> +             for (i = dev->zone_nr_conv; i < dev->nr_zones; i++)
> +                     null_reset_zone(dev, &dev->zones[i]);
>               break;
>       case REQ_OP_ZONE_RESET:
> -             if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> -                     return BLK_STS_IOERR;
> -
> -             zone->cond = BLK_ZONE_COND_EMPTY;
> -             zone->wp = zone->start;
> +             ret = null_reset_zone(dev, zone);
>               break;
>       case REQ_OP_ZONE_OPEN:
> -             if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> -                     return BLK_STS_IOERR;
> -             if (zone->cond == BLK_ZONE_COND_FULL)
> -                     return BLK_STS_IOERR;
> -
> -             zone->cond = BLK_ZONE_COND_EXP_OPEN;
> +             ret = null_open_zone(dev, zone);
>               break;
>       case REQ_OP_ZONE_CLOSE:
> -             if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> -                     return BLK_STS_IOERR;
> -             if (zone->cond == BLK_ZONE_COND_FULL)
> -                     return BLK_STS_IOERR;
> -
> -             if (zone->wp == zone->start)
> -                     zone->cond = BLK_ZONE_COND_EMPTY;
> -             else
> -                     zone->cond = BLK_ZONE_COND_CLOSED;
> +             ret = null_close_zone(dev, zone);
>               break;
>       case REQ_OP_ZONE_FINISH:
> -             if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> -                     return BLK_STS_IOERR;
> -
> -             zone->cond = BLK_ZONE_COND_FULL;
> -             zone->wp = zone->start + zone->len;
> +             ret = null_finish_zone(dev, zone);
>               break;
>       default:
>               return BLK_STS_NOTSUPP;
>       }
>  
> -     trace_nullb_zone_op(cmd, zone_no, zone->cond);
> -     return BLK_STS_OK;
> +     if (ret == BLK_STS_OK)
> +             trace_nullb_zone_op(cmd, zone_no, zone->cond);
> +
> +     return ret;
>  }
>  
>  blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
> 

Apart from the commit message and function name nits above, this looks good to 
me.

Reviewed-by: Damien Le Moal <damien.lem...@wdc.com>

-- 
Damien Le Moal
Western Digital Research

Reply via email to