Author: allanjude
Date: Thu Jul 16 16:32:16 2020
New Revision: 363255
URL: https://svnweb.freebsd.org/changeset/base/363255

Log:
  Add VIRTIO_BLK_T_DISCARD support to the virtio-blk driver
  
  If the hypervisor advertises support for the DISCARD command then the
  guest can perform TRIM commands, freeing space on the backing store.
  
  If VIRTIO_BLK_F_DISCARD is enabled, advertise DISKFLAG_CANDELETE.
  
  Tested with FreeBSD guests on bhyve and KVM.
  
  Reviewed by:  jhb
  Tested by:    freqlabs
  MFC after:    1 month
  Relnotes:     yes
  Sponsored by: Klara Inc.
  Differential Revision:        https://reviews.freebsd.org/D21708

Modified:
  head/sys/dev/virtio/block/virtio_blk.c
  head/sys/dev/virtio/block/virtio_blk.h

Modified: head/sys/dev/virtio/block/virtio_blk.c
==============================================================================
--- head/sys/dev/virtio/block/virtio_blk.c      Thu Jul 16 15:12:52 2020        (r363254)
+++ head/sys/dev/virtio/block/virtio_blk.c      Thu Jul 16 16:32:16 2020        (r363255)
@@ -81,6 +81,7 @@ struct vtblk_softc {
 #define VTBLK_FLAG_SUSPEND     0x0008
 #define VTBLK_FLAG_BARRIER     0x0010
 #define VTBLK_FLAG_WC_CONFIG   0x0020
+#define VTBLK_FLAG_DISCARD     0x0040
 
        struct virtqueue        *vtblk_vq;
        struct sglist           *vtblk_sglist;
@@ -112,6 +113,7 @@ static struct virtio_feature_desc vtblk_feature_desc[]
        { VIRTIO_BLK_F_WCE,             "WriteCache"    },
        { VIRTIO_BLK_F_TOPOLOGY,        "Topology"      },
        { VIRTIO_BLK_F_CONFIG_WCE,      "ConfigWCE"     },
+       { VIRTIO_BLK_F_DISCARD,         "Discard"       },
 
        { 0, NULL }
 };
@@ -210,6 +212,7 @@ TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writeca
      VIRTIO_BLK_F_WCE                  | \
      VIRTIO_BLK_F_TOPOLOGY             | \
      VIRTIO_BLK_F_CONFIG_WCE           | \
+     VIRTIO_BLK_F_DISCARD              | \
      VIRTIO_RING_F_INDIRECT_DESC)
 
 #define VTBLK_MTX(_sc)         &(_sc)->vtblk_mtx
@@ -459,7 +462,7 @@ vtblk_config_change(device_t dev)
        vtblk_read_config(sc, &blkcfg);
 
        /* Capacity is always in 512-byte units. */
-       capacity = blkcfg.capacity * 512;
+       capacity = blkcfg.capacity * VTBLK_BSIZE;
 
        if (sc->vtblk_disk->d_mediasize != capacity)
                vtblk_resize_disk(sc, capacity);
@@ -544,13 +547,14 @@ vtblk_strategy(struct bio *bp)
         * be a better way to report our readonly'ness to GEOM above.
         */
        if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
-           (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
+           (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH ||
+           bp->bio_cmd == BIO_DELETE)) {
                vtblk_bio_done(sc, bp, EROFS);
                return;
        }
 
        if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
-           (bp->bio_cmd != BIO_FLUSH)) {
+           (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
                vtblk_bio_done(sc, bp, EOPNOTSUPP);
                return;
        }
@@ -563,6 +567,13 @@ vtblk_strategy(struct bio *bp)
                return;
        }
 
+       if ((bp->bio_cmd == BIO_DELETE) &&
+           !(sc->vtblk_flags & VTBLK_FLAG_DISCARD)) {
+               VTBLK_UNLOCK(sc);
+               vtblk_bio_done(sc, bp, EOPNOTSUPP);
+               return;
+       }
+
        bioq_insert_tail(&sc->vtblk_bioq, bp);
        vtblk_startio(sc);
 
@@ -598,6 +609,8 @@ vtblk_setup_features(struct vtblk_softc *sc)
                sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
        if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
                sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;
+       if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD))
+               sc->vtblk_flags |= VTBLK_FLAG_DISCARD;
 }
 
 static int
@@ -687,12 +700,12 @@ vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio
                dp->d_dump = vtblk_dump;
 
        /* Capacity is always in 512-byte units. */
-       dp->d_mediasize = blkcfg->capacity * 512;
+       dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;
 
        if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
                dp->d_sectorsize = blkcfg->blk_size;
        else
-               dp->d_sectorsize = 512;
+               dp->d_sectorsize = VTBLK_BSIZE;
 
        /*
         * The VirtIO maximum I/O size is given in terms of segments.
@@ -726,6 +739,11 @@ vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio
                    dp->d_stripesize;
        }
 
+       if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
+               dp->d_flags |= DISKFLAG_CANDELETE;
+               dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
+       }
+
        if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
                sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
        else
@@ -876,12 +894,16 @@ vtblk_request_bio(struct vtblk_softc *sc)
                break;
        case BIO_READ:
                req->vbr_hdr.type = VIRTIO_BLK_T_IN;
-               req->vbr_hdr.sector = bp->bio_offset / 512;
+               req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE;
                break;
        case BIO_WRITE:
                req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
-               req->vbr_hdr.sector = bp->bio_offset / 512;
+               req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE;
                break;
+       case BIO_DELETE:
+               req->vbr_hdr.type = VIRTIO_BLK_T_DISCARD;
+               req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE;
+               break;
        default:
                panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
        }
@@ -935,6 +957,20 @@ vtblk_request_execute(struct vtblk_softc *sc, struct v
                /* BIO_READ means the host writes into our buffer. */
                if (bp->bio_cmd == BIO_READ)
                        writable = sg->sg_nseg - 1;
+       } else if (bp->bio_cmd == BIO_DELETE) {
+               struct virtio_blk_discard_write_zeroes *discard;
+
+               discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
+               if (discard == NULL)
+                       return (ENOMEM);
+               discard->sector = bp->bio_offset / VTBLK_BSIZE;
+               discard->num_sectors = bp->bio_bcount / VTBLK_BSIZE;
+               bp->bio_driver1 = discard;
+               error = sglist_append(sg, discard, sizeof(*discard));
+               if (error || sg->sg_nseg == sg->sg_maxseg) {
+                       panic("%s: bio %p data buffer too big %d",
+                           __func__, bp, error);
+               }
        }
 
        writable++;
@@ -1095,6 +1131,11 @@ vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp,
                bp->bio_flags |= BIO_ERROR;
        }
 
+       if (bp->bio_driver1 != NULL) {
+               free(bp->bio_driver1, M_DEVBUF);
+               bp->bio_driver1 = NULL;
+       }
+
        biodone(bp);
 }
 
@@ -1124,7 +1165,12 @@ vtblk_read_config(struct vtblk_softc *sc, struct virti
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, geometry, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, topology, blkcfg);
-       VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, writeback, blkcfg);
+       VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
+       VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
+           blkcfg);
+       VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
+       VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
+           blkcfg);
 }
 
 #undef VTBLK_GET_CONFIG
@@ -1282,7 +1328,7 @@ vtblk_dump_write(struct vtblk_softc *sc, void *virtual
        req->vbr_ack = -1;
        req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
        req->vbr_hdr.ioprio = 1;
-       req->vbr_hdr.sector = offset / 512;
+       req->vbr_hdr.sector = offset / VTBLK_BSIZE;
 
        req->vbr_bp = &buf;
        g_reset_bio(&buf);
@@ -1331,7 +1377,7 @@ vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
 
        /* Set either writeback (1) or writethrough (0) mode. */
        virtio_write_dev_config_1(sc->vtblk_dev,
-           offsetof(struct virtio_blk_config, writeback), wc);
+           offsetof(struct virtio_blk_config, wce), wc);
 }
 
 static int
@@ -1346,7 +1392,7 @@ vtblk_write_cache_enabled(struct vtblk_softc *sc,
                if (wc >= 0 && wc < VTBLK_CACHE_MAX)
                        vtblk_set_write_cache(sc, wc);
                else
-                       wc = blkcfg->writeback;
+                       wc = blkcfg->wce;
        } else
                wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);
 

Modified: head/sys/dev/virtio/block/virtio_blk.h
==============================================================================
--- head/sys/dev/virtio/block/virtio_blk.h      Thu Jul 16 15:12:52 2020        (r363254)
+++ head/sys/dev/virtio/block/virtio_blk.h      Thu Jul 16 16:32:16 2020        (r363255)
@@ -33,20 +33,27 @@
 #ifndef _VIRTIO_BLK_H
 #define _VIRTIO_BLK_H
 
+#define        VTBLK_BSIZE     512
+
 /* Feature bits */
-#define VIRTIO_BLK_F_BARRIER   0x0001  /* Does host support barriers? */
-#define VIRTIO_BLK_F_SIZE_MAX  0x0002  /* Indicates maximum segment size */
-#define VIRTIO_BLK_F_SEG_MAX   0x0004  /* Indicates maximum # of segments */
-#define VIRTIO_BLK_F_GEOMETRY  0x0010  /* Legacy geometry available  */
-#define VIRTIO_BLK_F_RO                0x0020  /* Disk is read-only */
-#define VIRTIO_BLK_F_BLK_SIZE  0x0040  /* Block size of disk is available*/
-#define VIRTIO_BLK_F_SCSI      0x0080  /* Supports scsi command passthru */
-#define VIRTIO_BLK_F_WCE       0x0200  /* Writeback mode enabled after reset */
-#define VIRTIO_BLK_F_TOPOLOGY  0x0400  /* Topology information is available */
-#define VIRTIO_BLK_F_CONFIG_WCE 0x0800 /* Writeback mode available in config */
 
-#define VIRTIO_BLK_ID_BYTES    20      /* ID string length */
+#define VIRTIO_BLK_F_BARRIER           0x0001  /* Does host support barriers? */
+#define VIRTIO_BLK_F_SIZE_MAX          0x0002  /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX           0x0004  /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY          0x0010  /* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO                        0x0020  /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE          0x0040  /* Block size of disk is available*/
+#define VIRTIO_BLK_F_SCSI              0x0080  /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH             0x0200  /* Flush command supported */
+#define VIRTIO_BLK_F_WCE               0x0200  /* Legacy alias for FLUSH */
+#define VIRTIO_BLK_F_TOPOLOGY          0x0400  /* Topology information is available */
+#define VIRTIO_BLK_F_CONFIG_WCE                0x0800  /* Writeback mode available in config */
+#define VIRTIO_BLK_F_MQ                        0x1000  /* Support more than one vq */
+#define VIRTIO_BLK_F_DISCARD           0x2000  /* Trim blocks */
+#define VIRTIO_BLK_F_WRITE_ZEROES      0x4000  /* Write zeros */
 
+#define VIRTIO_BLK_ID_BYTES            20      /* ID string length */
+
 struct virtio_blk_config {
        /* The capacity (in 512-byte sectors). */
        uint64_t capacity;
@@ -66,15 +73,29 @@ struct virtio_blk_config {
 
        /* Topology of the device (if VIRTIO_BLK_F_TOPOLOGY) */
        struct virtio_blk_topology {
+               /* Exponent for physical block per logical block. */
                uint8_t physical_block_exp;
+               /* Alignment offset in logical blocks. */
                uint8_t alignment_offset;
+               /* Minimum I/O size without performance penalty in logical
+                * blocks. */
                uint16_t min_io_size;
+               /* Optimal sustained I/O size in logical blocks. */
                uint32_t opt_io_size;
        } topology;
 
        /* Writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
-       uint8_t writeback;
-
+       uint8_t wce;
+       uint8_t unused;
+       /* Number of vqs, only available when VIRTIO_BLK_F_MQ is set */
+       uint16_t num_queues;
+       uint32_t max_discard_sectors;
+       uint32_t max_discard_seg;
+       uint32_t discard_sector_alignment;
+       uint32_t max_write_zeroes_sectors;
+       uint32_t max_write_zeroes_seg;
+       uint8_t write_zeroes_may_unmap;
+       uint8_t unused1[3];
 } __packed;
 
 /*
@@ -89,24 +110,35 @@ struct virtio_blk_config {
  */
 
 /* These two define direction. */
-#define VIRTIO_BLK_T_IN                0
-#define VIRTIO_BLK_T_OUT       1
+#define VIRTIO_BLK_T_IN                        0
+#define VIRTIO_BLK_T_OUT               1
 
 /* This bit says it's a scsi command, not an actual read or write. */
-#define VIRTIO_BLK_T_SCSI_CMD  2
+#define VIRTIO_BLK_T_SCSI_CMD          2
+#define VIRTIO_BLK_T_SCSI_CMD_OUT      3
 
 /* Cache flush command */
-#define VIRTIO_BLK_T_FLUSH     4
+#define VIRTIO_BLK_T_FLUSH             4
+#define VIRTIO_BLK_T_FLUSH_OUT         5
 
 /* Get device ID command */
-#define VIRTIO_BLK_T_GET_ID    8
+#define VIRTIO_BLK_T_GET_ID            8
 
+/* Discard command */
+#define VIRTIO_BLK_T_DISCARD           11
+
+/* Write zeros command */
+#define VIRTIO_BLK_T_WRITE_ZEROES      13
+
 /* Barrier before this op. */
-#define VIRTIO_BLK_T_BARRIER   0x80000000
+#define VIRTIO_BLK_T_BARRIER           0x80000000
 
 /* ID string length */
-#define VIRTIO_BLK_ID_BYTES    20
+#define VIRTIO_BLK_ID_BYTES            20
 
+/* Unmap this range (only valid for write zeroes command) */
+#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP     0x00000001
+
 /* This is the first element of the read scatter-gather list. */
 struct virtio_blk_outhdr {
        /* VIRTIO_BLK_T* */
@@ -115,6 +147,15 @@ struct virtio_blk_outhdr {
        uint32_t ioprio;
        /* Sector (ie. 512 byte offset) */
        uint64_t sector;
+};
+
+struct virtio_blk_discard_write_zeroes {
+       uint64_t sector;
+       uint32_t num_sectors;
+       struct {
+               uint32_t unmap:1;
+               uint32_t reserved:31;
+       } flags;
 };
 
 struct virtio_scsi_inhdr {
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to