Author: avg
Date: Mon Feb 17 16:30:18 2014
New Revision: 262081
URL: http://svnweb.freebsd.org/changeset/base/262081

Log:
  MFC r254591,255753: Enhance the ZFS vdev layer to maintain both a
  logical and a physical minimum allocation size for devices

Modified:
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
Directory Properties:
  stable/9/sys/   (props changed)
  stable/9/sys/cddl/contrib/opensolaris/   (props changed)

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c       Mon Feb 
17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c       Mon Feb 
17 16:30:18 2014        (r262081)
@@ -5197,7 +5197,7 @@ l2arc_compress_buf(l2arc_buf_hdr_t *l2hd
        len = l2hdr->b_asize;
        cdata = zio_data_buf_alloc(len);
        csize = zio_compress_data(ZIO_COMPRESS_LZ4, l2hdr->b_tmp_cdata,
-           cdata, l2hdr->b_asize);
+           cdata, l2hdr->b_asize, (size_t)SPA_MINBLOCKSIZE);
 
        if (csize == 0) {
                /* zero block, indicate that there's nothing to write */
@@ -5437,6 +5437,8 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
 
        ASSERT(!l2arc_vdev_present(vd));
 
+       vdev_ashift_optimize(vd);
+
        /*
         * Create a new l2arc device entry.
         */

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c  Mon Feb 
17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c  Mon Feb 
17 16:30:18 2014        (r262081)
@@ -226,6 +226,27 @@ metaslab_class_space_update(metaslab_cla
        atomic_add_64(&mc->mc_dspace, dspace_delta);
 }
 
+void
+metaslab_class_minblocksize_update(metaslab_class_t *mc)
+{
+       metaslab_group_t *mg;
+       vdev_t *vd;
+       uint64_t minashift = UINT64_MAX;
+
+       if ((mg = mc->mc_rotor) == NULL) {
+               mc->mc_minblocksize = SPA_MINBLOCKSIZE;
+               return;
+       }
+
+       do {
+               vd = mg->mg_vd;
+               if (vd->vdev_ashift < minashift)
+                       minashift = vd->vdev_ashift;
+       } while ((mg = mg->mg_next) != mc->mc_rotor);
+
+       mc->mc_minblocksize = 1ULL << minashift;
+}
+
 uint64_t
 metaslab_class_get_alloc(metaslab_class_t *mc)
 {
@@ -250,6 +271,12 @@ metaslab_class_get_dspace(metaslab_class
        return (spa_deflate(mc->mc_spa) ? mc->mc_dspace : mc->mc_space);
 }
 
+uint64_t
+metaslab_class_get_minblocksize(metaslab_class_t *mc)
+{
+       return (mc->mc_minblocksize);
+}
+
 /*
  * ==========================================================================
  * Metaslab groups
@@ -389,6 +416,7 @@ metaslab_group_activate(metaslab_group_t
                mgnext->mg_prev = mg;
        }
        mc->mc_rotor = mg;
+       metaslab_class_minblocksize_update(mc);
 }
 
 void
@@ -420,6 +448,7 @@ metaslab_group_passivate(metaslab_group_
 
        mg->mg_prev = NULL;
        mg->mg_next = NULL;
+       metaslab_class_minblocksize_update(mc);
 }
 
 static void

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c       Mon Feb 
17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c       Mon Feb 
17 16:30:18 2014        (r262081)
@@ -3505,6 +3505,7 @@ spa_create(const char *pool, nvlist_t *n
            (error = spa_validate_aux(spa, nvroot, txg,
            VDEV_ALLOC_ADD)) == 0) {
                for (int c = 0; c < rvd->vdev_children; c++) {
+                       vdev_ashift_optimize(rvd->vdev_child[c]);
                        vdev_metaslab_set_size(rvd->vdev_child[c]);
                        vdev_expand(rvd->vdev_child[c], txg);
                }

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c        
Mon Feb 17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c        
Mon Feb 17 16:30:18 2014        (r262081)
@@ -514,8 +514,10 @@ spa_config_update(spa_t *spa, int what)
                 */
                for (c = 0; c < rvd->vdev_children; c++) {
                        vdev_t *tvd = rvd->vdev_child[c];
-                       if (tvd->vdev_ms_array == 0)
+                       if (tvd->vdev_ms_array == 0) {
+                               vdev_ashift_optimize(tvd);
                                vdev_metaslab_set_size(tvd);
+                       }
                        vdev_expand(tvd, txg);
                }
        }

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h      
Mon Feb 17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h      
Mon Feb 17 16:30:18 2014        (r262081)
@@ -70,6 +70,7 @@ extern uint64_t metaslab_class_get_alloc
 extern uint64_t metaslab_class_get_space(metaslab_class_t *mc);
 extern uint64_t metaslab_class_get_dspace(metaslab_class_t *mc);
 extern uint64_t metaslab_class_get_deferred(metaslab_class_t *mc);
+extern uint64_t metaslab_class_get_minblocksize(metaslab_class_t *mc);
 
 extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc,
     vdev_t *vd);

Modified: 
stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h 
Mon Feb 17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h 
Mon Feb 17 16:30:18 2014        (r262081)
@@ -50,6 +50,7 @@ struct metaslab_class {
        uint64_t                mc_deferred;    /* total deferred frees */
        uint64_t                mc_space;       /* total space (alloc + free) */
        uint64_t                mc_dspace;      /* total deflated space */
+       uint64_t                mc_minblocksize;
 };
 
 struct metaslab_group {

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h   Mon Feb 
17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h   Mon Feb 
17 16:30:18 2014        (r262081)
@@ -93,6 +93,17 @@ struct dsl_dataset;
 #define        SPA_BLOCKSIZES          (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)
 
 /*
+ * Maximum supported logical ashift.
+ *
+ * The current 8k allocation block size limit is due to the 8k
+ * aligned/sized operations performed by vdev_probe() on
+ * vdev_label->vl_pad2.  Using another "safe region" for these tests
+ * would allow the limit to be raised to 16k, at the expense of
+ * only having 8 available uberblocks in the label area.
+ */
+#define        SPA_MAXASHIFT           13
+
+/*
  * Size of block to hold the configuration data (a packed nvlist)
  */
 #define        SPA_CONFIG_BLOCKSIZE    (1ULL << 14)

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h  Mon Feb 
17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h  Mon Feb 
17 16:30:18 2014        (r262081)
@@ -78,6 +78,7 @@ extern void vdev_rele(vdev_t *);
 extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
 extern void vdev_metaslab_fini(vdev_t *vd);
 extern void vdev_metaslab_set_size(vdev_t *);
+extern void vdev_ashift_optimize(vdev_t *);
 extern void vdev_expand(vdev_t *vd, uint64_t txg);
 extern void vdev_split(vdev_t *vd);
 extern void vdev_deadman(vdev_t *vd);

Modified: 
stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h     
Mon Feb 17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h     
Mon Feb 17 16:30:18 2014        (r262081)
@@ -57,7 +57,7 @@ typedef struct vdev_cache_entry vdev_cac
  * Virtual device operations
  */
 typedef int    vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size,
-    uint64_t *ashift);
+    uint64_t *logical_ashift, uint64_t *physical_ashift);
 typedef void   vdev_close_func_t(vdev_t *vd);
 typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
 typedef int    vdev_io_start_func_t(zio_t *zio);
@@ -133,6 +133,24 @@ struct vdev {
        uint64_t        vdev_min_asize; /* min acceptable asize         */
        uint64_t        vdev_max_asize; /* max acceptable asize         */
        uint64_t        vdev_ashift;    /* block alignment shift        */
+       /*
+        * Logical block alignment shift
+        *
+        * The smallest sized/aligned I/O supported by the device.
+        */
+       uint64_t        vdev_logical_ashift;
+       /*
+        * Physical block alignment shift
+        *
+        * The device supports logical I/Os with vdev_logical_ashift
+        * size/alignment, but optimum performance will be achieved by
+        * aligning/sizing requests to vdev_physical_ashift.  Smaller
+        * requests may be inflated or incur device level read-modify-write
+        * operations.
+        *
+        * May be 0 to indicate no preference (i.e. use vdev_logical_ashift).
+         */
+       uint64_t        vdev_physical_ashift;
        uint64_t        vdev_state;     /* see VDEV_STATE_* #defines    */
        uint64_t        vdev_prevstate; /* used when reopening a vdev   */
        vdev_ops_t      *vdev_ops;      /* vdev operations              */

Modified: 
stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h  
Mon Feb 17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h  
Mon Feb 17 16:30:18 2014        (r262081)
@@ -79,7 +79,7 @@ extern int lz4_decompress(void *src, voi
  * Compress and decompress data if necessary.
  */
 extern size_t zio_compress_data(enum zio_compress c, void *src, void *dst,
-    size_t s_len);
+    size_t s_len, size_t minblocksize);
 extern int zio_decompress_data(enum zio_compress c, void *src, void *dst,
     size_t s_len, size_t d_len);
 

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c      Mon Feb 
17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c      Mon Feb 
17 16:30:18 2014        (r262081)
@@ -52,6 +52,51 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CT
  * Virtual device management.
  */
 
+/**
+ * The limit for ZFS to automatically increase a top-level vdev's ashift
+ * from logical ashift to physical ashift.
+ *
+ * Example: one or more 512B emulation child vdevs
+ *          child->vdev_ashift = 9 (512 bytes)
+ *          child->vdev_physical_ashift = 12 (4096 bytes)
+ *          zfs_max_auto_ashift = 11 (2048 bytes)
+ *
+ * On pool creation or the addition of a new top-level vdev, ZFS will
+ * bump the ashift of the top-level vdev to 2048.
+ *
+ * Example: one or more 512B emulation child vdevs
+ *          child->vdev_ashift = 9 (512 bytes)
+ *          child->vdev_physical_ashift = 12 (4096 bytes)
+ *          zfs_max_auto_ashift = 13 (8192 bytes)
+ *
+ * On pool creation or the addition of a new top-level vdev, ZFS will
+ * bump the ashift of the top-level vdev to 4096.
+ */
+static uint64_t zfs_max_auto_ashift = SPA_MAXASHIFT;
+
+static int
+sysctl_vfs_zfs_max_auto_ashift(SYSCTL_HANDLER_ARGS)
+{
+       uint64_t val;
+       int err;
+
+       val = zfs_max_auto_ashift;
+       err = sysctl_handle_64(oidp, &val, 0, req);
+       if (err != 0 || req->newptr == NULL)
+               return (err);
+
+       if (val > SPA_MAXASHIFT)
+               val = SPA_MAXASHIFT;
+
+       zfs_max_auto_ashift = val;
+
+       return (0);
+}
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
+    CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(uint64_t),
+    sysctl_vfs_zfs_max_auto_ashift, "QU",
+    "Cap on logical -> physical ashift adjustment on new top-level vdevs.");
+
 static vdev_ops_t *vdev_ops_table[] = {
        &vdev_root_ops,
        &vdev_raidz_ops,
@@ -746,6 +791,8 @@ vdev_add_parent(vdev_t *cvd, vdev_ops_t 
        mvd->vdev_min_asize = cvd->vdev_min_asize;
        mvd->vdev_max_asize = cvd->vdev_max_asize;
        mvd->vdev_ashift = cvd->vdev_ashift;
+       mvd->vdev_logical_ashift = cvd->vdev_logical_ashift;
+       mvd->vdev_physical_ashift = cvd->vdev_physical_ashift;
        mvd->vdev_state = cvd->vdev_state;
        mvd->vdev_crtxg = cvd->vdev_crtxg;
 
@@ -777,6 +824,8 @@ vdev_remove_parent(vdev_t *cvd)
            mvd->vdev_ops == &vdev_replacing_ops ||
            mvd->vdev_ops == &vdev_spare_ops);
        cvd->vdev_ashift = mvd->vdev_ashift;
+       cvd->vdev_logical_ashift = mvd->vdev_logical_ashift;
+       cvd->vdev_physical_ashift = mvd->vdev_physical_ashift;
 
        vdev_remove_child(mvd, cvd);
        vdev_remove_child(pvd, mvd);
@@ -1120,7 +1169,8 @@ vdev_open(vdev_t *vd)
        uint64_t osize = 0;
        uint64_t max_osize = 0;
        uint64_t asize, max_asize, psize;
-       uint64_t ashift = 0;
+       uint64_t logical_ashift = 0;
+       uint64_t physical_ashift = 0;
 
        ASSERT(vd->vdev_open_thread == curthread ||
            spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
@@ -1150,7 +1200,8 @@ vdev_open(vdev_t *vd)
                return (SET_ERROR(ENXIO));
        }
 
-       error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift);
+       error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize,
+           &logical_ashift, &physical_ashift);
 
        /*
         * Reset the vdev_reopening flag so that we actually close
@@ -1248,6 +1299,17 @@ vdev_open(vdev_t *vd)
                return (SET_ERROR(EINVAL));
        }
 
+       vd->vdev_physical_ashift =
+           MAX(physical_ashift, vd->vdev_physical_ashift);
+       vd->vdev_logical_ashift = MAX(logical_ashift, vd->vdev_logical_ashift);
+       vd->vdev_ashift = MAX(vd->vdev_logical_ashift, vd->vdev_ashift);
+
+       if (vd->vdev_logical_ashift > SPA_MAXASHIFT) {
+               vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+                   VDEV_AUX_ASHIFT_TOO_BIG);
+               return (EINVAL);
+       }
+
        if (vd->vdev_asize == 0) {
                /*
                 * This is the first-ever open, so use the computed values.
@@ -1255,19 +1317,15 @@ vdev_open(vdev_t *vd)
                 */
                vd->vdev_asize = asize;
                vd->vdev_max_asize = max_asize;
-               vd->vdev_ashift = MAX(ashift, vd->vdev_ashift);
        } else {
                /*
-                * Detect if the alignment requirement has increased.
-                * We don't want to make the pool unavailable, just
-                * issue a warning instead.
+                * Make sure the alignment requirement hasn't increased.
                 */
-               if (ashift > vd->vdev_top->vdev_ashift &&
+               if (vd->vdev_ashift > vd->vdev_top->vdev_ashift &&
                    vd->vdev_ops->vdev_op_leaf) {
-                       cmn_err(CE_WARN,
-                           "Disk, '%s', has a block alignment that is "
-                           "larger than the pool's alignment\n",
-                           vd->vdev_path);
+                       vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+                           VDEV_AUX_BAD_LABEL);
+                       return (EINVAL);
                }
                vd->vdev_max_asize = max_asize;
        }
@@ -1577,6 +1635,23 @@ vdev_metaslab_set_size(vdev_t *vd)
        vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT);
 }
 
+/*
+ * Maximize performance by inflating the configured ashift for
+ * top level vdevs to be as close to the physical ashift as
+ * possible without exceeding the administrator specified
+ * limit.
+ */
+void
+vdev_ashift_optimize(vdev_t *vd)
+{
+       if (vd == vd->vdev_top &&
+           (vd->vdev_ashift < vd->vdev_physical_ashift) &&
+           (vd->vdev_ashift < zfs_max_auto_ashift)) {
+               vd->vdev_ashift = MIN(zfs_max_auto_ashift,
+                   vd->vdev_physical_ashift);
+       }
+}
+
 void
 vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg)
 {
@@ -2511,6 +2586,10 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t *
        if (vd->vdev_ops->vdev_op_leaf)
                vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
        vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
+       vs->vs_configured_ashift = vd->vdev_top != NULL
+           ? vd->vdev_top->vdev_ashift : vd->vdev_ashift;
+       vs->vs_logical_ashift = vd->vdev_logical_ashift;
+       vs->vs_physical_ashift = vd->vdev_physical_ashift;
        mutex_exit(&vd->vdev_stat_lock);
 
        /*

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c Mon Feb 
17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c Mon Feb 
17 16:30:18 2014        (r262081)
@@ -49,7 +49,7 @@ vdev_file_rele(vdev_t *vd)
 
 static int
 vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
        vdev_file_t *vf;
        vnode_t *vp;
@@ -132,7 +132,8 @@ skip_open:
        }
 
        *max_psize = *psize = vattr.va_size;
-       *ashift = SPA_MINBLOCKSHIFT;
+       *logical_ashift = SPA_MINBLOCKSHIFT;
+       *physical_ashift = SPA_MINBLOCKSHIFT;
 
        return (0);
 }

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Mon Feb 
17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Mon Feb 
17 16:30:18 2014        (r262081)
@@ -576,7 +576,7 @@ vdev_geom_open_by_path(vdev_t *vd, int c
 
 static int
 vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
        struct g_provider *pp;
        struct g_consumer *cp;
@@ -662,9 +662,13 @@ vdev_geom_open(vdev_t *vd, uint64_t *psi
        *max_psize = *psize = pp->mediasize;
 
        /*
-        * Determine the device's minimum transfer size.
+        * Determine the device's minimum transfer size and preferred
+        * transfer size.
         */
-       *ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
+       *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
+       *physical_ashift = 0;
+       if (pp->stripesize)
+               *physical_ashift = highbit(pp->stripesize) - 1;
 
        /*
         * Clear the nowritecache settings, so that on a vdev_reopen()

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c       
Mon Feb 17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c       
Mon Feb 17 16:30:18 2014        (r262081)
@@ -132,7 +132,7 @@ vdev_mirror_map_alloc(zio_t *zio)
 
 static int
 vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
        int numerrors = 0;
        int lasterror = 0;
@@ -155,7 +155,9 @@ vdev_mirror_open(vdev_t *vd, uint64_t *a
 
                *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
                *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
-               *ashift = MAX(*ashift, cvd->vdev_ashift);
+               *logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
+               *physical_ashift = MAX(*physical_ashift,
+                   cvd->vdev_physical_ashift);
        }
 
        if (numerrors == vd->vdev_children) {

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c      
Mon Feb 17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c      
Mon Feb 17 16:30:18 2014        (r262081)
@@ -45,7 +45,7 @@
 /* ARGSUSED */
 static int
 vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
        /*
         * Really this should just fail.  But then the root vdev will be in the
@@ -55,7 +55,8 @@ vdev_missing_open(vdev_t *vd, uint64_t *
         */
        *psize = 0;
        *max_psize = 0;
-       *ashift = 0;
+       *logical_ashift = 0;
+       *physical_ashift = 0;
        return (0);
 }
 

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c        
Mon Feb 17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c        
Mon Feb 17 16:30:18 2014        (r262081)
@@ -1478,7 +1478,7 @@ vdev_raidz_reconstruct(raidz_map_t *rm, 
 
 static int
 vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
        vdev_t *cvd;
        uint64_t nparity = vd->vdev_nparity;
@@ -1507,7 +1507,9 @@ vdev_raidz_open(vdev_t *vd, uint64_t *as
 
                *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
                *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
-               *ashift = MAX(*ashift, cvd->vdev_ashift);
+               *logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
+               *physical_ashift = MAX(*physical_ashift,
+                   cvd->vdev_physical_ashift);
        }
 
        *asize *= vd->vdev_children;

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c Mon Feb 
17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c Mon Feb 
17 16:30:18 2014        (r262081)
@@ -55,7 +55,7 @@ too_many_errors(vdev_t *vd, int numerror
 
 static int
 vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
        int lasterror = 0;
        int numerrors = 0;
@@ -83,7 +83,8 @@ vdev_root_open(vdev_t *vd, uint64_t *asi
 
        *asize = 0;
        *max_asize = 0;
-       *ashift = 0;
+       *logical_ashift = 0;
+       *physical_ashift = 0;
 
        return (0);
 }

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c       Mon Feb 
17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c       Mon Feb 
17 16:30:18 2014        (r262081)
@@ -1092,8 +1092,10 @@ zio_write_bp_init(zio_t *zio)
        }
 
        if (compress != ZIO_COMPRESS_OFF) {
+               metaslab_class_t *mc = spa_normal_class(spa);
                void *cbuf = zio_buf_alloc(lsize);
-               psize = zio_compress_data(compress, zio->io_data, cbuf, lsize);
+               psize = zio_compress_data(compress, zio->io_data, cbuf, lsize,
+                   (size_t)metaslab_class_get_minblocksize(mc));
                if (psize == 0 || psize == lsize) {
                        compress = ZIO_COMPRESS_OFF;
                        zio_buf_free(cbuf, lsize);

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c      
Mon Feb 17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c      
Mon Feb 17 16:30:18 2014        (r262081)
@@ -77,7 +77,8 @@ zio_compress_select(enum zio_compress ch
 }
 
 size_t
-zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len)
+zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len,
+    size_t minblocksize)
 {
        uint64_t *word, *word_end;
        size_t c_len, d_len, r_len;
@@ -102,7 +103,7 @@ zio_compress_data(enum zio_compress c, v
                return (s_len);
 
        /* Compress at least 12.5% */
-       d_len = P2ALIGN(s_len - (s_len >> 3), (size_t)SPA_MINBLOCKSIZE);
+       d_len = P2ALIGN(s_len - (s_len >> 3), minblocksize);
        if (d_len == 0)
                return (s_len);
 
@@ -115,14 +116,14 @@ zio_compress_data(enum zio_compress c, v
         * Cool.  We compressed at least as much as we were hoping to.
         * For both security and repeatability, pad out the last sector.
         */
-       r_len = P2ROUNDUP(c_len, (size_t)SPA_MINBLOCKSIZE);
+       r_len = P2ROUNDUP(c_len, minblocksize);
        if (r_len > c_len) {
                bzero((char *)dst + c_len, r_len - c_len);
                c_len = r_len;
        }
 
        ASSERT3U(c_len, <=, d_len);
-       ASSERT(P2PHASE(c_len, (size_t)SPA_MINBLOCKSIZE) == 0);
+       ASSERT(P2PHASE(c_len, minblocksize) == 0);
 
        return (c_len);
 }

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h       Mon Feb 
17 16:30:01 2014        (r262080)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h       Mon Feb 
17 16:30:18 2014        (r262081)
@@ -620,7 +620,8 @@ typedef enum vdev_aux {
        VDEV_AUX_IO_FAILURE,    /* experienced I/O failure              */
        VDEV_AUX_BAD_LOG,       /* cannot read log chain(s)             */
        VDEV_AUX_EXTERNAL,      /* external diagnosis                   */
-       VDEV_AUX_SPLIT_POOL     /* vdev was split off into another pool */
+       VDEV_AUX_SPLIT_POOL,    /* vdev was split off into another pool */
+       VDEV_AUX_ASHIFT_TOO_BIG /* vdev's min block size is too large   */
 } vdev_aux_t;
 
 /*
@@ -714,7 +715,13 @@ typedef struct vdev_stat {
        uint64_t        vs_self_healed;         /* self-healed bytes    */
        uint64_t        vs_scan_removing;       /* removing?    */
        uint64_t        vs_scan_processed;      /* scan processed bytes */
+       uint64_t        vs_configured_ashift;   /* TLV vdev_ashift      */
+       uint64_t        vs_logical_ashift;      /* vdev_logical_ashift  */
+       uint64_t        vs_physical_ashift;     /* vdev_physical_ashift */
 } vdev_stat_t;
+#define VDEV_STAT_VALID(field, uint64_t_field_count) \
+    ((uint64_t_field_count * sizeof(uint64_t)) >= \
+     (offsetof(vdev_stat_t, field) + sizeof(((vdev_stat_t *)NULL)->field)))
 
 /*
  * DDT statistics.  Note: all fields should be 64-bit because this
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to