Hello Matt,

you wrote about panic in u3 & u4:
> These stack traces look like 6569719 (fixed in s10u5).

Then I suppose it's also fixed by 127127-11 because that patch mentions 6569719.
According to my zfs-hardness-test script this is true.
Instead of crashing with a panic, with 127127-11 these servers now show 
hanging zfs commands, just as update 5 does.

Please try my test script on a test server or see below.

> For update 5, you could start with the kernel stack of the hung commands.
> (use ::pgrep and ::findstack)  We might also need the sync thread's stack
> (something like ::walk spa | ::print spa_t
> spa_dsl_pool->dp_txg.tx_sync_thread | ::findstack)

Okay, I'll give it a try.

$ uname -a  
SunOS qacult10 5.10 Generic_137111-08 sun4u sparc SUNW,Ultra-5_10
$ head -1 /etc/release 
                       Solaris 10 5/08 s10s_u5wos_10 SPARC
$ ps -ef|grep zfs
    root 23795 23466   0 11:02:45 pts/1       0:00 ssh localhost zfs receive 
hardness-test/received
    root 23782 23779   0 11:02:45 ?           0:01 zfs receive 
hardness-test/received
    root 23807 23804   0 11:02:52 ?           0:00 zfs receive 
hardness-test/received
    root 23466 23145   0 11:00:35 pts/1       0:00 /usr/bin/bash 
./zfs-hardness-test.sh
    root 23793 23466   0 11:02:45 pts/1       0:00 /usr/bin/bash 
./zfs-hardness-test.sh
    root 23804 23797   0 11:02:52 ?           0:00 sh -c zfs receive 
hardness-test/received
    root 23779     1   0 11:02:45 ?           0:00 sh -c zfs receive 
hardness-test/received

It seems that a receiving process (pid 23782) that had already been killed 
has not yet finished.
After killing it and aborting the data transmission, the script retries the 
send-receive pipe (with the same arguments), with pid 23807 on the receiving end.
There must be a deadlock/race condition.

$ mdb -k
Loading modules: [ unix krtld genunix specfs dtrace ufs pcipsy ip hook neti 
sctp arp usba fcp fctl zfs random nfs audiosup md lofs logindmux sd ptm fcip 
crypto ipc ]
> ::pgrep "zfs$"
S    PID   PPID   PGID    SID    UID      FLAGS             ADDR NAME
R  23782  23779  23779  23779      0 0x4a004000 000003000171cc90 zfs
R  23807  23804  23804  23804      0 0x4a004000 0000030001728058 zfs
> ::pgrep "zfs$" | ::walk thread | ::findstack -v
stack pointer for thread 30000d24480: 2a1007fc8c1
[ 000002a1007fc8c1 cv_wait+0x38() ]
  000002a1007fc971 delay+0x90(1, 183f000, 17cdef7, 17cdef8, 1, 18c0578)
  000002a1007fca21 dnode_special_close+0x20(300221e0a58, 7, 1, 300221e0c68, 7, 
  300221e0a58)
  000002a1007fcad1 dmu_objset_evict+0xb8(30003a8dc40, 300027cf500, 7b652000, 
  70407538, 7b652000, 70407400)
  000002a1007fcb91 dsl_dataset_evict+0x34(30003a8dc40, 30003a8dc40, 0, 
  300027cf500, 3000418c2c0, 30022366200)
  000002a1007fcc41 dbuf_evict_user+0x48(7b6140b0, 30022366200, 30003a8dc48, 0, 0
  , 30022355e20)
  000002a1007fccf1 dbuf_rele+0x8c(30022355e78, 30022355e20, 70400400, 3, 3, 3)
  000002a1007fcda1 dmu_recvbackup+0x94c(300017c7400, 300017c7d80, 300017c7c28, 
  300017c7416, 16, 1)
  000002a1007fcf71 zfs_ioc_recvbackup+0x74(300017c7000, 0, 30004320150, 0, 0, 
  300017c7400)
  000002a1007fd031 zfsdev_ioctl+0x15c(70401400, 57, ffbfee20, 1d, 74, ef0)
  000002a1007fd0e1 fop_ioctl+0x20(30001d7a0c0, 5a1d, ffbfee20, 100003, 
  300027da0c0, 12247f8)
  000002a1007fd191 ioctl+0x184(3, 300043216f8, ffbfee20, 0, 1ec08, 5a1d)
  000002a1007fd2e1 syscall_trap32+0xcc(3, 5a1d, ffbfee20, 0, 1ec08, ff34774c)
stack pointer for thread 30003d12e00: 2a1009dca41
[ 000002a1009dca41 turnstile_block+0x600() ]
  000002a1009dcaf1 mutex_vector_enter+0x3f0(0, 0, 30022355e78, 30000d24480, 
  30000d24480, 0)
  000002a1009dcba1 dbuf_read+0x6c(30022355e20, 0, 1, 1, 0, 300220f1cf8)
  000002a1009dcc61 dmu_bonus_hold+0xec(0, 15, 30022355e20, 2a1009dd5d8, 8, 0)
  000002a1009dcd21 dsl_dataset_open_obj+0x2c(3000418c2c0, 15, 0, 9, 300043ebe88
  , 2a1009dd6a8)
  000002a1009dcde1 dsl_dataset_open_spa+0x140(0, 7b64d000, 3000418c488, 
  300043ebe88, 2a1009dd768, 9)
  000002a1009dceb1 dmu_objset_open+0x20(30003ca9000, 5, 9, 2a1009dd828, 1, 
  300043ebe88)
  000002a1009dcf71 zfs_ioc_objset_stats+0x18(30003ca9000, 0, 0, 0, 70401400, 39
  )
  000002a1009dd031 zfsdev_ioctl+0x15c(70401400, 39, ffbfc468, 13, 4c, ef0)
  000002a1009dd0e1 fop_ioctl+0x20(30001d7a0c0, 5a13, ffbfc468, 100003, 
  300027da010, 12247f8)
  000002a1009dd191 ioctl+0x184(3, 300043208f8, ffbfc468, 0, 1010101, 5a13)
  000002a1009dd2e1 syscall_trap32+0xcc(3, 5a13, ffbfc468, 0, 1010101, 7cb88)
> 
> ::walk spa | ::print spa_t
{
    spa_name = 0x30022613108 "hardness-test"
    spa_avl = {
        avl_child = [ 0, 0 ]
        avl_pcb = 0x1
    }
    spa_config = 0x3002244abd0
    spa_config_syncing = 0
    spa_config_txg = 0x4
    spa_config_cache_lock = {
        _opaque = [ 0 ]
    }
    spa_sync_pass = 0x1
    spa_state = 0
    spa_inject_ref = 0
    spa_traverse_wanted = 0
    spa_sync_on = 0x1
    spa_load_state = 0 (SPA_LOAD_NONE)
    spa_zio_issue_taskq = [ 0x300225e5528, 0x300225e56d8, 0x300225e5888, 
0x300225e5a38, 0x300225e5be8, 0x300225e5d98 ]
    spa_zio_intr_taskq = [ 0x300225e5600, 0x300225e57b0, 0x300225e5960, 
0x300225e5b10, 0x300225e5cc0, 0x300225e5e70 ]
    spa_dsl_pool = 0x3000418c2c0
    spa_normal_class = 0x30022613d98
    spa_first_txg = 0
    spa_final_txg = 0xffffffffffffffff
    spa_freeze_txg = 0xffffffffffffffff
    spa_meta_objset = 0x300004fc070
    spa_vdev_txg_list = {
        tl_lock = {
            _opaque = [ 0 ]
        }
        tl_offset = 0x2a8
        tl_head = [ 0, 0, 0, 0 ]
    }
    spa_root_vdev = 0x300225f7540
    spa_load_guid = 0
    spa_dirty_list = {
        list_size = 0x4a8
        list_offset = 0x2d8
        list_head = {
            list_next = 0x3000413cf38
            list_prev = 0x3000413cf38
        }
    }
    spa_spares_object = 0
    spa_sparelist = 0
    spa_spares = 0
    spa_nspares = 0
    spa_sync_spares = 0 (B_FALSE)
    spa_config_object = 0xb
    spa_syncing_txg = 0x133
    spa_sync_bplist_obj = 0xc
    spa_sync_bplist = {
        bpl_lock = {
            _opaque = [ 0 ]
        }
        bpl_mos = 0x300004fc070
        bpl_object = 0xc
        bpl_blockshift = 0xe
        bpl_bpshift = 0x7
        bpl_havecomp = 0x1
        bpl_queue = 0
        bpl_phys = 0
        bpl_dbuf = 0
        bpl_cached_dbuf = 0
    }
    spa_traverse_lock = {
        _opaque = [ 0 ]
    }
    spa_ubsync = {
        ub_magic = 0xbab10c           
        ub_version = 0x4
        ub_txg = 0x133
        ub_guid_sum = 0x6529f6b1f918f571
        ub_timestamp = 0x49214601
        ub_rootbp = {
            blk_dva = [
                {
                    dva_word = [ 0x1, 0x1459e ]
                }
                {
                    dva_word = [ 0x1, 0x2005d ]
                }
                {
                    dva_word = [ 0x1, 0x2c1c2 ]
                }
            ]
            blk_prop = 0xb070300000001
            blk_pad = [ 0, 0, 0 ]
            blk_birth = 0x41
            blk_fill = 0x34
            blk_cksum = {
                zc_word = [ 0xd2f7bf464, 0x520149a2ccc, 0x104d18095b2f6, 
0x2352b6fe44334b ]
            }
        }
    }
    spa_uberblock = {
        ub_magic = 0xbab10c
        ub_version = 0x4
        ub_txg = 0x133
        ub_guid_sum = 0x6529f6b1f918f571
        ub_timestamp = 0x49214601
        ub_rootbp = {
            blk_dva = [
                {
                    dva_word = [ 0x1, 0x1459e ]
                }
                {
                    dva_word = [ 0x1, 0x2005d ]
                }
                {
                    dva_word = [ 0x1, 0x2c1c2 ]
                }
            ]
            blk_prop = 0xb070300000001
            blk_pad = [ 0, 0, 0 ]
            blk_birth = 0x41
            blk_fill = 0x34
            blk_cksum = {
                zc_word = [ 0xd2f7bf464, 0x520149a2ccc, 0x104d18095b2f6, 
0x2352b6fe44334b ]
            }
        }
    }
    spa_scrub_lock = {
        _opaque = [ 0 ]
    }
    spa_scrub_thread = 0
    spa_scrub_th = 0
    spa_scrub_restart_txg = 0x18
    spa_scrub_mintxg = 0
    spa_scrub_maxtxg = 0
    spa_scrub_inflight = 0
    spa_scrub_maxinflight = 0x46
    spa_scrub_errors = 0
    spa_scrub_suspended = 0
    spa_scrub_cv = {
        _opaque = 0
    }
    spa_scrub_io_cv = {
        _opaque = 0                   
    }
    spa_scrub_stop = 0
    spa_scrub_active = 0
    spa_scrub_type = 0
    spa_scrub_finished = 0
    spa_async_lock = {
        _opaque = [ 0 ]
    }
    spa_async_thread = 0
    spa_async_suspended = 0
    spa_async_cv = {
        _opaque = 0
    }
    spa_async_tasks = 0
    spa_root = 0
    spa_uberblock_lock = {
        _opaque = [ 0 ]
    }
    spa_ena = 0
    spa_last_open_failed = 0 (B_FALSE)
    spa_errlog_lock = {
        _opaque = [ 0 ]
    }
    spa_errlog_last = 0
    spa_errlog_scrub = 0
    spa_errlist_lock = {
        _opaque = [ 0 ]
    }
    spa_errlist_last = {
        avl_root = 0
        avl_compar = spa_error_entry_compare
        avl_offset = 0x28
        avl_numnodes = 0
        avl_size = 0x40
    }
    spa_errlist_scrub = {
        avl_root = 0
        avl_compar = spa_error_entry_compare
        avl_offset = 0x28
        avl_numnodes = 0
        avl_size = 0x40
    }
    spa_deflate = 0x1
    spa_history = 0xd
    spa_history_lock = {
        _opaque = [ 0 ]
    }
    spa_pending_vdev = 0
    spa_pending_spares = 0
    spa_pending_nspares = 0
    spa_config_lock = {
        scl_lock = {
            _opaque = [ 0 ]
        }
        scl_count = {
            rc_count = 0
        }
        scl_writer = 0
        scl_cv = {
            _opaque = 0
        }
    }
    spa_refcount = {
        rc_count = 0xa
    }
}
> spa_dsl_pool->dp_txg.tx_sync_thread | ::findstack
mdb: failed to dereference symbol: unknown symbol name
>

--------------------------

Hope this is enough?

- Andreas
-- 
This message posted from opensolaris.org
_______________________________________________
zfs-discuss mailing list
zfs-discuss@opensolaris.org
http://mail.opensolaris.org/mailman/listinfo/zfs-discuss

Reply via email to