Hello Matt, you wrote about the panic in u3 & u4: > These stack traces look like 6569719 (fixed in s10u5).
Then I suppose it's also fixed by 127127-11 because that patch mentions 6569719. According to my zfs-hardness-test script this is true. Instead of crashing with a panic, with 127127-11 these servers now show hanging zfs commands like update 5 does. Please try my test script on a test server or see below. > For update 5, you could start with the kernel stack of the hung commands. > (use ::pgrep and ::findstack) We might also need the sync thread's stack > (something like ::walk spa | ::print spa_t > spa_dsl_pool->dp_txg.tx_sync_thread | ::findstack) Okay, I'll give it a try. $ uname -a SunOS qacult10 5.10 Generic_137111-08 sun4u sparc SUNW,Ultra-5_10 $ head -1 /etc/release Solaris 10 5/08 s10s_u5wos_10 SPARC $ ps -ef|grep zfs root 23795 23466 0 11:02:45 pts/1 0:00 ssh localhost zfs receive hardness-test/received root 23782 23779 0 11:02:45 ? 0:01 zfs receive hardness-test/received root 23807 23804 0 11:02:52 ? 0:00 zfs receive hardness-test/received root 23466 23145 0 11:00:35 pts/1 0:00 /usr/bin/bash ./zfs-hardness-test.sh root 23793 23466 0 11:02:45 pts/1 0:00 /usr/bin/bash ./zfs-hardness-test.sh root 23804 23797 0 11:02:52 ? 0:00 sh -c zfs receive hardness-test/received root 23779 1 0 11:02:45 ? 0:00 sh -c zfs receive hardness-test/received It seems that a receiving process (pid 23782) that was already killed has not yet finished. After killing it and aborting the data transmission, the script retries the send-receive pipe (with the same arguments), with pid 23807 on the receiving end. There must be a deadlock/race condition. 
$ mdb -k Loading modules: [ unix krtld genunix specfs dtrace ufs pcipsy ip hook neti sctp arp usba fcp fctl zfs random nfs audiosup md lofs logindmux sd ptm fcip crypto ipc ] > ::pgrep "zfs$" S PID PPID PGID SID UID FLAGS ADDR NAME R 23782 23779 23779 23779 0 0x4a004000 000003000171cc90 zfs R 23807 23804 23804 23804 0 0x4a004000 0000030001728058 zfs > ::pgrep "zfs$" | ::walk thread | ::findstack -v stack pointer for thread 30000d24480: 2a1007fc8c1 [ 000002a1007fc8c1 cv_wait+0x38() ] 000002a1007fc971 delay+0x90(1, 183f000, 17cdef7, 17cdef8, 1, 18c0578) 000002a1007fca21 dnode_special_close+0x20(300221e0a58, 7, 1, 300221e0c68, 7, 300221e0a58) 000002a1007fcad1 dmu_objset_evict+0xb8(30003a8dc40, 300027cf500, 7b652000, 70407538, 7b652000, 70407400) 000002a1007fcb91 dsl_dataset_evict+0x34(30003a8dc40, 30003a8dc40, 0, 300027cf500, 3000418c2c0, 30022366200) 000002a1007fcc41 dbuf_evict_user+0x48(7b6140b0, 30022366200, 30003a8dc48, 0, 0 , 30022355e20) 000002a1007fccf1 dbuf_rele+0x8c(30022355e78, 30022355e20, 70400400, 3, 3, 3) 000002a1007fcda1 dmu_recvbackup+0x94c(300017c7400, 300017c7d80, 300017c7c28, 300017c7416, 16, 1) 000002a1007fcf71 zfs_ioc_recvbackup+0x74(300017c7000, 0, 30004320150, 0, 0, 300017c7400) 000002a1007fd031 zfsdev_ioctl+0x15c(70401400, 57, ffbfee20, 1d, 74, ef0) 000002a1007fd0e1 fop_ioctl+0x20(30001d7a0c0, 5a1d, ffbfee20, 100003, 300027da0c0, 12247f8) 000002a1007fd191 ioctl+0x184(3, 300043216f8, ffbfee20, 0, 1ec08, 5a1d) 000002a1007fd2e1 syscall_trap32+0xcc(3, 5a1d, ffbfee20, 0, 1ec08, ff34774c) stack pointer for thread 30003d12e00: 2a1009dca41 [ 000002a1009dca41 turnstile_block+0x600() ] 000002a1009dcaf1 mutex_vector_enter+0x3f0(0, 0, 30022355e78, 30000d24480, 30000d24480, 0) 000002a1009dcba1 dbuf_read+0x6c(30022355e20, 0, 1, 1, 0, 300220f1cf8) 000002a1009dcc61 dmu_bonus_hold+0xec(0, 15, 30022355e20, 2a1009dd5d8, 8, 0) 000002a1009dcd21 dsl_dataset_open_obj+0x2c(3000418c2c0, 15, 0, 9, 300043ebe88 , 2a1009dd6a8) 000002a1009dcde1 dsl_dataset_open_spa+0x140(0, 
7b64d000, 3000418c488, 300043ebe88, 2a1009dd768, 9) 000002a1009dceb1 dmu_objset_open+0x20(30003ca9000, 5, 9, 2a1009dd828, 1, 300043ebe88) 000002a1009dcf71 zfs_ioc_objset_stats+0x18(30003ca9000, 0, 0, 0, 70401400, 39 ) 000002a1009dd031 zfsdev_ioctl+0x15c(70401400, 39, ffbfc468, 13, 4c, ef0) 000002a1009dd0e1 fop_ioctl+0x20(30001d7a0c0, 5a13, ffbfc468, 100003, 300027da010, 12247f8) 000002a1009dd191 ioctl+0x184(3, 300043208f8, ffbfc468, 0, 1010101, 5a13) 000002a1009dd2e1 syscall_trap32+0xcc(3, 5a13, ffbfc468, 0, 1010101, 7cb88) > > ::walk spa | ::print spa_t { spa_name = 0x30022613108 "hardness-test" spa_avl = { avl_child = [ 0, 0 ] avl_pcb = 0x1 } spa_config = 0x3002244abd0 spa_config_syncing = 0 spa_config_txg = 0x4 spa_config_cache_lock = { _opaque = [ 0 ] } spa_sync_pass = 0x1 spa_state = 0 spa_inject_ref = 0 spa_traverse_wanted = 0 spa_sync_on = 0x1 spa_load_state = 0 (SPA_LOAD_NONE) spa_zio_issue_taskq = [ 0x300225e5528, 0x300225e56d8, 0x300225e5888, 0x300225e5a38, 0x300225e5be8, 0x300225e5d98 ] spa_zio_intr_taskq = [ 0x300225e5600, 0x300225e57b0, 0x300225e5960, 0x300225e5b10, 0x300225e5cc0, 0x300225e5e70 ] spa_dsl_pool = 0x3000418c2c0 spa_normal_class = 0x30022613d98 spa_first_txg = 0 spa_final_txg = 0xffffffffffffffff spa_freeze_txg = 0xffffffffffffffff spa_meta_objset = 0x300004fc070 spa_vdev_txg_list = { tl_lock = { _opaque = [ 0 ] } tl_offset = 0x2a8 tl_head = [ 0, 0, 0, 0 ] } spa_root_vdev = 0x300225f7540 spa_load_guid = 0 spa_dirty_list = { list_size = 0x4a8 list_offset = 0x2d8 list_head = { list_next = 0x3000413cf38 list_prev = 0x3000413cf38 } } spa_spares_object = 0 spa_sparelist = 0 spa_spares = 0 spa_nspares = 0 spa_sync_spares = 0 (B_FALSE) spa_config_object = 0xb spa_syncing_txg = 0x133 spa_sync_bplist_obj = 0xc spa_sync_bplist = { bpl_lock = { _opaque = [ 0 ] } bpl_mos = 0x300004fc070 bpl_object = 0xc bpl_blockshift = 0xe bpl_bpshift = 0x7 bpl_havecomp = 0x1 bpl_queue = 0 bpl_phys = 0 bpl_dbuf = 0 bpl_cached_dbuf = 0 } spa_traverse_lock = { _opaque 
= [ 0 ] } spa_ubsync = { ub_magic = 0xbab10c ub_version = 0x4 ub_txg = 0x133 ub_guid_sum = 0x6529f6b1f918f571 ub_timestamp = 0x49214601 ub_rootbp = { blk_dva = [ { dva_word = [ 0x1, 0x1459e ] } { dva_word = [ 0x1, 0x2005d ] } { dva_word = [ 0x1, 0x2c1c2 ] } ] blk_prop = 0xb070300000001 blk_pad = [ 0, 0, 0 ] blk_birth = 0x41 blk_fill = 0x34 blk_cksum = { zc_word = [ 0xd2f7bf464, 0x520149a2ccc, 0x104d18095b2f6, 0x2352b6fe44334b ] } } } spa_uberblock = { ub_magic = 0xbab10c ub_version = 0x4 ub_txg = 0x133 ub_guid_sum = 0x6529f6b1f918f571 ub_timestamp = 0x49214601 ub_rootbp = { blk_dva = [ { dva_word = [ 0x1, 0x1459e ] } { dva_word = [ 0x1, 0x2005d ] } { dva_word = [ 0x1, 0x2c1c2 ] } ] blk_prop = 0xb070300000001 blk_pad = [ 0, 0, 0 ] blk_birth = 0x41 blk_fill = 0x34 blk_cksum = { zc_word = [ 0xd2f7bf464, 0x520149a2ccc, 0x104d18095b2f6, 0x2352b6fe44334b ] } } } spa_scrub_lock = { _opaque = [ 0 ] } spa_scrub_thread = 0 spa_scrub_th = 0 spa_scrub_restart_txg = 0x18 spa_scrub_mintxg = 0 spa_scrub_maxtxg = 0 spa_scrub_inflight = 0 spa_scrub_maxinflight = 0x46 spa_scrub_errors = 0 spa_scrub_suspended = 0 spa_scrub_cv = { _opaque = 0 } spa_scrub_io_cv = { _opaque = 0 } spa_scrub_stop = 0 spa_scrub_active = 0 spa_scrub_type = 0 spa_scrub_finished = 0 spa_async_lock = { _opaque = [ 0 ] } spa_async_thread = 0 spa_async_suspended = 0 spa_async_cv = { _opaque = 0 } spa_async_tasks = 0 spa_root = 0 spa_uberblock_lock = { _opaque = [ 0 ] } spa_ena = 0 spa_last_open_failed = 0 (B_FALSE) spa_errlog_lock = { _opaque = [ 0 ] } spa_errlog_last = 0 spa_errlog_scrub = 0 spa_errlist_lock = { _opaque = [ 0 ] } spa_errlist_last = { avl_root = 0 avl_compar = spa_error_entry_compare avl_offset = 0x28 avl_numnodes = 0 avl_size = 0x40 } spa_errlist_scrub = { avl_root = 0 avl_compar = spa_error_entry_compare avl_offset = 0x28 avl_numnodes = 0 avl_size = 0x40 } spa_deflate = 0x1 spa_history = 0xd spa_history_lock = { _opaque = [ 0 ] } spa_pending_vdev = 0 spa_pending_spares = 0 spa_pending_nspares 
= 0 spa_config_lock = { scl_lock = { _opaque = [ 0 ] } scl_count = { rc_count = 0 } scl_writer = 0 scl_cv = { _opaque = 0 } } spa_refcount = { rc_count = 0xa } } > spa_dsl_pool->dp_txg.tx_sync_thread | ::findstack mdb: failed to dereference symbol: unknown symbol name > -------------------------- Hope this is enough? - Andreas -- This message posted from opensolaris.org _______________________________________________ zfs-discuss mailing list zfs-discuss@opensolaris.org http://mail.opensolaris.org/mailman/listinfo/zfs-discuss