Hi,

My desktop panicked last night during a zfs receive operation.  This
is a dual Opteron system running snv_47 and bfu'd to DEBUG project bits
that are in sync with the onnv gate as of two days ago.  The project
bits are for Opteron FMA and don't appear to be active at all in the
panic.  I'll log a bug unless someone recognises this as a known issue:

> ::status
debugging crash dump vmcore.0 (64-bit) from enogas
operating system: 5.11 onnv-dev (i86pc)
panic message:
assertion failed: ((&dnp->dn_blkptr[0])->blk_birth == 0) || list_head(&dn->dn_dirty_dbufs[txgoff]) != 0L || dn->dn_next_blksz[txgoff] >> 9 == dnp->dn_datablkszsec, file: ../../common/fs/zfs/dnode_syn
dump content: kernel pages only

> $c
vpanic()
assfail+0x7e(fffffffff06daa80, fffffffff06daa58, 220)
dnode_sync+0x5ef(ffffffff8e0ce3f8, 0, ffffffff8e0c81c0, ffffffff8adde1c0)
dmu_objset_sync_dnodes+0xa4(ffffffff8be25340, ffffffff8be25480, ffffffff8adde1c0)
dmu_objset_sync+0xfd(ffffffff8be25340, ffffffff8adde1c0)
dsl_dataset_sync+0x4a(ffffffff8e2286c0, ffffffff8adde1c0)
dsl_pool_sync+0xa7(ffffffff89ef3900, 248bbb)
spa_sync+0x1d5(ffffffff82ea2700, 248bbb)
txg_sync_thread+0x221(ffffffff89ef3900)
thread_start+8()

The assertion comes from dnode_sync():

dnode_sync(dnode_t *dn, int level, zio_t *zio, dmu_tx_t *tx)
{
        free_range_t *rp;
        int txgoff = tx->tx_txg & TXG_MASK;
        dnode_phys_t *dnp = dn->dn_phys;
...
        if (dn->dn_next_blksz[txgoff]) {
                ASSERT(P2PHASE(dn->dn_next_blksz[txgoff],
                    SPA_MINBLOCKSIZE) == 0);
                ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
                    list_head(&dn->dn_dirty_dbufs[txgoff]) != NULL ||
                    dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT ==
                    dnp->dn_datablkszsec);
                ...
        }
...
}


We get

        txgoff = 0x248bbb & 0x3 = 0x3
        dnp = 0xfffffe80e648b400
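
For reference, TXG_MASK is TXG_SIZE - 1 and TXG_SIZE is 4, so the mask
is 0x3.  A throwaway userland sketch of that arithmetic, with the
constants copied by hand rather than pulled from the ZFS headers:

#include <stdio.h>
#include <stdint.h>

/* Assumed to match sys/txg.h, where TXG_SIZE is 4. */
#define	TXG_SIZE	4
#define	TXG_MASK	(TXG_SIZE - 1)

int
main(void)
{
	uint64_t txg = 0x248bbb;	/* txg argument to spa_sync() above */
	int txgoff = (int)(txg & TXG_MASK);

	(void) printf("txgoff = 0x%x\n", txgoff);	/* prints txgoff = 0x3 */
	return (0);
}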

> 0xfffffe80e648b400::print dnode_phys_t
{
    dn_type = 0x16
    dn_indblkshift = 0xe
    dn_nlevels = 0x1
    dn_nblkptr = 0x3
    dn_bonustype = 0
    dn_checksum = 0
    dn_compress = 0
    dn_flags = 0x1
    dn_datablkszsec = 0x1c
    dn_bonuslen = 0
    dn_pad2 = [ 0, 0, 0, 0 ]
    dn_maxblkid = 0
    dn_used = 0x800
    dn_pad3 = [ 0, 0, 0, 0 ]
    dn_blkptr = [
        {
            blk_dva = [
                {
                    dva_word = [ 0x2, 0x3015472 ]
                }
                {
                    dva_word = [ 0x2, 0x4613b32 ]
                }
                {
                    dva_word = [ 0, 0 ]
                }
            ]
            blk_prop = 0x801607030001001b
            blk_pad = [ 0, 0, 0 ]
            blk_birth = 0x221478
            blk_fill = 0x1
            blk_cksum = {
                zc_word = [ 0x4b4b88c4e6, 0x39c18ca2a5a1, 0x16ea3555d00431,
0x640a1f2b2c8b322 ]
            }
        }
    ]
    dn_bonus = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... ]
}

So regarding the assertion we have (&dnp->dn_blkptr[0])->blk_birth == 0x221478,
i.e. the block pointer is not a hole, so the first clause of the assertion fails.

> ffffffff8e0ce3f8::print -at dnode_t dn_dirty_dbufs[3]
{
    ffffffff8e0ce510 size_t dn_dirty_dbufs[3].list_size = 0x198
    ffffffff8e0ce518 size_t dn_dirty_dbufs[3].list_offset = 0x120
    ffffffff8e0ce520 struct list_node dn_dirty_dbufs[3].list_head = {
        ffffffff8e0ce520 struct list_node *list_next = 0xffffffff8e0ce520
        ffffffff8e0ce528 struct list_node *list_prev = 0xffffffff8e0ce520
    }
}
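
For anyone reading along, list_next of the list_head node pointing back
at the node itself is exactly the list(9F) empty case.  A minimal
userland sketch of that logic, using the field names from the output
above (my own paraphrase, not the actual common/list/list.c source;
list_is_empty and list_head_sketch are made-up names):

#include <stddef.h>
#include <stdio.h>

typedef struct list_node {
	struct list_node *list_next;
	struct list_node *list_prev;
} list_node_t;

typedef struct list {
	size_t		list_size;
	size_t		list_offset;
	list_node_t	list_head;
} list_t;

/* Empty means the head node points back at itself, as in the dump above. */
static int
list_is_empty(list_t *list)
{
	return (list->list_head.list_next == &list->list_head);
}

/* Sketch of list_head(): NULL for an empty list, else the first object. */
static void *
list_head_sketch(list_t *list)
{
	if (list_is_empty(list))
		return (NULL);
	return ((char *)list->list_head.list_next - list->list_offset);
}

int
main(void)
{
	list_t l;

	l.list_size = 0x198;
	l.list_offset = 0x120;
	l.list_head.list_next = &l.list_head;	/* points back at itself */
	l.list_head.list_prev = &l.list_head;

	(void) printf("%p\n", list_head_sketch(&l));	/* NULL: list is empty */
	return (0);
}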

So dn_dirty_dbufs[3] is empty (list_next above points back at list_head)
and list_head() will have returned NULL, so the second clause fails and
we're relying on the third clause of the assertion to pass:

> ffffffff8e0ce3f8::print dnode_t dn_next_blksz
dn_next_blksz = [ 0, 0, 0, 0x4a00 ]

With txgoff == 3 we're using the 0x4a00 entry.  0x4a00 >> 9 = 0x25, but
from the dnode_phys_t above dnp->dn_datablkszsec is 0x1c, so the third
clause fails as well and the assertion trips.  Boom.
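
A throwaway check of that arithmetic (SPA_MINBLOCKSHIFT hard-coded as 9,
i.e. 512-byte sectors; values copied from the dumps above):

#include <stdio.h>
#include <stdint.h>

#define	SPA_MINBLOCKSHIFT	9	/* 512-byte sectors */

int
main(void)
{
	uint64_t next_blksz = 0x4a00;	/* dn_next_blksz[3] from the dnode_t */
	uint64_t datablkszsec = 0x1c;	/* dn_datablkszsec from the dnode_phys_t */

	/* 0x4a00 >> 9 = 0x25 (37 sectors, 18944 bytes) ... */
	(void) printf("dn_next_blksz[3] >> 9 = 0x%llx sectors\n",
	    (unsigned long long)(next_blksz >> SPA_MINBLOCKSHIFT));
	/* ... versus 0x1c (28 sectors, 14336 bytes) recorded on disk. */
	(void) printf("dn_datablkszsec       = 0x%llx sectors\n",
	    (unsigned long long)datablkszsec);
	return (0);
}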

Sun folks can log in to enogas.uk; /var/crash/enogas/*.0 is
accessible.

Gavin

_______________________________________________
zfs-discuss mailing list
zfs-discuss@opensolaris.org
http://mail.opensolaris.org/mailman/listinfo/zfs-discuss
