On Sat, Feb 13, 2016 at 11:37:17AM +0100, Stefan Sperling wrote:
> On Sat, Feb 13, 2016 at 03:42:16PM +1100, Jonathan Gray wrote:
> > On Fri, Feb 12, 2016 at 11:07:35AM -0500, Johan Huldtgren wrote:
> > > uvm_fault(0xffffffff8193f240, 0x38, 0, 1) -> e
> > > kernel: page fault trap, code=0
> > > Stopped at      sr_validate_io+0x36:    movl    0x38(%r9),%r10d
> > > ddb{1}> trace
> > > sr_validate_io() at sr_validate_io+0x36
> > > sr_raid5_rw() at sr_raid5_rw+0x40
> > > sr_raid_recreate_wu() at sr_raid_recreate_wu+0x2c
> > > sr_wu_done_callback() at sr_wu_done_callback+0x17a
> > > taskq_thread() at taskq_thread+0x6c
> > 
> > Thanks for all the detail you've provided here.  The fault appears to be
> > caused by a NULL xs.  A diff to error in that case is provided below.
> > 
> > Perhaps someone familiar with the softraid/scsi code can comment as to
> > why this is occuring.
> 
> The trace of the first crash reported by Johan looks like it could perhaps
> be caused by a garbage xs, doesn't it?
> 
> Quoting http://marc.info/?l=openbsd-misc&m=145477770306396&w=2
> panic: Non dma-reachable buffer at curaddr 0xffffffff81115888(raw)
> Stopped at Debugger+0x9: leave
> TID PID UID PRFLAGS PFLAGS CPU COMMAND
> *25637 25637 0 0x14000 0x200 1 srdis
> Debugger() at Debugger+0x9
> panic() at panic+0xfe
> _bus_dmamap_load_buffer() at _bus_dmamap_load_buffer+0x1b6
> _bus_dmamap_load() at _bus_dmamap_load+0x7f
> ahci_load_prdt() at ahci_load_prdt+0x97
> ahci_ata_cmd() at ahci_ata_cmd+0x69
> atascsi_disk_cmd() at atascsis_disk_cmd+0x1b1
> scsi_xs_exec() scsi_xs_exec+0x35
> sdstart() at sdstart+0x16f
> scsi_iopool_run() at scsi_iopool_run+0x5d
> scsi_xsh_runqueue() at scsi_xsh_runqueue+0x13d
> scsi_xsh_add() at scsi_xsh_add+0x98
> sdstrategy() at sdstrategy+0x10f
> spec_strategy() at spec_strategy+0x53
> end trace frame: 0xffff800032ca1e40, count: 0
> 
> Maybe there's an uninitialized variable which happened to contain
> zeros in this second instance of the crash?

The potentially uninitialised variable use turns out to be these:

Index: softraid.c
===================================================================
RCS file: /cvs/src/sys/dev/softraid.c,v
retrieving revision 1.365
diff -u -p -r1.365 softraid.c
--- softraid.c  29 Dec 2015 04:46:28 -0000      1.365
+++ softraid.c  14 Feb 2016 00:28:14 -0000
@@ -3113,7 +3113,7 @@ sr_rebuild_init(struct sr_discipline *sd
        struct disklabel        label;
        struct vnode            *vn;
        u_int64_t               size;
-       int64_t                 csize;
+       int64_t                 csize = 0;
        char                    devname[32];
        int                     rv = EINVAL, open = 0;
        int                     cid, i, part, status;
@@ -3206,6 +3206,7 @@ sr_rebuild_init(struct sr_discipline *sd
                    devname);
                goto done;
        }
+       /*         here */
        if (size < csize) {
                sr_error(sc, "%s partition too small, at least %lld bytes "
                    "required", devname, (long long)(csize << DEV_BSHIFT));
@@ -3657,7 +3658,7 @@ sr_ioctl_installboot(struct sr_softc *sc
     struct bioc_installboot *bb)
 {
        void                    *bootblk = NULL, *bootldr = NULL;
-       struct sr_chunk         *chunk;
+       struct sr_chunk         *chunk = NULL;
        struct sr_meta_opt_item *omi;
        struct sr_meta_boot     *sbm;
        struct disk             *dk;
@@ -3786,7 +3787,7 @@ sr_ioctl_installboot(struct sr_softc *sc
        sd->sd_meta->ssdi.ssd_vol_flags |= BIOC_SCBOOTABLE;
        if (sr_meta_save(sd, SR_META_DIRTY)) {
                sr_error(sc, "could not save metadata to %s",
-                   chunk->src_devname);
+                   chunk ? chunk->src_devname : "disk");
                goto done;
        }
 

Reply via email to