2011/9/15 Michel D?nzer <michel at daenzer.net>: > From: Michel D?nzer <michel.daenzer at amd.com> > > This was only the case if the GPU reset was triggered from the CS ioctl, > otherwise other processes could happily enter the CS ioctl and wreak havoc > during the GPU reset. > > This is a little complicated because the GPU reset can be triggered from the > CS ioctl, in which case we're already holding the mutex, or from other call > paths, in which case we need to lock the mutex. AFAICT the mutex API doesn't > allow nested locking or finding out the mutex owner, so we need to handle this > with some helper functions. > > Signed-off-by: Michel D?nzer <michel.daenzer at amd.com>
For both patches: Reviewed-by: Alex Deucher <alexander.deucher at amd.com> > --- > ?drivers/gpu/drm/radeon/radeon.h ? ? ? ?| ? 60 > ++++++++++++++++++++++++++++++++ > ?drivers/gpu/drm/radeon/radeon_cs.c ? ? | ? 14 ++++---- > ?drivers/gpu/drm/radeon/radeon_device.c | ? 16 ++++++++ > ?3 files changed, 83 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index ef0e0e0..89304d9 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -1203,6 +1203,8 @@ struct radeon_device { > ? ? ? ?struct radeon_pm ? ? ? ? ? ? ? ?pm; > ? ? ? ?uint32_t ? ? ? ? ? ? ? ? ? ? ? ?bios_scratch[RADEON_BIOS_NUM_SCRATCH]; > ? ? ? ?struct mutex ? ? ? ? ? ? ? ? ? ?cs_mutex; > + ? ? ? struct task_struct ? ? ? ? ? ? ?*cs_mutex_owner; > + ? ? ? struct mutex ? ? ? ? ? ? ? ? ? ?cs_mutex_owner_mutex; > ? ? ? ?struct radeon_wb ? ? ? ? ? ? ? ?wb; > ? ? ? ?struct radeon_dummy_page ? ? ? ?dummy_page; > ? ? ? ?bool ? ? ? ? ? ? ? ? ? ? ? ? ? ?gpu_lockup; > @@ -1241,6 +1243,64 @@ struct radeon_device { > ? ? ? ?struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS]; > ?}; > > + > +/* > + * CS mutex helpers > + */ > + > +static inline void cs_mutex_lock(struct radeon_device *rdev) > +{ > + ? ? ? mutex_lock(&rdev->cs_mutex); > + > + ? ? ? mutex_lock(&rdev->cs_mutex_owner_mutex); > + ? ? ? rdev->cs_mutex_owner = current; > + ? ? ? mutex_unlock(&rdev->cs_mutex_owner_mutex); > +} > + > +static inline void cs_mutex_unlock(struct radeon_device *rdev) > +{ > + ? ? ? mutex_lock(&rdev->cs_mutex_owner_mutex); > + ? ? ? rdev->cs_mutex_owner = NULL; > + ? ? ? mutex_unlock(&rdev->cs_mutex_owner_mutex); > + > + ? ? ? mutex_unlock(&rdev->cs_mutex); > +} > + > +/* > + * Check if this process locked the CS mutex already; if it didn't, lock it. > + * > + * Returns: > + * true: This function locked the mutex. > + * false: This function didn't lock the mutex (this process already locked it > + * before), so the caller probably shouldn't unlock it. > + */ > +static inline int cs_mutex_ensure_locked(struct radeon_device *rdev) > +{ > + ? ? ? int took_mutex; > + ? ? ? int have_mutex; > + > + ? ? ? mutex_lock(&rdev->cs_mutex_owner_mutex); > + ? ? ? took_mutex = mutex_trylock(&rdev->cs_mutex); > + ? ? ? if (took_mutex) { > + ? ? ? ? ? ? ? /* The mutex was unlocked before, so it's ours now */ > + ? ? ? ? ? ? ? rdev->cs_mutex_owner = current; > + ? ? ? ? ? ? ? have_mutex = true; > + ? ? ? } else { > + ? ? ? ? ? ? ? /* Somebody already locked the mutex. Was it this process? */ > + ? ? ? ? ? ? ? have_mutex = (rdev->cs_mutex_owner == current); > + ? ? ? } > + ? ? ? mutex_unlock(&rdev->cs_mutex_owner_mutex); > + > + ? ? ? if (!have_mutex) { > + ? ? ? ? ? ? ? /* Another process locked the mutex, take it */ > + ? ? ? ? ? ? ? cs_mutex_lock(rdev); > + ? ? ? ? ? ? ? took_mutex = true; > + ? ? ? } > + > + ? ? ? return took_mutex; > +} > + > + > ?int radeon_device_init(struct radeon_device *rdev, > ? ? ? ? ? ? ? ? ? ? ? struct drm_device *ddev, > ? ? ? ? ? ? ? ? ? ? ? struct pci_dev *pdev, > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c > b/drivers/gpu/drm/radeon/radeon_cs.c > index fae00c0..61e3063 100644 > --- a/drivers/gpu/drm/radeon/radeon_cs.c > +++ b/drivers/gpu/drm/radeon/radeon_cs.c > @@ -222,7 +222,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, > struct drm_file *filp) > ? ? ? ?struct radeon_cs_chunk *ib_chunk; > ? ? ? ?int r; > > - ? ? ? mutex_lock(&rdev->cs_mutex); > + ? ? ? cs_mutex_lock(rdev); > ? ? ? ?/* initialize parser */ > ? ? ? ?memset(&parser, 0, sizeof(struct radeon_cs_parser)); > ? ? ? ?parser.filp = filp; > @@ -233,14 +233,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, > struct drm_file *filp) > ? ? ? ?if (r) { > ? ? ? ? ? ? ? ?DRM_ERROR("Failed to initialize parser !\n"); > ? ? ? ? ? ? ? ?radeon_cs_parser_fini(&parser, r); > - ? ? ? ? ? ? ? mutex_unlock(&rdev->cs_mutex); > + ? ? ? ? ? ? ? cs_mutex_unlock(rdev); > ? ? ? ? ? ? ? ?return r; > ? ? ? ?} > ? ? ? ?r = ?radeon_ib_get(rdev, &parser.ib); > ? ? ? ?if (r) { > ? ? ? ? ? ? ? ?DRM_ERROR("Failed to get ib !\n"); > ? ? ? ? ? ? ? ?radeon_cs_parser_fini(&parser, r); > - ? ? ? ? ? ? ? mutex_unlock(&rdev->cs_mutex); > + ? ? ? ? ? ? ? cs_mutex_unlock(rdev); > ? ? ? ? ? ? ? ?return r; > ? ? ? ?} > ? ? ? ?r = radeon_cs_parser_relocs(&parser); > @@ -248,7 +248,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, > struct drm_file *filp) > ? ? ? ? ? ? ? ?if (r != -ERESTARTSYS) > ? ? ? ? ? ? ? ? ? ? ? ?DRM_ERROR("Failed to parse relocation %d!\n", r); > ? ? ? ? ? ? ? ?radeon_cs_parser_fini(&parser, r); > - ? ? ? ? ? ? ? mutex_unlock(&rdev->cs_mutex); > + ? ? ? ? ? ? ? cs_mutex_unlock(rdev); > ? ? ? ? ? ? ? ?return r; > ? ? ? ?} > ? ? ? ?/* Copy the packet into the IB, the parser will read from the > @@ -260,14 +260,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, > struct drm_file *filp) > ? ? ? ?if (r || parser.parser_error) { > ? ? ? ? ? ? ? ?DRM_ERROR("Invalid command stream !\n"); > ? ? ? ? ? ? ? ?radeon_cs_parser_fini(&parser, r); > - ? ? ? ? ? ? ? mutex_unlock(&rdev->cs_mutex); > + ? ? ? ? ? ? ? cs_mutex_unlock(rdev); > ? ? ? ? ? ? ? ?return r; > ? ? ? ?} > ? ? ? ?r = radeon_cs_finish_pages(&parser); > ? ? ? ?if (r) { > ? ? ? ? ? ? ? ?DRM_ERROR("Invalid command stream !\n"); > ? ? ? ? ? ? ? ?radeon_cs_parser_fini(&parser, r); > - ? ? ? ? ? ? ? mutex_unlock(&rdev->cs_mutex); > + ? ? ? ? ? ? ? cs_mutex_unlock(rdev); > ? ? ? ? ? ? ? ?return r; > ? ? ? ?} > ? ? ? ?r = radeon_ib_schedule(rdev, parser.ib); > @@ -275,7 +275,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, > struct drm_file *filp) > ? ? ? ? ? ? ? ?DRM_ERROR("Failed to schedule IB !\n"); > ? ? ? ?} > ? ? ? ?radeon_cs_parser_fini(&parser, r); > - ? ? ? mutex_unlock(&rdev->cs_mutex); > + ? ? ? cs_mutex_unlock(rdev); > ? ? ? ?return r; > ?} > > diff --git a/drivers/gpu/drm/radeon/radeon_device.c > b/drivers/gpu/drm/radeon/radeon_device.c > index 5f0fd85..5f3d02d 100644 > --- a/drivers/gpu/drm/radeon/radeon_device.c > +++ b/drivers/gpu/drm/radeon/radeon_device.c > @@ -712,6 +712,8 @@ int radeon_device_init(struct radeon_device *rdev, > > ? ? ? ?/* mutex initialization are all done here so we > ? ? ? ? * can recall function without having locking issues */ > + ? ? ? mutex_init(&rdev->cs_mutex_owner_mutex); > + ? ? ? rdev->cs_mutex_owner = NULL; > ? ? ? ?mutex_init(&rdev->cs_mutex); > ? ? ? ?mutex_init(&rdev->ib_pool.mutex); > ? ? ? ?mutex_init(&rdev->cp.mutex); > @@ -949,6 +951,16 @@ int radeon_gpu_reset(struct radeon_device *rdev) > ?{ > ? ? ? ?int r; > ? ? ? ?int resched; > + ? ? ? int took_cs_mutex; > + > + ? ? ? /* Prevent CS ioctl from interfering */ > + ? ? ? took_cs_mutex = cs_mutex_ensure_locked(rdev); > + ? ? ? if (!rdev->gpu_lockup) { > + ? ? ? ? ? ? ? DRM_DEBUG("GPU already reset by previous CS mutex holder\n"); > + ? ? ? ? ? ? ? if (took_cs_mutex) > + ? ? ? ? ? ? ? ? ? ? ? cs_mutex_unlock(rdev); > + ? ? ? ? ? ? ? return 0; > + ? ? ? } > > ? ? ? ?radeon_save_bios_scratch_regs(rdev); > ? ? ? ?/* block TTM */ > @@ -962,8 +974,12 @@ int radeon_gpu_reset(struct radeon_device *rdev) > ? ? ? ? ? ? ? ?radeon_restore_bios_scratch_regs(rdev); > ? ? ? ? ? ? ? ?drm_helper_resume_force_mode(rdev->ddev); > ? ? ? ? ? ? ? ?ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); > + ? ? ? ? ? ? ? if (took_cs_mutex) > + ? ? ? ? ? ? ? ? ? ? ? cs_mutex_unlock(rdev); > ? ? ? ? ? ? ? ?return 0; > ? ? ? ?} > + ? ? ? if (took_cs_mutex) > + ? ? ? ? ? ? ? cs_mutex_unlock(rdev); > ? ? ? ?/* bad news, how to tell it to userspace ? */ > ? ? ? ?dev_info(rdev->dev, "GPU reset failed\n"); > ? ? ? ?return r; > -- > 1.7.6.3 > > _______________________________________________ > dri-devel mailing list > dri-devel at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/dri-devel >