On Mon, 2026-06-01 at 10:36 +0000, Alice Ryhl wrote:
> On Sat, May 30, 2026 at 04:35:11PM +0200, Philipp Stanner wrote:
> >
[…]
> > +use pin_init::pin_init_from_closure;
> > +
> > +use core::{
> > + marker::PhantomData, //
> > + ops::Deref,
> > + ptr,
> > + ptr::{
> > + drop_in_place,
> > + NonNull, //
> > + },
> > + sync::atomic::{
> > + AtomicU64,
> > + Ordering, //
> > + },
>
> Use atomics from the kernel crate instead.
OK.
>
> > +};
> > +
> > +use bindings::ECANCELED;
> > +
> > +use kernel::str::CString;
> > +use kernel::sync::{
> > + aref::{
> > + ARef,
> > + AlwaysRefCounted, //
> > + },
> > + Arc,
> > + ArcBorrow, //
> > +};
> > +
> > +/// VTable for dma_fence backend_ops callbacks.
> > +//
> > +// Mandatory dma_fence backend_ops are implemented implicitly through
> > +// [`FenceCtx`]. Additional ones shall get implemented on this trait,
> > which then
> > +// shall be demanded for the fence context data.
> > +pub trait FenceCtxOps {}
>
> This empty trait is unused.
(discussed in the other thread with Boris)
>
> > +/// A dma-fence context. A fence context takes care of associating related
> > fences with each other,
> > +/// providing each with increasing sequence numbers and a common identifier.
> > +#[pin_data(PinnedDrop)]
> > +pub struct FenceCtx<F: Send + Sync, C: Send + Sync> {
>
> No need to list any trait bounds here. You can list them on `impl`
> blocks only.
ACK.
>
> >
[…]
> > + {
> > + // Uses `pin_init_from_closure` instead of `try_pin_init!` so that
> > on
> > + // `-ENOENT` (already signaled) the callback can be read back from
> > the
> > + // partially-initialized slot and returned through the error.
> > + //
> > + // SAFETY: `pin_init_from_closure` requires:
> > + // - On `Ok(())`: the slot is fully initialized and valid for
> > `Drop`.
> > + // - On `Err(_)`: the slot is clean, i.e.: no
> > partially-initialized fields
> > + // remain, and the slot can be deallocated without dropping.
> > + //
> > + // We uphold this as follows:
> > + // - On success: all three fields are initialized. Ok(()) is
> > returned.
> > + // - On ENOENT (already signaled): `callback` and `fence` are read
> > back
> > + // from the slot via `ptr::read`, leaving the slot clean. `cb`
> > was
> > + // initialized by `dma_fence_add_callback` (it calls
> > + // `INIT_LIST_HEAD(&cb->node)` even on error), but `cb` is
> > + // `Opaque<dma_fence_cb>` which has no `Drop`, so not dropping
> > it is
> > + // fine. The callback is returned through `AlreadySignaled(T)`.
> > + // - On other errors: same cleanup as ENOENT, error returned as
> > + // `Other(e)`.
> > + unsafe {
> > + pin_init_from_closure(move |slot: *mut Self| {
> > + let slot_callback = &raw mut (*slot).callback;
> > + let slot_fence = &raw mut (*slot).fence;
> > + let slot_cb = &raw mut (*slot).cb;
> > +
> > + // Write callback and fence first — must be visible before
> > + // dma_fence_add_callback makes the registration live.
> > + core::ptr::write(slot_callback, callback);
> > + core::ptr::write(slot_fence, ARef::from(fence));
>
> Here you are incrementing the fence refcount. It's better to change the
> function argument to ARef<Fence> so that the user can avoid this
> increment if they happen to own a refcount they're willing to give up.
Agreed, will do.
>
> > + let ret = to_result(bindings::dma_fence_add_callback(
> > + fence.inner.get(),
> > + Opaque::cast_into(slot_cb),
> > + Some(Self::dma_fence_callback),
> > + ));
> > +
> > + match ret {
> > + Ok(()) => Ok(()),
> > + Err(e) => {
> > + // Read back what we wrote to leave the slot clean.
> > + let cb_back = core::ptr::read(slot_callback);
> > + let _fence_back = core::ptr::read(slot_fence);
>
> This can be drop_in_place().
>
> > + if e.to_errno() == ENOENT.to_errno() {
> > + Err(CallbackError::AlreadySignaled(cb_back))
> > + } else {
> > + Err(CallbackError::Other(e))
> > + }
> > + }
> > + }
> > + })
> > + }
> > + }
> > +
> > + /// Raw dma fence callback that is called by the C code.
> > + ///
> > + /// # Safety
> > + ///
> > + /// This is only called by the dma_fence subsystem with valid pointers.
> > + unsafe extern "C" fn dma_fence_callback(
> > + _fence: *mut bindings::dma_fence,
> > + cb: *mut bindings::dma_fence_cb,
> > + ) {
> > + let ptr = Opaque::cast_from(cb).cast_mut();
> > +
> > + // SAFETY: All `cb` we can receive here have been created in such
> > a way
> > + // that they are embedded into a `FenceCbRegistration`. The backend
> > + // ensures synchronisation so whoever holds the registration object
> > + // cannot drop it while this code is running. See
> > `FenceCbRegistration::drop`.
> > + unsafe {
> > + let reg: *mut Self = container_of!(ptr, Self, cb);
> > +
> > + (*reg).callback.called();
> > + }
> > + }
> > +
> > + /// Returns a reference to the fence this callback is registered on.
> > + pub fn fence(self: Pin<&Self>) -> &Fence {
>
> Can be simplified to `fn fence(&self) -> &Fence`.
>
> > + &self.get_ref().fence
> > + }
> > +}
> > +
> > +#[pinned_drop]
> > +impl<T: FenceCb> PinnedDrop for FenceCbRegistration<T> {
> > + fn drop(self: Pin<&mut Self>) {
> > + // Always call dma_fence_remove_callback, even if `callback` has
> > already
> > + // been taken by `dma_fence_callback`. This is necessary for
> > + // synchronization: `dma_fence_remove_callback` acquires
> > `fence->lock`,
> > + // which ensures that any in-flight `dma_fence_signal` (which
> > calls our
> > + // callback while holding the same lock) has completed before we
> > free
> > + // the struct.
> > + //
> > + // Without this, Drop can race with a concurrent signal:
> > + // CPU0 (signal, lock held): take() -> signaled(fence_ref) (in
> > progress)
> > + // CPU1 (drop): sees is_some()==false -> skips lock -> frees
> > struct
> > + // CPU0: accesses fence_ref -> use-after-free
> > + //
> > + // When the callback has already fired, the signal path detached
> > the
> > + // list node via INIT_LIST_HEAD, so dma_fence_remove_callback just
> > sees
> > + // an empty node and returns false — the lock acquisition is the
> > only
> > + // thing that matters.
> > + //
> > + // SAFETY: The fence pointer is valid and the cb was initialized by
> > + // dma_fence_add_callback during construction.
> > + unsafe {
> > + bindings::dma_fence_remove_callback(self.fence.as_raw(),
> > self.cb.get());
> > + }
>
> Formatting nit: Usually the ; goes outside the unsafe block.
I could have sworn that it was rustfmt that did that. Maybe it left the
`;` inside because it was inside the block to begin with.
>
> > + }
> > +}
> > +
> > +// SAFETY: FenceCbRegistration can be sent between threads
> > +unsafe impl<T: FenceCb> Send for FenceCbRegistration<T> {}
> > +
> > +// SAFETY: &FenceCbRegistration can be shared between threads if &T can.
> > +unsafe impl<T: FenceCb> Sync for FenceCbRegistration<T> where T: Sync {}
>
> There's no &FenceCbRegistration<T> -> &T accessor, so I don't think this
> bound is required.
>
> unsafe impl<T: FenceCb> Sync for FenceCbRegistration<T> {}
>
> There also can't be such an accessor in the future because the closure
> takes a &mut T.
Hm, very correct. The entire design only allows serial access.
>
> > +/// The receiving counterpart of a [`DriverFence`], designed to register
> > callbacks
> > +/// on, check the signalled state etc. A [`Fence`] cannot be signalled.
> > +/// A [`Fence`] is always refcounted.
> > +pub struct Fence {
> > + /// The actual dma_fence passed to C.
> > + inner: Opaque<bindings::dma_fence>,
> > +}
> > +
> > +// SAFETY: Fences are literally designed to be shared between threads.
> > +unsafe impl Send for Fence {}
> > +// SAFETY: Fences are literally designed to be shared between threads.
> > +unsafe impl Sync for Fence {}
> > +
> > +impl Fence {
> > + /// Check whether the fence was signalled at the moment of the
> > function call.
> > + pub fn is_signaled(&self) -> bool {
> > + // SAFETY: self is by definition still valid. The backend ensures
> > proper
> > + // locking.
> > + unsafe { bindings::dma_fence_is_signaled(self.as_raw()) }
> > + }
> > +
> > + fn as_raw(&self) -> *mut bindings::dma_fence {
> > + self.inner.get()
> > + }
> > +
> > + /// Create a [`Fence`] from a raw C [`bindings::dma_fence`].
> > + ///
> > + /// # Safety
> > + ///
> > + /// `ptr` must point to an initialized fence that is embedded into a
> > [`Fence`].
> > + pub unsafe fn from_raw<'a>(ptr: *mut bindings::dma_fence) -> &'a Self {
> > + // SAFETY: Safe as per the function's overall safety requirements.
> > + unsafe { &*ptr.cast() }
> > + }
> > +}
> > +
> > +// SAFETY: These implement the C backends refcounting methods which are
> > proven to work correctly.
> > +unsafe impl AlwaysRefCounted for Fence {
> > + fn inc_ref(&self) {
> > + // SAFETY: `self.as_raw()` is a pointer to a valid `struct
> > dma_fence`.
> > + unsafe { bindings::dma_fence_get(self.as_raw()) }
> > + }
> > +
> > + /// # Safety
> > + ///
> > + /// `ptr` must be a valid pointer to a [`DriverFence`].
> > + unsafe fn dec_ref(ptr: NonNull<Self>) {
> > + // SAFETY: `ptr` is never a NULL pointer; and when `dec_ref()` is
> > called
> > + // the fence is by definition still valid.
> > + let fence = unsafe { (*ptr.as_ptr()).inner.get() };
> > +
> > + // SAFETY: Valid because `fence` was created validly above.
> > + unsafe { bindings::dma_fence_put(fence) }
> > + }
> > +}
> > +
> > +#[repr(C)] // Necessary to guarantee that `inner` always comes first so
> > that we can cast.
> > +#[pin_data]
> > +struct DriverFenceData<F: Send + Sync, C: Send + Sync> {
>
> Ditto here about trait bounds. (And everywhere else.)
>
> > + #[pin]
> > + /// The inner fence.
> > + inner: Fence,
> > + /// Pointer to access the FenceCtx. Useful for obtaining name
> > parameters.
> > + // The FenceCtx lives as long as at least all its fences, hence this
> > is safe.
> > + fctx: Arc<FenceCtx<F, C>>,
> > + /// The API user's data. As required by [`DriverFenceAllowedData`],
> > this either
> > + /// does not need drop, or must live in a [`rcu::RcuBox`]. It is
> > essential
> > + /// that the data only performs operations legal in atomic context in
> > its
> > + /// [`Drop`] implementation.
> > + #[pin]
> > + data: F,
> > +}
> > +
> > +/// A trait to enforce that all data in a [`DriverFence`] either does not
> > need
> > +/// drop, or lives in a [`RcuBox`].
> > +pub trait DriverFenceAllowedData: private::Sealed {}
> > +
> > +mod private {
> > + pub trait Sealed {}
> > +}
> > +
> > +impl<F: Copy> DriverFenceAllowedData for F {}
> > +impl<F: Send> DriverFenceAllowedData for RcuBox<F> {}
> > +
> > +impl<F: Copy> private::Sealed for F {}
> > +impl<F: Send> private::Sealed for RcuBox<F> {}
>
> Why sealed? Just make the trait unsafe and require the things you
> require from the user.
I think the sealed approach is far better: we definitely only allow the
user to pass A or B, and the code only compiles in those two cases.
An unsafe trait implementation, on the other hand, could be messed up by
the implementer. I thought that's what Sealed is for. Or isn't it?
>
> > +/// A synchronization primitive mainly for GPU drivers.
> > +///
> > +/// Fences are always reference counted. The typical use case is that one
> > side registers
> > +/// callbacks on the fence which will perform a certain action (such as
> > queueing work) once the
> > +/// other side signals the fence.
> > +///
> > +/// # Examples
> > +///
> > +/// ```
> > +/// use kernel::dma_buf::{DriverFence, FenceCtx, FenceCb,
> > FenceCbRegistration};
> > +/// use kernel::str::CString;
> > +/// use kernel::sync::{
> > +/// aref::ARef,
> > +/// rcu::RcuBox, //
> > +/// };
> > +/// use core::ops::Deref;
> > +/// use core::fmt::Display;
>
> Use fmt traits from kernel instead. (Actually, I don't think you use
> Display at all here?)
I tried, see a few lines below:
>
> > +/// struct CallbackData { }
> > +///
> > +/// impl FenceCb for CallbackData {
> > +/// fn called(&mut self) {
> > +/// pr_info!("DmaFence callback executed.\n");
> > +/// }
> > +/// }
> > +///
> > +/// let driver_name = CString::try_from_fmt(fmt!("dummy_driver"))?;
> > +/// let timeline_name = CString::try_from_fmt(fmt!("dummy_timeline"))?;
> > +///
> > +/// let fctx = FenceCtx::new(driver_name, timeline_name, ())?;
> > +///
> > +/// let fence_data = CString::try_from_fmt(fmt!("dummy_data"))?;
> > +/// // DriverFence::data must either not need drop, or live in an RcuBox.
> > +/// let fence_data = RcuBox::new(fence_data, GFP_KERNEL)?;
> > +///
> > +/// let fence_alloc =
> > fctx.as_arc_borrow().new_fence_allocation(fence_data)?;
> > +/// let mut fence = fctx.new_fence(fence_alloc);
> > +///
> > +/// let cb_data = CallbackData { };
> > +/// let waiting_fence = ARef::from(fence.as_fence());
> > +/// let cb_reg = FenceCbRegistration::new(&waiting_fence, cb_data);
> > +/// let cb_reg = KBox::pin_init(cb_reg, GFP_KERNEL)?;
> > +///
> > +/// // DriverFence implements Deref.
> > +/// // FIXME: unit test claims that CString does not implement Display.
> > Why?
> > +/// // pr_info!("Fence's inner data is: {}", fence.deref().deref());
Lazily, I was hoping that someone here would tell me how that is
supposed to be done correctly 8-)
> > +///
> > +/// // TODO begin_signalling
> > +/// fence.signal(Ok(()));
> > +/// assert_eq!(waiting_fence.is_signaled(), true);
> > +///
> > +/// Ok::<(), Error>(())
> > +/// ```
> > +pub struct DriverFence<F: Send + Sync, C: Send + Sync> {
> > + /// The actual content of the fence. Lives in a raw pointer so that its
> > + /// memory can be managed independently. Valid until both the
> > [`DriverFence`]
> > + /// and all associated [`Fence`]s have disappeared.
> > + data: NonNull<DriverFenceData<F, C>>,
> > +}
> > +
> > +/// A pre-prepared DMA fence, carrying the user's data and the memory it
> > and the
> > +/// fence reside in. Only useful for creating a [`DriverFence`]. Splitting
> > +/// allocation and full initialization is necessary because fences cannot
> > be
> > +/// allocated dynamically in some circumstances (deadlock).
> > +pub struct DriverFenceAllocation<F: Send + Sync, C: Send + Sync> {
> > + /// The memory for the actual content of the fence.
> > + /// Handed over to a [`DriverFence`], or deallocated once the
> > + /// [`DriverFenceAllocation`] drops.
> > + data: KBox<DriverFenceData<F, C>>,
> > +}
> > +
> > +impl<F: Send + Sync + DriverFenceAllowedData, C: Send + Sync>
> > DriverFenceAllocation<F, C> {
> > + /// Create a new allocation slot that can later be used to create a
> > fully
> > + /// initialized [`DriverFence`] without the need to allocate.
> > + pub fn new(fctx: Arc<FenceCtx<F, C>>, data: F) -> Result<Self> {
> > + let fence_data = DriverFenceData {
> > + // `inner` remains uninitialized until a [`DriverFence`] takes
> > over.
> > + inner: Fence {
> > + inner: Opaque::uninit(),
> > + },
> > + fctx,
> > + data,
> > + };
> > +
> > + // In order to support the C dma_fence callbacks, it is necessary
> > for
> > + // a `Fence` and a `DriverFence` to live in the same allocation,
> > + // because the C backend passes a dma_fence, from which the driver
> > most
> > + // likely wants to be able to access its `data` in `DriverFence`.
> > + //
> > + // Hence, we need to manage the memory manually. It will be freed
> > by the
> > + // C backend automatically once the refcount within `Fence` drops
> > to 0.
> > + let data = KBox::new(fence_data, GFP_KERNEL | __GFP_ZERO)?;
> > +
> > + Ok(Self { data })
> > + }
> > +
> > + fn as_raw(&self) -> *mut bindings::dma_fence {
> > + self.data.inner.inner.get()
> > + }
> > +}
> > +
> > +impl<F: Send + Sync, C: Send + Sync> DriverFence<F, C> {
> > + fn as_raw(&self) -> *mut bindings::dma_fence {
> > + // SAFETY: Valid because `self` is valid.
> > + let fence_data = unsafe { &mut *self.data.as_ptr() };
> > +
> > + fence_data.inner.inner.get()
> > + }
> > +
> > + /// Create a [`DriverFence`] from a raw pointer to a
> > [`bindings::dma_fence`].
> > + ///
> > + /// # Safety
> > + ///
> > + /// `ptr` must be a valid pointer to a `dma_fence` that was obtained
> > through
> > + /// a [`DriverFence`] with matching generic data for both fence and
> > associated
> > + /// [`FenceCtx`].
> > + unsafe fn from_raw(ptr: *mut bindings::dma_fence) -> Self {
> > + let opaque_fence = Opaque::cast_from(ptr);
> > +
> > + // SAFETY: Safe due to the function's overall safety requirements.
> > + let fence_ptr = unsafe { container_of!(opaque_fence, Fence, inner)
> > };
> > +
> > + // DriverFenceData is repr(C) and a Fence is its first member.
> > + let fence_data_ptr = fence_ptr as *mut DriverFenceData<F, C>;
> > +
> > + // SAFETY: `fence_data_ptr` was created validly above.
> > + let data = unsafe { NonNull::new_unchecked(fence_data_ptr) };
> > +
> > + Self { data }
> > + }
> > +
> > + /// Return the underlying [`Fence`].
> > + pub fn as_fence(&self) -> &Fence {
> > + // SAFETY: `self` is by definition still valid, and it cannot drop
> > until
> > + // this new reference is gone.
> > + unsafe { Fence::from_raw(self.as_raw()) }
> > + }
> > +
> > + /// Signal the fence. This will invoke all registered callbacks.
> > + pub fn signal(self, res: Result) {
> > + let fence = self.as_raw();
> > + let mut fence_flags: usize = 0;
> > + let flag_ptr = &raw mut fence_flags;
> > +
> > + // SAFETY: Once a `DriverFence` is initialized, the inner `fence`
> > is
> > + // valid and initialized. It is valid until the refcount drops
> > + // to 0, which can earliest happen once the `DriverFence` has been
> > dropped.
> > + unsafe {
> > + bindings::dma_fence_lock_irqsave(fence, flag_ptr);
> > + if !bindings::dma_fence_is_signaled_locked(fence) {
> > + if let Err(err) = res {
> > + bindings::dma_fence_set_error(fence, err.to_errno());
> > + }
> > + bindings::dma_fence_signal_locked(fence);
> > + }
> > + bindings::dma_fence_unlock_irqrestore(fence, flag_ptr);
> > + }
>
> This single unsafe blocks spans five different unsafe operations.
Same discussion with Danilo. I'd prefer it this way, but I guess
separate blocks also have some advantages.
>
> > + }
> > +}
> > +
> > +// SAFETY: Fences are literally designed to be shared between threads.
> > +unsafe impl<F: Send + Sync, C: Send + Sync> Send for DriverFence<F, C> {}
> > +
> > +impl<F: Send + Sync, C: Send + Sync> Deref for DriverFence<F, C> {
> > + type Target = F;
> > +
> > + fn deref(&self) -> &Self::Target {
> > + // SAFETY: Thanks to refcounting, `data` is always valid as long
> > as `self` is.
> > + let data = unsafe { &*self.data.as_ptr() };
> > +
> > + &data.data
> > + }
> > +}
> > +
> > +/// A borrowed [`DriverFence`]. All you can do with it is access your user
> > data
> > +/// and obtain a [`Fence`].
> > +pub struct DriverFenceBorrow<F: Send + Sync, C: Send + Sync> {
> > + /// The actual content of the fence. Lives in a raw pointer so that its
> > + /// memory can be managed independently. Valid until both the
> > [`DriverFence`]
> > + /// and all associated [`Fence`]s have disappeared.
> > + data: NonNull<DriverFenceData<F, C>>,
> > +}
> > +
> > +impl<F: Send + Sync, C: Send + Sync> Deref for DriverFenceBorrow<F, C> {
> > + type Target = F;
> > +
> > + fn deref(&self) -> &Self::Target {
> > + // SAFETY: Thanks to refcounting, `data` is always valid as long
> > as `self` is.
> > + let data = unsafe { &*self.data.as_ptr() };
> > +
> > + &data.data
> > + }
> > +}
> > +
> > +impl<F: Send + Sync, C: Send + Sync> DriverFenceBorrow<F, C> {
> > + fn as_raw(&self) -> *mut bindings::dma_fence {
> > + // SAFETY: Valid because `self` is valid.
> > + let fence_data = unsafe { &mut *self.data.as_ptr() };
> > +
> > + fence_data.inner.inner.get()
> > + }
> > +
> > + /// Return the underlying [`Fence`].
> > + pub fn as_fence(&self) -> &Fence {
> > + // SAFETY: `self` is by definition still valid, and it cannot drop
> > until
> > + // this new reference is gone.
> > + unsafe { Fence::from_raw(self.as_raw()) }
> > + }
> > +
> > + /// Get a [`DriverFenceBorrow`] from a raw pointer.
> > + ///
> > + /// # Safety
> > + ///
> > + /// `ptr` must point to a raw dma_fence within a [`Fence`] within a
> > [`DriverFenceData`].
> > + unsafe fn from_raw(ptr: *mut bindings::dma_fence) -> Self {
> > + let opaque_fence = Opaque::cast_from(ptr);
> > +
> > + // SAFETY: Safe due to the function's overall safety requirements.
> > + let fence_ptr = unsafe { container_of!(opaque_fence, Fence, inner)
> > };
> > +
> > + // DriverFenceData is repr(C) and a Fence is its first member.
> > + let fence_data_ptr = fence_ptr as *mut DriverFenceData<F, C>;
> > +
> > + // SAFETY: `fence_data_ptr` was created validly above.
> > + let data = unsafe { NonNull::new_unchecked(fence_data_ptr) };
> > +
> > + Self { data }
> > + }
> > +}
> > +
> > +// SAFETY: The Rust dma_fence abstractions are already designed around the
> > inner
> > +// C `dma_fence`, which can serve safely as the identification point when
> > being
> > +// owned by C. Moreover, safety is ensured by not dropping `DriverFence`
> > and by
> > +// only allowing operations without side effects on the Borrowed type.
> > +unsafe impl<F: Send + Sync + 'static, C: Send + Sync + 'static>
> > ForeignOwnable
> > + for DriverFence<F, C>
> > +{
> > + // `DriverFence` is merely a wrapper around a raw pointer. Thus, we
> > can just
> > + // use it directly.
> > + type Borrowed<'a> = DriverFenceBorrow<F, C>;
> > + type BorrowedMut<'a> = DriverFenceBorrow<F, C>;
> > +
> > + const FOREIGN_ALIGN: usize =
> > core::mem::align_of::<bindings::dma_fence>();
> > +
> > + fn into_foreign(self) -> *mut c_void {
> > + let fence = self;
> > +
> > + let ptr = fence.as_raw();
> > +
> > + // DriverFence must not drop.
> > + core::mem::forget(fence);
>
> Nit: Modern Rust uses ManuallyDrop instead of forget().
You mean still take `self` here, then stuff it into ManuallyDrop and
let it go out of scope, aye?

Thanks for the review,
P.