On 20.10.20 11:28, Jan Beulich wrote:
On 16.10.2020 12:58, Juergen Gross wrote:
--- a/xen/arch/x86/pv/shim.c
+++ b/xen/arch/x86/pv/shim.c
@@ -660,11 +660,12 @@ void pv_shim_inject_evtchn(unsigned int port)
if ( port_is_valid(guest, port) )
{
struct evtchn *chn = evtchn_from_port(guest, port);
- unsigned long flags;
- spin_lock_irqsave(&chn->lock, flags);
- evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn);
- spin_unlock_irqrestore(&chn->lock, flags);
+ if ( evtchn_read_trylock(chn) )
+ {
+ evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn);
+ evtchn_read_unlock(chn);
+ }
Does this want some form of else, e.g. at least a printk_once()?
No, I don't think so.
This is just a race with the port_is_valid() test above, where the
port is concurrently being switched to invalid.
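The failing case is transient only. The read side of the scheme boils
down to something like this (simplified sketch, not the literal patch
code):

/*
 * Sketch: readers bump the counter; a writer has added
 * EVENT_WRITE_LOCK_INC (INT_MIN), making the counter negative, so a
 * reader seeing a negative result backs off again.
 */
static inline bool evtchn_read_trylock(struct evtchn *evtchn)
{
    if ( atomic_inc_return(&evtchn->lock) < 0 )
    {
        /* Writer active: undo our increment and bail. */
        atomic_dec(&evtchn->lock);
        return false;
    }

    return true;
}

static inline void evtchn_read_unlock(struct evtchn *evtchn)
{
    atomic_dec(&evtchn->lock);
}

So a false return can only be observed while the channel is transiting
through the free/unbound state, in which case dropping the event is
fine.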
@@ -360,7 +352,7 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
if ( rc )
goto out;
- flags = double_evtchn_lock(lchn, rchn);
+ double_evtchn_lock(lchn, rchn);
This introduces an unfortunate conflict with my conversion of
the per-domain event lock to an rw one: it acquires rd's lock
in read mode only, while the requirements here would not allow
doing so. (Same in evtchn_close() then.)
Is it a problem to use write mode for those cases?
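For reference, double_evtchn_lock() only has to take both write locks
in a stable (address) order, so two parties locking the same channel
pair can't deadlock; roughly (sketch from the patch context, details
may differ):

static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn)
{
    /* Fixed ordering avoids ABBA deadlock between the two locks. */
    if ( lchn <= rchn )
    {
        evtchn_write_lock(lchn);
        if ( rchn != lchn )
            evtchn_write_lock(rchn);
    }
    else
    {
        evtchn_write_lock(rchn);
        evtchn_write_lock(lchn);
    }
}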
@@ -736,7 +723,8 @@ int evtchn_send(struct domain *ld, unsigned int lport)
lchn = evtchn_from_port(ld, lport);
- spin_lock_irqsave(&lchn->lock, flags);
+ if ( !evtchn_read_trylock(lchn) )
+ return 0;
With this, the auxiliary call to xsm_evtchn_send() further up from
here should also go away again (possibly in a separate follow-on,
which would then likely be a clean revert).
Yes.
@@ -798,9 +786,11 @@ void send_guest_vcpu_virq(struct vcpu *v, uint32_t virq)
d = v->domain;
chn = evtchn_from_port(d, port);
- spin_lock(&chn->lock);
- evtchn_port_set_pending(d, v->vcpu_id, chn);
- spin_unlock(&chn->lock);
+ if ( evtchn_read_trylock(chn) )
+ {
+ evtchn_port_set_pending(d, v->vcpu_id, chn);
+ evtchn_read_unlock(chn);
+ }
out:
spin_unlock_irqrestore(&v->virq_lock, flags);
@@ -829,9 +819,11 @@ void send_guest_global_virq(struct domain *d, uint32_t virq)
goto out;
chn = evtchn_from_port(d, port);
- spin_lock(&chn->lock);
- evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
- spin_unlock(&chn->lock);
+ if ( evtchn_read_trylock(chn) )
+ {
+ evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
+ evtchn_read_unlock(chn);
+ }
out:
spin_unlock_irqrestore(&v->virq_lock, flags);
As said before, I think these lock uses can go away altogether.
I shall put together a patch.
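The result would presumably look like this (sketch only, relying on
v->virq_lock being held across the binding lookup; to be verified in
that patch):

void send_guest_vcpu_virq(struct vcpu *v, uint32_t virq)
{
    unsigned long flags;
    int port;
    struct domain *d;
    struct evtchn *chn;

    ASSERT(!virq_is_global(virq));

    spin_lock_irqsave(&v->virq_lock, flags);

    port = v->virq_to_evtchn[virq];
    if ( unlikely(port == 0) )
        goto out;

    d = v->domain;
    chn = evtchn_from_port(d, port);
    /* No per-channel lock: v->virq_lock protects the VIRQ binding. */
    evtchn_port_set_pending(d, v->vcpu_id, chn);

 out:
    spin_unlock_irqrestore(&v->virq_lock, flags);
}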
And on the whole I'd really prefer if we first convinced ourselves
that there's no way to simply get rid of the IRQ-safe locking
forms instead, before taking a decision to go with this model with
its extra constraints.
@@ -1060,15 +1053,16 @@ int evtchn_unmask(unsigned int port)
{
struct domain *d = current->domain;
struct evtchn *evtchn;
- unsigned long flags;
if ( unlikely(!port_is_valid(d, port)) )
return -EINVAL;
evtchn = evtchn_from_port(d, port);
- spin_lock_irqsave(&evtchn->lock, flags);
- evtchn_port_unmask(d, evtchn);
- spin_unlock_irqrestore(&evtchn->lock, flags);
+ if ( evtchn_read_trylock(evtchn) )
+ {
+ evtchn_port_unmask(d, evtchn);
+ evtchn_read_unlock(evtchn);
+ }
I think this wants mentioning together with send / query in the
description.
Okay.
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -105,6 +105,60 @@ void notify_via_xen_event_channel(struct domain *ld, int lport);
#define bucket_from_port(d, p) \
((group_from_port(d, p))[((p) % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET])
+#define EVENT_WRITE_LOCK_INC INT_MIN
+
+/*
+ * Lock an event channel exclusively. This is allowed only while holding
+ * d->event_lock AND when the channel is free or unbound either when taking
+ * or when releasing the lock, as any concurrent operation on the event
+ * channel using evtchn_read_trylock() will just assume the event channel is
+ * free or unbound at the moment.
... when the evtchn_read_trylock() returns false.
Okay.
+ */
+static inline void evtchn_write_lock(struct evtchn *evtchn)
+{
+ int val;
+
+ /*
+ * The lock can't be held by a writer already, as all writers need to
+ * hold d->event_lock.
+ */
+ ASSERT(atomic_read(&evtchn->lock) >= 0);
+
+ /* No barrier needed, atomic_add_return() is a full barrier. */
+ for ( val = atomic_add_return(EVENT_WRITE_LOCK_INC, &evtchn->lock);
+ val != EVENT_WRITE_LOCK_INC;
The _INC suffix is slightly odd for this 2nd use, but I guess
the dual use will make it so for just about any name you may pick.
I'll switch to a normal rwlock in V4.
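I.e. something along these lines (sketch of the direction only, not
final V4 code, and assuming Xen's rwlock offers a read_trylock()
counterpart):

/* struct evtchn's lock then becomes a plain rwlock_t. */

static inline bool evtchn_read_trylock(struct evtchn *evtchn)
{
    return read_trylock(&evtchn->lock);
}

static inline void evtchn_read_unlock(struct evtchn *evtchn)
{
    read_unlock(&evtchn->lock);
}

static inline void evtchn_write_lock(struct evtchn *evtchn)
{
    write_lock(&evtchn->lock);
}

static inline void evtchn_write_unlock(struct evtchn *evtchn)
{
    write_unlock(&evtchn->lock);
}

That also makes the EVENT_WRITE_LOCK_INC machinery and its naming
question go away.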
Juergen