> +static void gsi_trans_tre_fill(struct gsi_tre *dest_tre, dma_addr_t addr, > + u32 len, bool last_tre, bool bei, > + enum ipa_cmd_opcode opcode) > +{ > + struct gsi_tre tre; > + > + tre.addr = cpu_to_le64(addr); > + tre.len_opcode = gsi_tre_len_opcode(opcode, len); > + tre.reserved = 0; > + tre.flags = gsi_tre_flags(last_tre, bei, opcode); > + > + *dest_tre = tre; /* Write TRE as a single (16-byte) unit */ > +}
Have you checked that the atomic write is actually what happens here, by looking at the compiler output? You might need to add a 'volatile' qualifier to the dest_tre argument so the temporary structure doesn't get optimized away here. > +/* Cancel a channel's pending transactions */ > +void gsi_channel_trans_cancel_pending(struct gsi_channel *channel) > +{ > + struct gsi_trans_info *trans_info = &channel->trans_info; > + u32 evt_ring_id = channel->evt_ring_id; > + struct gsi *gsi = channel->gsi; > + struct gsi_evt_ring *evt_ring; > + struct gsi_trans *trans; > + unsigned long flags; > + > + evt_ring = &gsi->evt_ring[evt_ring_id]; > + > + spin_lock_irqsave(&evt_ring->ring.spinlock, flags); > + > + list_for_each_entry(trans, &trans_info->pending, links) > + trans->result = -ECANCELED; > + > + list_splice_tail_init(&trans_info->pending, &trans_info->complete); > + > + spin_unlock_irqrestore(&evt_ring->ring.spinlock, flags); > + > + spin_lock_irqsave(&gsi->spinlock, flags); > + > + if (gsi->event_enable_bitmap & BIT(evt_ring_id)) > + gsi_event_handle(gsi, evt_ring_id); > + > + spin_unlock_irqrestore(&gsi->spinlock, flags); > +} That is a lot of irqsave()/irqrestore() operations. Do you actually call all of these functions from hardirq context? Arnd