On Mon, Nov 11, 2024 at 07:32:44PM +0530, Jyothi Kumar Seerapu wrote:
> The I2C driver gets an interrupt upon transfer completion.
> For multiple messages in a single transfer, N interrupts will be
> received for N messages, leading to significant software interrupt
> latency. To mitigate this latency, utilize Block Event Interrupt (BEI)

Please rewrite this to the tone that the reader doesn't know what Block
Event Interrupt is, or that it exists.

> only when an interrupt is necessary. This means large transfers can be
> split into multiple chunks of 8 messages internally, without expecting
> interrupts for the first 7 message completions, only the last one will
> trigger an interrupt indicating 8 messages completed.
> 
> By implementing BEI, multi-message transfers can be divided into
> chunks of 8 messages, improving overall transfer time.

You already wrote this in the paragraph above.


Where is this number 8 coming from btw?

> This optimization reduces transfer time from 168 ms to 48 ms for a
> series of 200 I2C write messages in a single transfer, with a
> clock frequency support of 100 kHz.
> 
> BEI optimizations are currently implemented for I2C write transfers only,
> as there is no use case for multiple I2C read messages in a single transfer
> at this time.
> 
> Signed-off-by: Jyothi Kumar Seerapu <quic_jseer...@quicinc.com>
> ---
> 
> v1 -> v2:
>    - Moved gi2c_gpi_xfer->msg_idx_cnt to separate local variable.
>    - Updated goto labels for error scenarios in geni_i2c_gpi function
>    - memset tx_multi_xfer to 0.
>    - Removed passing current msg index to geni_i2c_gpi.
>    - Fixed kernel test robot reported compilation issues.    
> 
>  drivers/i2c/busses/i2c-qcom-geni.c | 203 +++++++++++++++++++++++++----
>  1 file changed, 178 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/i2c/busses/i2c-qcom-geni.c 
> b/drivers/i2c/busses/i2c-qcom-geni.c
> index 7a22e1f46e60..04a7d926dadc 100644
> --- a/drivers/i2c/busses/i2c-qcom-geni.c
> +++ b/drivers/i2c/busses/i2c-qcom-geni.c
> @@ -100,6 +100,10 @@ struct geni_i2c_dev {
>       struct dma_chan *rx_c;
>       bool gpi_mode;
>       bool abort_done;
> +     bool is_tx_multi_xfer;
> +     u32 num_msgs;
> +     u32 tx_irq_cnt;
> +     struct gpi_i2c_config *gpi_config;
>  };
>  
>  struct geni_i2c_desc {
> @@ -500,6 +504,7 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, 
> struct i2c_msg *msg,
>  static void i2c_gpi_cb_result(void *cb, const struct dmaengine_result 
> *result)
>  {
>       struct geni_i2c_dev *gi2c = cb;
> +     struct gpi_multi_xfer *tx_multi_xfer;
>  
>       if (result->result != DMA_TRANS_NOERROR) {
>               dev_err(gi2c->se.dev, "DMA txn failed:%d\n", result->result);
> @@ -508,7 +513,21 @@ static void i2c_gpi_cb_result(void *cb, const struct 
> dmaengine_result *result)
>               dev_dbg(gi2c->se.dev, "DMA xfer has pending: %d\n", 
> result->residue);
>       }
>  
> -     complete(&gi2c->done);
> +     if (gi2c->is_tx_multi_xfer) {

Wouldn't it be cleaner to treat the !is_tx_multi_xfer case as a
multi-xfer of length 1?

> +             tx_multi_xfer = &gi2c->gpi_config->multi_xfer;
> +
> +             /*
> +              * Send Completion for last message or multiple of 
> NUM_MSGS_PER_IRQ.
> +              */
> +             if ((tx_multi_xfer->irq_msg_cnt == gi2c->num_msgs - 1) ||
> +                 (!((tx_multi_xfer->irq_msg_cnt + 1) % NUM_MSGS_PER_IRQ))) {
> +                     tx_multi_xfer->irq_cnt++;
> +                     complete(&gi2c->done);

Why? You're removing the wait_for_completion_timeout() from
geni_i2c_gpi_xfer() when is_tx_multi_xfer is set.

> +             }
> +             tx_multi_xfer->irq_msg_cnt++;
> +     } else {
> +             complete(&gi2c->done);
> +     }
>  }
>  
>  static void geni_i2c_gpi_unmap(struct geni_i2c_dev *gi2c, struct i2c_msg 
> *msg,
> @@ -526,7 +545,42 @@ static void geni_i2c_gpi_unmap(struct geni_i2c_dev 
> *gi2c, struct i2c_msg *msg,
>       }
>  }
>  
> -static int geni_i2c_gpi(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,
> +/**
> + * gpi_i2c_multi_desc_unmap() - unmaps the buffers post multi message TX 
> transfers
> + * @dev: pointer to the corresponding dev node
> + * @gi2c: i2c dev handle
> + * @msgs: i2c messages array
> + * @peripheral: pointer to the gpi_i2c_config
> + */
> +static void gpi_i2c_multi_desc_unmap(struct geni_i2c_dev *gi2c, struct 
> i2c_msg msgs[],
> +                                  struct gpi_i2c_config *peripheral)
> +{
> +     u32 msg_xfer_cnt, wr_idx = 0;
> +     struct gpi_multi_xfer *tx_multi_xfer = &peripheral->multi_xfer;
> +
> +     /*
> +      * In error case, need to unmap all messages based on the msg_idx_cnt.
> +      * Non-error case unmap all the processed messages.

What is the benefit of this optimization, compared to keeping things
simple and just unmap all buffers at the end of geni_i2c_gpi_xfer()?

> +      */
> +     if (gi2c->err)
> +             msg_xfer_cnt = tx_multi_xfer->msg_idx_cnt;
> +     else
> +             msg_xfer_cnt = tx_multi_xfer->irq_cnt * NUM_MSGS_PER_IRQ;
> +
> +     /* Unmap the processed DMA buffers based on the received interrupt 
> count */
> +     for (; tx_multi_xfer->unmap_msg_cnt < msg_xfer_cnt; 
> tx_multi_xfer->unmap_msg_cnt++) {
> +             if (tx_multi_xfer->unmap_msg_cnt == gi2c->num_msgs)
> +                     break;
> +             wr_idx = tx_multi_xfer->unmap_msg_cnt % QCOM_GPI_MAX_NUM_MSGS;
> +             geni_i2c_gpi_unmap(gi2c, &msgs[tx_multi_xfer->unmap_msg_cnt],
> +                                tx_multi_xfer->dma_buf[wr_idx],
> +                                tx_multi_xfer->dma_addr[wr_idx],
> +                                NULL, (dma_addr_t)NULL);
> +             tx_multi_xfer->freed_msg_cnt++;
> +     }
> +}
> +
> +static int geni_i2c_gpi(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[],
>                       struct dma_slave_config *config, dma_addr_t *dma_addr_p,
>                       void **buf, unsigned int op, struct dma_chan *dma_chan)
>  {
> @@ -538,26 +592,48 @@ static int geni_i2c_gpi(struct geni_i2c_dev *gi2c, 
> struct i2c_msg *msg,
>       enum dma_transfer_direction dma_dirn;
>       struct dma_async_tx_descriptor *desc;
>       int ret;
> +     struct gpi_multi_xfer *gi2c_gpi_xfer;
> +     dma_cookie_t cookie;
> +     u32 msg_idx;
>  
>       peripheral = config->peripheral_config;
> -
> -     dma_buf = i2c_get_dma_safe_msg_buf(msg, 1);
> -     if (!dma_buf)
> -             return -ENOMEM;
> +     gi2c_gpi_xfer = &peripheral->multi_xfer;
> +     dma_buf = gi2c_gpi_xfer->dma_buf[gi2c_gpi_xfer->buf_idx];
> +     addr = gi2c_gpi_xfer->dma_addr[gi2c_gpi_xfer->buf_idx];
> +     msg_idx = gi2c_gpi_xfer->msg_idx_cnt;
> +
> +     dma_buf = i2c_get_dma_safe_msg_buf(&msgs[msg_idx], 1);
> +     if (!dma_buf) {
> +             ret = -ENOMEM;
> +             goto out;
> +     }
>  
>       if (op == I2C_WRITE)
>               map_dirn = DMA_TO_DEVICE;
>       else
>               map_dirn = DMA_FROM_DEVICE;
>  
> -     addr = dma_map_single(gi2c->se.dev->parent, dma_buf, msg->len, 
> map_dirn);
> +     addr = dma_map_single(gi2c->se.dev->parent, dma_buf,
> +                           msgs[msg_idx].len, map_dirn);
>       if (dma_mapping_error(gi2c->se.dev->parent, addr)) {
> -             i2c_put_dma_safe_msg_buf(dma_buf, msg, false);
> -             return -ENOMEM;
> +             i2c_put_dma_safe_msg_buf(dma_buf, &msgs[msg_idx], false);
> +             ret = -ENOMEM;
> +             goto out;
> +     }
> +
> +     if (gi2c->is_tx_multi_xfer) {
> +             if (((msg_idx + 1) % NUM_MSGS_PER_IRQ))
> +                     peripheral->flags |= QCOM_GPI_BLOCK_EVENT_IRQ;
> +             else
> +                     peripheral->flags &= ~QCOM_GPI_BLOCK_EVENT_IRQ;
> +
> +             /* BEI bit to be cleared for last TRE */
> +             if (msg_idx == gi2c->num_msgs - 1)
> +                     peripheral->flags &= ~QCOM_GPI_BLOCK_EVENT_IRQ;
>       }
>  
>       /* set the length as message for rx txn */
> -     peripheral->rx_len = msg->len;
> +     peripheral->rx_len = msgs[msg_idx].len;
>       peripheral->op = op;
>  
>       ret = dmaengine_slave_config(dma_chan, config);
> @@ -575,7 +651,8 @@ static int geni_i2c_gpi(struct geni_i2c_dev *gi2c, struct 
> i2c_msg *msg,
>       else
>               dma_dirn = DMA_DEV_TO_MEM;
>  
> -     desc = dmaengine_prep_slave_single(dma_chan, addr, msg->len, dma_dirn, 
> flags);
> +     desc = dmaengine_prep_slave_single(dma_chan, addr, msgs[msg_idx].len,
> +                                        dma_dirn, flags);
>       if (!desc) {
>               dev_err(gi2c->se.dev, "prep_slave_sg failed\n");
>               ret = -EIO;
> @@ -585,15 +662,48 @@ static int geni_i2c_gpi(struct geni_i2c_dev *gi2c, 
> struct i2c_msg *msg,
>       desc->callback_result = i2c_gpi_cb_result;
>       desc->callback_param = gi2c;
>  
> -     dmaengine_submit(desc);
> -     *buf = dma_buf;
> -     *dma_addr_p = addr;
> +     if (!((msgs[msg_idx].flags & I2C_M_RD) && op == I2C_WRITE)) {
> +             gi2c_gpi_xfer->msg_idx_cnt++;
> +             gi2c_gpi_xfer->buf_idx = (msg_idx + 1) % QCOM_GPI_MAX_NUM_MSGS;
> +     }
> +     cookie = dmaengine_submit(desc);
> +     if (dma_submit_error(cookie)) {
> +             dev_err(gi2c->se.dev,
> +                     "%s: dmaengine_submit failed (%d)\n", __func__, cookie);
> +             ret = -EINVAL;
> +             goto err_config;
> +     }
>  
> +     if (gi2c->is_tx_multi_xfer) {
> +             dma_async_issue_pending(gi2c->tx_c);
> +             if ((msg_idx == (gi2c->num_msgs - 1)) ||
> +                 (gi2c_gpi_xfer->msg_idx_cnt >=
> +                  QCOM_GPI_MAX_NUM_MSGS + gi2c_gpi_xfer->freed_msg_cnt)) {
> +                     ret = gpi_multi_desc_process(gi2c->se.dev, 
> gi2c_gpi_xfer,

A function call straight into the GPI driver? I'm not entirely familiar
with the details of the dmaengine API, but this doesn't look correct.

> +                                                  gi2c->num_msgs, 
> XFER_TIMEOUT,
> +                                                  &gi2c->done);
> +                     if (ret) {
> +                             dev_err(gi2c->se.dev,
> +                                     "I2C multi write msg transfer timeout: 
> %d\n",
> +                                     ret);
> +                             gi2c->err = ret;
> +                             goto err_config;
> +                     }
> +             }
> +     } else {
> +             /* Non multi descriptor message transfer */
> +             *buf = dma_buf;
> +             *dma_addr_p = addr;
> +     }
>       return 0;
>  
>  err_config:
> -     dma_unmap_single(gi2c->se.dev->parent, addr, msg->len, map_dirn);
> -     i2c_put_dma_safe_msg_buf(dma_buf, msg, false);
> +     dma_unmap_single(gi2c->se.dev->parent, addr,
> +                      msgs[msg_idx].len, map_dirn);
> +     i2c_put_dma_safe_msg_buf(dma_buf, &msgs[msg_idx], false);
> +
> +out:
> +     gi2c->err = ret;
>       return ret;
>  }
>  
> @@ -605,6 +715,7 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, 
> struct i2c_msg msgs[], i
>       unsigned long time_left;
>       dma_addr_t tx_addr, rx_addr;
>       void *tx_buf = NULL, *rx_buf = NULL;
> +     struct gpi_multi_xfer *tx_multi_xfer;
>       const struct geni_i2c_clk_fld *itr = gi2c->clk_fld;
>  
>       config.peripheral_config = &peripheral;
> @@ -618,6 +729,34 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, 
> struct i2c_msg msgs[], i
>       peripheral.set_config = 1;
>       peripheral.multi_msg = false;
>  
> +     gi2c->gpi_config = &peripheral;
> +     gi2c->num_msgs = num;
> +     gi2c->is_tx_multi_xfer = false;
> +     gi2c->tx_irq_cnt = 0;
> +
> +     tx_multi_xfer = &peripheral.multi_xfer;
> +     memset(tx_multi_xfer, 0, sizeof(struct gpi_multi_xfer));
> +
> +     /*
> +      * If number of write messages are four and higher then

Why four?

> +      * configure hardware for multi descriptor transfers with BEI.
> +      */
> +     if (num >= MIN_NUM_OF_MSGS_MULTI_DESC) {
> +             gi2c->is_tx_multi_xfer = true;
> +             for (i = 0; i < num; i++) {
> +                     if (msgs[i].flags & I2C_M_RD) {
> +                             /*
> +                              * Multi descriptor transfer with BEI
> +                              * support is enabled for write transfers.
> +                              * Add BEI optimization support for read
> +                              * transfers later.

Prefix this comment with "TODO:"

> +                              */
> +                             gi2c->is_tx_multi_xfer = false;
> +                             break;
> +                     }
> +             }
> +     }
> +
>       for (i = 0; i < num; i++) {
>               gi2c->cur = &msgs[i];
>               gi2c->err = 0;
> @@ -628,14 +767,16 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, 
> struct i2c_msg msgs[], i
>                       peripheral.stretch = 1;
>  
>               peripheral.addr = msgs[i].addr;
> +             if (i > 0 && (!(msgs[i].flags & I2C_M_RD)))
> +                     peripheral.multi_msg = false;
>  
> -             ret =  geni_i2c_gpi(gi2c, &msgs[i], &config,
> +             ret =  geni_i2c_gpi(gi2c, msgs, &config,
>                                   &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c);
>               if (ret)
>                       goto err;
>  
>               if (msgs[i].flags & I2C_M_RD) {
> -                     ret =  geni_i2c_gpi(gi2c, &msgs[i], &config,
> +                     ret =  geni_i2c_gpi(gi2c, msgs, &config,
>                                           &rx_addr, &rx_buf, I2C_READ, 
> gi2c->rx_c);
>                       if (ret)
>                               goto err;
> @@ -643,18 +784,26 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, 
> struct i2c_msg msgs[], i
>                       dma_async_issue_pending(gi2c->rx_c);
>               }
>  
> -             dma_async_issue_pending(gi2c->tx_c);
> -
> -             time_left = wait_for_completion_timeout(&gi2c->done, 
> XFER_TIMEOUT);
> -             if (!time_left)
> -                     gi2c->err = -ETIMEDOUT;
> +             if (!gi2c->is_tx_multi_xfer) {
> +                     dma_async_issue_pending(gi2c->tx_c);
> +                     time_left = wait_for_completion_timeout(&gi2c->done, 
> XFER_TIMEOUT);

By making this conditional on !is_tx_multi_xfer transfers, what makes
the loop wait for the transfer to complete before you below unmap the
buffers?

> +                     if (!time_left) {
> +                             dev_err(gi2c->se.dev, "%s:I2C timeout\n", 
> __func__);
> +                             gi2c->err = -ETIMEDOUT;
> +                     }
> +             }
>  
>               if (gi2c->err) {
>                       ret = gi2c->err;
>                       goto err;
>               }
>  
> -             geni_i2c_gpi_unmap(gi2c, &msgs[i], tx_buf, tx_addr, rx_buf, 
> rx_addr);
> +             if (!gi2c->is_tx_multi_xfer) {
> +                     geni_i2c_gpi_unmap(gi2c, &msgs[i], tx_buf, tx_addr, 
> rx_buf, rx_addr);
> +             } else if (gi2c->tx_irq_cnt != tx_multi_xfer->irq_cnt) {
> +                     gi2c->tx_irq_cnt = tx_multi_xfer->irq_cnt;
> +                     gpi_i2c_multi_desc_unmap(gi2c, msgs, &peripheral);
> +             }
>       }
>  
>       return num;
> @@ -663,7 +812,11 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, 
> struct i2c_msg msgs[], i
>       dev_err(gi2c->se.dev, "GPI transfer failed: %d\n", ret);
>       dmaengine_terminate_sync(gi2c->rx_c);
>       dmaengine_terminate_sync(gi2c->tx_c);
> -     geni_i2c_gpi_unmap(gi2c, &msgs[i], tx_buf, tx_addr, rx_buf, rx_addr);
> +     if (gi2c->is_tx_multi_xfer)
> +             gpi_i2c_multi_desc_unmap(gi2c, msgs, &peripheral);
> +     else
> +             geni_i2c_gpi_unmap(gi2c, &msgs[i], tx_buf, tx_addr, rx_buf, 
> rx_addr);
> +

As above, it would be nice if multi-xfer was just a special case with a
single buffer; rather than inflating the cyclomatic complexity.

Regards,
Bjorn

>       return ret;
>  }
>  
> -- 
> 2.17.1
> 
> 

Reply via email to