On 2015-10-03 01:05, Peter Rosin wrote:
> Hi!
> 
> I recently upgraded from the atmel linux-3.18-at91 kernel to vanilla 4.2
> and everything seemed fine. Until I tried to write to the little eeprom
> chip. I then tried the linux-4.1-at91 kernel and that suffers too.
> 
> The symptoms are that it seems like writes get interrupted, and restarted
> again without properly initializing everything again. Inspecting the i2c
> bus during these fails gets me something like this (int hex) when I
> 
> echo abcdefghijklmnopqr > /sys/bus/i2c/devices/0-0050/eeprom
> 
> S a0 00 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 70 P
> S a0 10 (clk and data low for a "long" time) 10 71 72 0a P
> 
> Notice how the address byte in the second chunk (10) is repeated after
> the strange event on the i2c bus.
> 
> I looked around and found that if I revert 
> a839ce663b3183209fdf7b1fc4796bfe2a4679c3
> "eeprom: at24: extend driver to allow writing via i2c_smbus_write_byte_data"
> eeprom writing starts working again.
> 
> AFAICT, the i2c-at91 bus driver makes the eeprom driver use the
> i2c_transfer code path both with that patch and with it reverted,
> so I sadly don't see why the patch makes a difference.
> 
> I'm on a board that is based on the sama5d31 evaluation kit, with a
> NXP SE97BTP,547 chip and this in the devicetree:
> 
>                       i2c0: i2c@f0014000 {
>                               status = "okay";
> 
>                               jc42@18 {
>                                       compatible = "jc42";
>                                       reg = <0x18>;
>                               };
> 
>                               eeprom@50 {
>                                       compatible = "24c02";
>                                       reg = <0x50>;
>                                       pagesize = <16>;
>                               };
>                       };

Ok, I found the culprit, and I double and triple checked it this time...

If I move to the very latest on the linux-3.18-at91 branch, the bug is
there too. Which made it vastly more palatable to bisect the bug.

The offender (in the 4.2 kernel) is 93563a6a71bb69dd324fc7354c60fb05f84aae6b
"i2c: at91: fix a race condition when using the DMA controller"
which is far more understandable. Ao, adding Cyrille Pitchen to the Cc list.

If I add that patch on top of my previously working tree, it behaves just
as newer kernels, i.e. equally bad. The patch doesn't revert cleanly, but
reverting the patch and quick-n-dirty-fixing the conflict on vanilla 4.2
makes the problem go away.

I have attached what I actually reverted.

Cheers,
Peter

>From d178e0636358e61503ac55d39c8612ef93c1d893 Mon Sep 17 00:00:00 2001
From: Peter Rosin <p...@axentia.se>
Date: Mon, 5 Oct 2015 10:16:18 +0200
Subject: [PATCH] Revert "i2c: at91: fix a race condition when using the DMA
 controller"

This reverts commit 93563a6a71bb69dd324fc7354c60fb05f84aae6b.

Conflicts:
	drivers/i2c/busses/i2c-at91.c
---
 drivers/i2c/busses/i2c-at91.c |   97 +++++++++--------------------------------
 1 file changed, 21 insertions(+), 76 deletions(-)

diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c
index 1c758cd1e1ba..b5a5ef26b142 100644
--- a/drivers/i2c/busses/i2c-at91.c
+++ b/drivers/i2c/busses/i2c-at91.c
@@ -74,9 +74,6 @@
 #define	AT91_TWI_NACK		BIT(8)	/* Not Acknowledged */
 #define	AT91_TWI_LOCK		BIT(23) /* TWI Lock due to Frame Errors */
 
-#define	AT91_TWI_INT_MASK \
-	(AT91_TWI_TXCOMP | AT91_TWI_RXRDY | AT91_TWI_TXRDY | AT91_TWI_NACK)
-
 #define	AT91_TWI_IER		0x0024	/* Interrupt Enable Register */
 #define	AT91_TWI_IDR		0x0028	/* Interrupt Disable Register */
 #define	AT91_TWI_IMR		0x002c	/* Interrupt Mask Register */
@@ -155,12 +152,13 @@ static void at91_twi_write(struct at91_twi_dev *dev, unsigned reg, unsigned val)
 
 static void at91_disable_twi_interrupts(struct at91_twi_dev *dev)
 {
-	at91_twi_write(dev, AT91_TWI_IDR, AT91_TWI_INT_MASK);
+	at91_twi_write(dev, AT91_TWI_IDR,
+		       AT91_TWI_TXCOMP | AT91_TWI_RXRDY | AT91_TWI_TXRDY);
 }
 
 static void at91_twi_irq_save(struct at91_twi_dev *dev)
 {
-	dev->imr = at91_twi_read(dev, AT91_TWI_IMR) & AT91_TWI_INT_MASK;
+	dev->imr = at91_twi_read(dev, AT91_TWI_IMR) & 0x7;
 	at91_disable_twi_interrupts(dev);
 }
 
@@ -255,16 +253,7 @@ static void at91_twi_write_data_dma_callback(void *data)
 	dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg[0]),
 			 dev->buf_len, DMA_TO_DEVICE);
 
-	/*
-	 * When this callback is called, THR/TX FIFO is likely not to be empty
-	 * yet. So we have to wait for TXCOMP or NACK bits to be set into the
-	 * Status Register to be sure that the STOP bit has been sent and the
-	 * transfer is completed. The NACK interrupt has already been enabled,
-	 * we just have to enable TXCOMP one.
-	 */
-	at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP);
-	if (!dev->pdata->has_alt_cmd)
-		at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
+	at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 }
 
 static void at91_twi_write_data_dma(struct at91_twi_dev *dev)
@@ -391,13 +380,10 @@ static void at91_twi_read_data_dma_callback(void *data)
 	dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg[0]),
 			 dev->buf_len, DMA_FROM_DEVICE);
 
-	if (!dev->pdata->has_alt_cmd) {
-		/* The last two bytes have to be read without using dma */
-		dev->buf += dev->buf_len - 2;
-		dev->buf_len = 2;
-		ier |= AT91_TWI_RXRDY;
-	}
-	at91_twi_write(dev, AT91_TWI_IER, ier);
+	/* The last two bytes have to be read without using dma */
+	dev->buf += dev->buf_len - 2;
+	dev->buf_len = 2;
+	at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_RXRDY);
 }
 
 static void at91_twi_read_data_dma(struct at91_twi_dev *dev)
@@ -473,7 +459,7 @@ static irqreturn_t atmel_twi_interrupt(int irq, void *dev_id)
 	/* catch error flags */
 	dev->transfer_status |= status;
 
-	if (irqstatus & (AT91_TWI_TXCOMP | AT91_TWI_NACK)) {
+	if (irqstatus & AT91_TWI_TXCOMP) {
 		at91_disable_twi_interrupts(dev);
 		complete(&dev->cmd_complete);
 	}
@@ -488,49 +474,6 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev)
 	bool has_unre_flag = dev->pdata->has_unre_flag;
 	bool has_alt_cmd = dev->pdata->has_alt_cmd;
 
-	/*
-	 * WARNING: the TXCOMP bit in the Status Register is NOT a clear on
-	 * read flag but shows the state of the transmission at the time the
-	 * Status Register is read. According to the programmer datasheet,
-	 * TXCOMP is set when both holding register and internal shifter are
-	 * empty and STOP condition has been sent.
-	 * Consequently, we should enable NACK interrupt rather than TXCOMP to
-	 * detect transmission failure.
-	 * Indeed let's take the case of an i2c write command using DMA.
-	 * Whenever the slave doesn't acknowledge a byte, the LOCK, NACK and
-	 * TXCOMP bits are set together into the Status Register.
-	 * LOCK is a clear on write bit, which is set to prevent the DMA
-	 * controller from sending new data on the i2c bus after a NACK
-	 * condition has happened. Once locked, this i2c peripheral stops
-	 * triggering the DMA controller for new data but it is more than
-	 * likely that a new DMA transaction is already in progress, writing
-	 * into the Transmit Holding Register. Since the peripheral is locked,
-	 * these new data won't be sent to the i2c bus but they will remain
-	 * into the Transmit Holding Register, so TXCOMP bit is cleared.
-	 * Then when the interrupt handler is called, the Status Register is
-	 * read: the TXCOMP bit is clear but NACK bit is still set. The driver
-	 * manage the error properly, without waiting for timeout.
-	 * This case can be reproduced easyly when writing into an at24 eeprom.
-	 *
-	 * Besides, the TXCOMP bit is already set before the i2c transaction
-	 * has been started. For read transactions, this bit is cleared when
-	 * writing the START bit into the Control Register. So the
-	 * corresponding interrupt can safely be enabled just after.
-	 * However for write transactions managed by the CPU, we first write
-	 * into THR, so TXCOMP is cleared. Then we can safely enable TXCOMP
-	 * interrupt. If TXCOMP interrupt were enabled before writing into THR,
-	 * the interrupt handler would be called immediately and the i2c command
-	 * would be reported as completed.
-	 * Also when a write transaction is managed by the DMA controller,
-	 * enabling the TXCOMP interrupt in this function may lead to a race
-	 * condition since we don't know whether the TXCOMP interrupt is enabled
-	 * before or after the DMA has started to write into THR. So the TXCOMP
-	 * interrupt is enabled later by at91_twi_write_data_dma_callback().
-	 * Immediately after in that DMA callback, if the alternative command
-	 * mode is not used, we still need to send the STOP condition manually
-	 * writing the corresponding bit into the Control Register.
-	 */
-
 	dev_dbg(dev->dev, "transfer: %s %d bytes.\n",
 		(dev->msg->flags & I2C_M_RD) ? "read" : "write", dev->buf_len);
 
@@ -578,24 +521,26 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev)
 		 * seems to be the best solution.
 		 */
 		if (dev->use_dma && (dev->buf_len > AT91_I2C_DMA_THRESHOLD)) {
-			at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_NACK);
 			at91_twi_read_data_dma(dev);
-		} else {
+			/*
+			 * It is important to enable TXCOMP irq here because
+			 * doing it only when transferring the last two bytes
+			 * will mask NACK errors since TXCOMP is set when a
+			 * NACK occurs.
+			 */
 			at91_twi_write(dev, AT91_TWI_IER,
-				       AT91_TWI_TXCOMP |
-				       AT91_TWI_NACK |
-				       AT91_TWI_RXRDY);
-		}
+			       AT91_TWI_TXCOMP);
+		} else
+			at91_twi_write(dev, AT91_TWI_IER,
+			       AT91_TWI_TXCOMP | AT91_TWI_RXRDY);
 	} else {
 		if (dev->use_dma && (dev->buf_len > AT91_I2C_DMA_THRESHOLD)) {
-			at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_NACK);
 			at91_twi_write_data_dma(dev);
+			at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP);
 		} else {
 			at91_twi_write_next_byte(dev);
 			at91_twi_write(dev, AT91_TWI_IER,
-				       AT91_TWI_TXCOMP |
-				       AT91_TWI_NACK |
-				       AT91_TWI_TXRDY);
+				AT91_TWI_TXCOMP | AT91_TWI_TXRDY);
 		}
 	}
 
-- 
1.7.10.4

Reply via email to