On 2021/04/03 8:05, Sean Anderson wrote: > This rewrites poll_transfer, dw_writer, and dw_reader. > > * We now use RO transfers (instead of always using TR). This eliminates the > need to send out dummy words, and simplifies the transmit logic. > * All parameters (except regs and bits_per_word) are passed explicitly. > * Most parameters have been made explicit (instead of being recalculated on > every loop). > * Transfers are measured in units of frames instead of bytes. This matches > the measurements used by the device and eliminates several divisions by > bits_per_word. > * We now check if we have over-/under-flowed the FIFO. This should help > prevent hangs when the device stops the transfer and U-Boot doesn't > realize it (and then waits for the remaining data forever). TXUI is not > present in DW_APB_SSI, but in the worst case we just don't write data. > > Unfortunately, this doesn't seem to solve all problems, and there are > some remaining bugs (such as some transfers containing all 1s or all 0s) > when we have many fifo overflows. This is solved for DWC devices by > enabling clock stretching. > * poll_transfer is now used by dw_spi_exec_op as well as xfer. > * There are separate inner loops for 8-, 16-, and 32-bit frame sizes. This > should hopefully reduce the amount of over-/under-flows. However, I > haven't done exhaustive testing to ensure this is really necessary. In > particular, I was never able to prevent read overflows at 50MHz clock > speed.
That sounds like the long fight I had on Linux side... Did you account for the fact that the K210 has a buggy fifo depth (it reports 32 but is in fact 31) ? > > These changes should probably have been split up into several commits, but > several depend on each other, and it would be difficult to break this up > while preserving bisectability. > > Signed-off-by: Sean Anderson <sean...@gmail.com> > --- > > (no changes since v1) > > drivers/spi/designware_spi.c | 284 +++++++++++++++++++++-------------- > 1 file changed, 171 insertions(+), 113 deletions(-) > > diff --git a/drivers/spi/designware_spi.c b/drivers/spi/designware_spi.c > index 6375e6d778..72cca14887 100644 > --- a/drivers/spi/designware_spi.c > +++ b/drivers/spi/designware_spi.c > @@ -134,14 +134,10 @@ struct dw_spi_priv { > unsigned int freq; /* Default frequency */ > unsigned int mode; > > - const void *tx; > - const void *tx_end; > - void *rx; > - void *rx_end; > u32 fifo_len; /* depth of the FIFO buffer */ > > int bits_per_word; > - int len; > + int frames; /* Number of frames in the transfer */ > u8 cs; /* chip select pin */ > u8 tmode; /* TR/TO/RO/EEPROM */ > u8 type; /* SPI/SSP/MicroWire */ > @@ -372,14 +368,24 @@ static int dw_spi_probe(struct udevice *bus) > return 0; > } > > -/* Return the max entries we can fill into tx fifo */ > -static inline u32 tx_max(struct dw_spi_priv *priv) > +/** > + * dw_writer() - Write data frames to the tx fifo > + * @priv: Driver private info > + * @tx: The tx buffer > + * @idx: The number of data frames already transmitted > + * @tx_frames: The number of data frames left to transmit > + * @rx_frames: The number of data frames left to receive (0 if only > + * transmitting) > + * @frame_bytes: The number of bytes taken up by one data frame > + * > + * This function writes up to @tx_frames data frames using data from > @tx[@idx]. > + * > + * Return: The number of frames read > + */ > +static uint dw_writer(struct dw_spi_priv *priv, const void *tx, uint idx, > + uint tx_frames, uint rx_frames, uint frame_bytes) > { > - u32 tx_left, tx_room, rxtx_gap; > - > - tx_left = (priv->tx_end - priv->tx) / (priv->bits_per_word >> 3); > - tx_room = priv->fifo_len - dw_read(priv, DW_SPI_TXFLR); > - > + u32 tx_room = priv->fifo_len - dw_read(priv, DW_SPI_TXFLR); > /* > * Another concern is about the tx/rx mismatch, we > * thought about using (priv->fifo_len - rxflr - txflr) as > @@ -388,67 +394,131 @@ static inline u32 tx_max(struct dw_spi_priv *priv) > * shift registers. So a control from sw point of > * view is taken. > */ > - rxtx_gap = ((priv->rx_end - priv->rx) - (priv->tx_end - priv->tx)) / > - (priv->bits_per_word >> 3); > + u32 rxtx_gap = rx_frames - tx_frames; > + u32 count = min3(tx_frames, tx_room, (u32)(priv->fifo_len - rxtx_gap)); > + u32 *dr = priv->regs + DW_SPI_DR; > > - return min3(tx_left, tx_room, (u32)(priv->fifo_len - rxtx_gap)); > -} > + if (!count) > + return 0; > > -/* Return the max entries we should read out of rx fifo */ > -static inline u32 rx_max(struct dw_spi_priv *priv) > -{ > - u32 rx_left = (priv->rx_end - priv->rx) / (priv->bits_per_word >> 3); > - > - return min_t(u32, rx_left, dw_read(priv, DW_SPI_RXFLR)); > -} > - > -static void dw_writer(struct dw_spi_priv *priv) > -{ > - u32 max = tx_max(priv); > - u32 txw = 0xFFFFFFFF; > - > - while (max--) { > - /* Set the tx word if the transfer's original "tx" is not null > */ > - if (priv->tx_end - priv->len) { > - if (priv->bits_per_word == 8) > - txw = *(u8 *)(priv->tx); > - else > - txw = *(u16 *)(priv->tx); > - } > - dw_write(priv, DW_SPI_DR, txw); > - log_content("tx=0x%02x\n", txw); > - priv->tx += priv->bits_per_word >> 3; > +#define do_write(type) do { \ > + type *start = ((type *)tx) + idx; \ > + type *end = start + count; \ > + do { \ > + __raw_writel(*start++, dr); \ > + } while (start < end); \ > +} while (0) > + switch (frame_bytes) { > + case 1: > + do_write(u8); > + break; > + case 2: > + do_write(u16); > + break; > + case 3: > + case 4: > + default: > + do_write(u32); > + break; > } > +#undef do_write > + > + return count; > } > > -static void dw_reader(struct dw_spi_priv *priv) > +/** > + * dw_reader() - Read data frames from the rx fifo > + * @priv: Driver private data > + * @rx: The rx buffer > + * @idx: The number of data frames already received > + * @frames: The number of data frames left to receive > + * @frame_bytes: The size of a data frame in bytes > + * > + * This function reads up to @frames data frames from @rx[@idx]. > + * > + * Return: The number of frames read > + */ > +static uint dw_reader(struct dw_spi_priv *priv, void *rx, uint idx, uint > frames, > + uint frame_bytes) > { > - u32 max = rx_max(priv); > - u16 rxw; > + u32 rx_lvl = dw_read(priv, DW_SPI_RXFLR); > + u32 count = min(frames, rx_lvl); > + u32 *dr = priv->regs + DW_SPI_DR; > > - while (max--) { > - rxw = dw_read(priv, DW_SPI_DR); > - log_content("rx=0x%02x\n", rxw); > + if (!count) > + return 0; > > - /* Care about rx if the transfer's original "rx" is not null */ > - if (priv->rx_end - priv->len) { > - if (priv->bits_per_word == 8) > - *(u8 *)(priv->rx) = rxw; > - else > - *(u16 *)(priv->rx) = rxw; > - } > - priv->rx += priv->bits_per_word >> 3; > +#define do_read(type) do { \ > + type *start = ((type *)rx) + idx; \ > + type *end = start + count; \ > + do { \ > + *start++ = __raw_readl(dr); \ > + } while (start < end); \ > +} while (0) > + switch (frame_bytes) { > + case 1: > + do_read(u8); > + break; > + case 2: > + do_read(u16); > + break; > + case 3: > + case 4: > + default: > + do_read(u32); > + break; > } > +#undef do_read > + > + return count; > } > > -static int poll_transfer(struct dw_spi_priv *priv) > +/** > + * poll_transfer() - Transmit and receive data frames > + * @priv: Driver private data > + * @tx: The tx buffer. May be %NULL to only receive. > + * @rx: The rx buffer. May be %NULL to discard read data. > + * @frames: The number of data frames to transfer > + * > + * Transmit @tx, while recieving @rx. > + * > + * Return: The lesser of the number of frames transmitted or received. > + */ > +static uint poll_transfer(struct dw_spi_priv *priv, const void *tx, void *rx, > + uint frames) > { > - do { > - dw_writer(priv); > - dw_reader(priv); > - } while (priv->rx_end > priv->rx); > + uint frame_bytes = priv->bits_per_word >> 3; > + uint tx_idx = 0; > + uint rx_idx = 0; > + uint tx_frames = tx ? frames : 0; > + uint rx_frames = rx ? frames : 0; > > - return 0; > + while (tx_frames || rx_frames) { > + if (tx_frames) { > + uint tx_diff = dw_writer(priv, tx, tx_idx, tx_frames, > + rx_frames, frame_bytes); > + > + tx_idx += tx_diff; > + tx_frames -= tx_diff; > + } > + > + if (rx_frames) { > + uint rx_diff = dw_reader(priv, rx, rx_idx, rx_frames, > + frame_bytes); > + > + rx_idx += rx_diff; > + rx_frames -= rx_diff; > + } > + > + /* > + * If we don't read/write fast enough, the transfer stops. > + * Don't bother reading out what's left in the FIFO; it's > + * garbage. > + */ > + if (dw_read(priv, DW_SPI_RISR) & (ISR_RXOI | ISR_TXUI)) > + break; > + } > + return min(tx ? tx_idx : rx_idx, rx ? rx_idx : tx_idx); > } > > /* > @@ -474,19 +544,21 @@ static int dw_spi_xfer(struct udevice *dev, unsigned > int bitlen, > { > struct udevice *bus = dev->parent; > struct dw_spi_priv *priv = dev_get_priv(bus); > - const u8 *tx = dout; > + const void *tx = dout; > u8 *rx = din; > int ret = 0; > u32 cr0 = 0; > - u32 val; > - u32 cs; > + u32 val, cs; > + uint frames; > > /* spi core configured to do 8 bit transfers */ > - if (bitlen % 8) { > + if (bitlen % priv->bits_per_word) { > dev_err(dev, "Non byte aligned SPI transfer.\n"); > return -1; > } > > + frames = bitlen / priv->bits_per_word; > + > /* Start the transaction if necessary. */ > if (flags & SPI_XFER_BEGIN) > external_cs_manage(dev, false); > @@ -496,30 +568,22 @@ static int dw_spi_xfer(struct udevice *dev, unsigned > int bitlen, > else if (rx) > priv->tmode = CTRLR0_TMOD_RO; > else > - /* > - * In transmit only mode (CTRL0_TMOD_TO) input FIFO never gets > - * any data which breaks our logic in poll_transfer() above. > - */ > - priv->tmode = CTRLR0_TMOD_TR; > + priv->tmode = CTRLR0_TMOD_TO; > > cr0 = dw_spi_update_cr0(priv); > > - priv->len = bitlen >> 3; > - > - priv->tx = (void *)tx; > - priv->tx_end = priv->tx + priv->len; > - priv->rx = rx; > - priv->rx_end = priv->rx + priv->len; > - > /* Disable controller before writing control registers */ > dw_write(priv, DW_SPI_SSIENR, 0); > > - dev_dbg(dev, "cr0=%08x rx=%p tx=%p len=%d [bytes]\n", cr0, rx, tx, > - priv->len); > + dev_dbg(dev, "cr0=%08x rx=%p tx=%p frames=%d\n", cr0, rx, tx, > + frames); > /* Reprogram cr0 only if changed */ > if (dw_read(priv, DW_SPI_CTRLR0) != cr0) > dw_write(priv, DW_SPI_CTRLR0, cr0); > > + if (rx) > + dw_write(priv, DW_SPI_CTRLR1, frames - 1); > + > /* > * Configure the desired SS (slave select 0...3) in the controller > * The DW SPI controller will activate and deactivate this CS > @@ -531,8 +595,15 @@ static int dw_spi_xfer(struct udevice *dev, unsigned int > bitlen, > /* Enable controller after writing control registers */ > dw_write(priv, DW_SPI_SSIENR, 1); > > + /* > + * Prime the pump. RO-mode doesn't work unless something gets written to > + * the FIFO > + */ > + if (rx && !tx) > + dw_write(priv, DW_SPI_DR, 0xFFFFFFFF); > + > /* Start transfer in a polling loop */ > - ret = poll_transfer(priv); > + poll_transfer(priv, tx, rx, frames); > > /* > * Wait for current transmit operation to complete. > @@ -565,9 +636,10 @@ static int dw_spi_exec_op(struct spi_slave *slave, const > struct spi_mem_op *op) > int pos, i, ret = 0; > struct udevice *bus = slave->dev->parent; > struct dw_spi_priv *priv = dev_get_priv(bus); > + struct spi_mem_op *mut_op = (struct spi_mem_op *)op; > u8 op_len = sizeof(op->cmd.opcode) + op->addr.nbytes + op->dummy.nbytes; > u8 op_buf[op_len]; > - u32 cr0; > + u32 cr0, val; > > if (read) > priv->tmode = CTRLR0_TMOD_EPROMREAD; > @@ -597,47 +669,33 @@ static int dw_spi_exec_op(struct spi_slave *slave, > const struct spi_mem_op *op) > if (op->dummy.nbytes) > memset(op_buf + pos, 0xff, op->dummy.nbytes); > > - external_cs_manage(slave->dev, false); > + dw_writer(priv, &op_buf, 0, op_len, 0, sizeof(u8)); > > - priv->tx = &op_buf; > - priv->tx_end = priv->tx + op_len; > - priv->rx = NULL; > - priv->rx_end = NULL; > - while (priv->tx != priv->tx_end) > - dw_writer(priv); > + external_cs_manage(slave->dev, false); > + dw_write(priv, DW_SPI_SER, 1 << spi_chip_select(slave->dev)); > > /* > * XXX: The following are tight loops! Enabling debug messages may cause > * them to fail because we are not reading/writing the fifo fast enough. > */ > - if (read) { > - priv->rx = op->data.buf.in; > - priv->rx_end = priv->rx + op->data.nbytes; > + if (read) > + mut_op->data.nbytes = poll_transfer(priv, NULL, op->data.buf.in, > + op->data.nbytes); > + else > + mut_op->data.nbytes = poll_transfer(priv, op->data.buf.out, > + NULL, op->data.nbytes); > > - dw_write(priv, DW_SPI_SER, 1 << spi_chip_select(slave->dev)); > - while (priv->rx != priv->rx_end) > - dw_reader(priv); > - } else { > - u32 val; > - > - priv->tx = op->data.buf.out; > - priv->tx_end = priv->tx + op->data.nbytes; > - > - /* Fill up the write fifo before starting the transfer */ > - dw_writer(priv); > - dw_write(priv, DW_SPI_SER, 1 << spi_chip_select(slave->dev)); > - while (priv->tx != priv->tx_end) > - dw_writer(priv); > - > - if (readl_poll_timeout(priv->regs + DW_SPI_SR, val, > - (val & SR_TF_EMPT) && !(val & SR_BUSY), > - RX_TIMEOUT * 1000)) { > - dev_dbg(bus, "timed out; sr=%x\n", > - dw_read(priv, DW_SPI_SR)); > - ret = -ETIMEDOUT; > - } > + /* > + * Ensure the data (or the instruction for zero-data instructions) has > + * been transmitted from the fifo/shift register before disabling the > + * device. > + */ > + if (readl_poll_timeout(priv->regs + DW_SPI_SR, val, > + (val & SR_TF_EMPT) && !(val & SR_BUSY), > + RX_TIMEOUT * 1000)) { > + dev_dbg(bus, "timed out; sr=%x\n", dw_read(priv, DW_SPI_SR)); > + ret = -ETIMEDOUT; > } > - > dw_write(priv, DW_SPI_SER, 0); > external_cs_manage(slave->dev, true); > > -- Damien Le Moal Western Digital Research