Hi,

here is mv_cesa TDMA support for the Kirkwood target. I only adapted
the patches Phil Sutter posted on the linux-crypto list and adjusted
the kernel modules Makefile (package/kernel/modules/crypto.mk).

The speed improvement on a Dockstar is only around 25%, from 13 MB/s to
16 MB/s, on an encrypted file system.

The original patches can be found here:
 http://www.mail-archive.com/linux-crypto@vger.kernel.org/msg07101.html
 http://www.mail-archive.com/linux-crypto@vger.kernel.org/msg07115.html
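
A short note for reviewers who don't want to read the whole diff: the new
mv_tdma driver just builds a chain of small DMA descriptors (count/src/dst/next,
with an ownership bit in the count field), and an all-zero descriptor acts as a
separator where the CESA engine takes over. The user-space C sketch below only
mimics that chain walk to illustrate the descriptor format from mv_tdma.c/.h;
it is not part of the patch, and the addresses are made up.

/* Illustration only -- mirrors struct tdma_desc from mv_tdma.c and walks a
 * chain roughly the way the engine would (user space, no real DMA). */
#include <stdio.h>
#include <stdint.h>

#define TDMA_OWN_BIT (1u << 31)	/* set while the engine owns the descriptor */

struct tdma_desc {
	uint32_t count;	/* byte count, plus ownership bit */
	uint32_t src;	/* source (bus) address */
	uint32_t dst;	/* destination (bus) address */
	uint32_t next;	/* next descriptor: array index here, bus address in HW */
};

static void walk_chain(const struct tdma_desc *chain)
{
	unsigned int i = 0;

	for (;;) {
		if (!(chain[i].count & ~TDMA_OWN_BIT))
			/* all-zero "separator": pause, let CESA process */
			printf("separator: hand over to CESA\n");
		else
			printf("copy %u bytes 0x%08x -> 0x%08x\n",
			       (unsigned int)(chain[i].count & ~TDMA_OWN_BIT),
			       (unsigned int)chain[i].src,
			       (unsigned int)chain[i].dst);
		if (!chain[i].next)
			break;
		i = chain[i].next;
	}
}

int main(void)
{
	/* config + input into SRAM, separator, then result back out */
	const struct tdma_desc chain[] = {
		{ 64   | TDMA_OWN_BIT, 0x1000, 0xf0000000, 1 },
		{ 4096 | TDMA_OWN_BIT, 0x2000, 0xf0000080, 2 },
		{ 0, 0, 0, 3 },	/* separator (mv_tdma_separator) */
		{ 4096 | TDMA_OWN_BIT, 0xf0001000, 0x3000, 0 },
	};

	walk_chain(chain);
	return 0;
}

In the real driver, mv_tdma_memcpy() appends copy descriptors,
mv_tdma_separator() appends the zero descriptor, and mv_tdma_trigger() points
TDMA_NEXT_DESC at the first one.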


Index: target/linux/kirkwood/patches-3.3/300-mv_cesa-tdma.patch
===================================================================
--- target/linux/kirkwood/patches-3.3/300-mv_cesa-tdma.patch    (revision 0)
+++ target/linux/kirkwood/patches-3.3/300-mv_cesa-tdma.patch    (revision 0)
@@ -0,0 +1,1495 @@
+--- a/arch/arm/mach-kirkwood/common.c
++++ b/arch/arm/mach-kirkwood/common.c
+@@ -268,9 +268,42 @@ void __init kirkwood_uart1_init(void)
+ /*****************************************************************************
+  * Cryptographic Engines and Security Accelerator (CESA)
+  ****************************************************************************/
++static struct resource kirkwood_tdma_res[] = {
++      {
++              .name   = "regs deco",
++              .start  = CRYPTO_PHYS_BASE + 0xA00,
++              .end    = CRYPTO_PHYS_BASE + 0xA24,
++              .flags  = IORESOURCE_MEM,
++      }, {
++              .name   = "regs control and error",
++              .start  = CRYPTO_PHYS_BASE + 0x800,
++              .end    = CRYPTO_PHYS_BASE + 0x8CF,
++              .flags  = IORESOURCE_MEM,
++      }, {
++              .name   = "crypto error",
++              .start  = IRQ_KIRKWOOD_TDMA_ERR,
++              .end    = IRQ_KIRKWOOD_TDMA_ERR,
++              .flags  = IORESOURCE_IRQ,
++      },
++};
++
++static u64 mv_tdma_dma_mask = 0xffffffffUL;
++
++static struct platform_device kirkwood_tdma_device = {
++      .name           = "mv_tdma",
++      .id             = -1,
++      .dev            = {
++              .dma_mask               = &mv_tdma_dma_mask,
++              .coherent_dma_mask      = 0xffffffff,
++      },
++      .num_resources  = ARRAY_SIZE(kirkwood_tdma_res),
++      .resource       = kirkwood_tdma_res,
++};
++
+ void __init kirkwood_crypto_init(void)
+ {
+       kirkwood_clk_ctrl |= CGC_CRYPTO;
++      platform_device_register(&kirkwood_tdma_device);
+       orion_crypto_init(CRYPTO_PHYS_BASE, KIRKWOOD_SRAM_PHYS_BASE,
+                         KIRKWOOD_SRAM_SIZE, IRQ_KIRKWOOD_CRYPTO);
+ }
+--- a/arch/arm/mach-kirkwood/include/mach/irqs.h
++++ b/arch/arm/mach-kirkwood/include/mach/irqs.h
+@@ -51,6 +51,7 @@
+ #define IRQ_KIRKWOOD_GPIO_HIGH_16_23  41
+ #define IRQ_KIRKWOOD_GE00_ERR 46
+ #define IRQ_KIRKWOOD_GE01_ERR 47
++#define IRQ_KIRKWOOD_TDMA_ERR 49
+ #define IRQ_KIRKWOOD_RTC        53
+ 
+ /*
+--- a/arch/arm/plat-orion/common.c
++++ b/arch/arm/plat-orion/common.c
+@@ -911,9 +911,15 @@ static struct resource orion_crypto_reso
+       },
+ };
+ 
++static u64 mv_crypto_dmamask = DMA_BIT_MASK(32);
++
+ static struct platform_device orion_crypto = {
+       .name           = "mv_crypto",
+       .id             = -1,
++      .dev            = {
++              .dma_mask = &mv_crypto_dmamask,
++              .coherent_dma_mask = DMA_BIT_MASK(32),
++      },
+ };
+ 
+ void __init orion_crypto_init(unsigned long mapbase,
+--- /dev/null
++++ b/drivers/crypto/dma_desclist.h
+@@ -0,0 +1,79 @@
++#ifndef __DMA_DESCLIST__
++#define __DMA_DESCLIST__
++
++struct dma_desc {
++      void *virt;
++      dma_addr_t phys;
++};
++
++struct dma_desclist {
++      struct dma_pool *itempool;
++      struct dma_desc *desclist;
++      unsigned long length;
++      unsigned long usage;
++};
++
++#define DESCLIST_ITEM(dl, x)          ((dl).desclist[(x)].virt)
++#define DESCLIST_ITEM_DMA(dl, x)      ((dl).desclist[(x)].phys)
++#define DESCLIST_FULL(dl)             ((dl).length == (dl).usage)
++
++static inline int
++init_dma_desclist(struct dma_desclist *dl, struct device *dev,
++              size_t size, size_t align, size_t boundary)
++{
++#define STRX(x) #x
++#define STR(x) STRX(x)
++      dl->itempool = dma_pool_create(
++                      "DMA Desclist Pool at "__FILE__"("STR(__LINE__)")",
++                      dev, size, align, boundary);
++#undef STR
++#undef STRX
++      if (!dl->itempool)
++              return 1;
++      dl->desclist = NULL;
++      dl->length = dl->usage = 0;
++      return 0;
++}
++
++static inline int
++set_dma_desclist_size(struct dma_desclist *dl, unsigned long nelem)
++{
++      /* need to increase size first if requested */
++      if (nelem > dl->length) {
++              struct dma_desc *newmem;
++              int newsize = nelem * sizeof(struct dma_desc);
++
++              newmem = krealloc(dl->desclist, newsize, GFP_KERNEL);
++              if (!newmem)
++                      return -ENOMEM;
++              dl->desclist = newmem;
++      }
++
++      /* allocate/free dma descriptors, adjusting dl->length on the go */
++      for (; dl->length < nelem; dl->length++) {
++              DESCLIST_ITEM(*dl, dl->length) = dma_pool_alloc(dl->itempool,
++                              GFP_KERNEL, &DESCLIST_ITEM_DMA(*dl, dl->length));
++              if (!DESCLIST_ITEM(*dl, dl->length))
++                      return -ENOMEM;
++      }
++      for (; dl->length > nelem; dl->length--)
++              dma_pool_free(dl->itempool, DESCLIST_ITEM(*dl, dl->length - 1),
++                              DESCLIST_ITEM_DMA(*dl, dl->length - 1));
++
++      /* ignore size decreases but those to zero */
++      if (!nelem) {
++              kfree(dl->desclist);
++              dl->desclist = 0;
++      }
++      return 0;
++}
++
++static inline void
++fini_dma_desclist(struct dma_desclist *dl)
++{
++      set_dma_desclist_size(dl, 0);
++      dma_pool_destroy(dl->itempool);
++      dl->length = dl->usage = 0;
++}
++
++#endif /* __DMA_DESCLIST__ */
+--- a/drivers/crypto/Kconfig
++++ b/drivers/crypto/Kconfig
+@@ -167,6 +167,10 @@ config CRYPTO_GHASH_S390
+ 
+         It is available as of z196.
+ 
++config CRYPTO_DEV_MV_TDMA
++      tristate
++      default no
++
+ config CRYPTO_DEV_MV_CESA
+       tristate "Marvell's Cryptographic Engine"
+       depends on PLAT_ORION
+@@ -175,6 +179,7 @@ config CRYPTO_DEV_MV_CESA
+       select CRYPTO_HASH2
+       select CRYPTO_BLKCIPHER2
+       select CRYPTO_HASH
++      select CRYPTO_DEV_MV_TDMA
+       help
+         This driver allows you to utilize the Cryptographic Engines and
+         Security Accelerator (CESA) which can be found on the Marvell Orion
+--- a/drivers/crypto/Makefile
++++ b/drivers/crypto/Makefile
+@@ -4,6 +4,7 @@ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-
+ obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o
+ n2_crypto-y := n2_core.o n2_asm.o
+ obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
++obj-$(CONFIG_CRYPTO_DEV_MV_TDMA) += mv_tdma.o
+ obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
+ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
+ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
+--- a/drivers/crypto/mv_cesa.c
++++ b/drivers/crypto/mv_cesa.c
+@@ -9,6 +9,8 @@
+ #include <crypto/aes.h>
+ #include <crypto/algapi.h>
+ #include <linux/crypto.h>
++#include <linux/dma-mapping.h>
++#include <linux/dmapool.h>
+ #include <linux/interrupt.h>
+ #include <linux/io.h>
+ #include <linux/kthread.h>
+@@ -20,9 +22,17 @@
+ #include <crypto/sha.h>
+ 
+ #include "mv_cesa.h"
++#include "mv_tdma.h"
++#include "dma_desclist.h"
+ 
+ #define MV_CESA       "MV-CESA:"
+ #define MAX_HW_HASH_SIZE      0xFFFF
++#define MV_CESA_EXPIRE                500 /* msec */
++
++#define MV_DMA_INIT_POOLSIZE 16
++#define MV_DMA_ALIGN 16
++
++static int count_sgs(struct scatterlist *, unsigned int);
+ 
+ /*
+  * STM:
+@@ -42,13 +52,12 @@ enum engine_status {
+ 
+ /**
+  * struct req_progress - used for every crypt request
+- * @src_sg_it:                sg iterator for src
+- * @dst_sg_it:                sg iterator for dst
++ * @src_sg:           sg list for src
++ * @dst_sg:           sg list for dst
+  * @sg_src_left:      bytes left in src to process (scatter list)
+  * @src_start:                offset to add to src start position (scatter list)
+  * @crypt_len:                length of current hw crypt/hash process
+  * @hw_nbytes:                total bytes to process in hw for this request
+- * @copy_back:                whether to copy data back (crypt) or not (hash)
+  * @sg_dst_left:      bytes left dst to process in this scatter list
+  * @dst_start:                offset to add to dst start position (scatter list)
+  * @hw_processed_bytes:       number of bytes processed by hw (request).
+@@ -58,10 +67,9 @@ enum engine_status {
+  * track of progress within current scatterlist.
+  */
+ struct req_progress {
+-      struct sg_mapping_iter src_sg_it;
+-      struct sg_mapping_iter dst_sg_it;
++      struct scatterlist *src_sg;
++      struct scatterlist *dst_sg;
+       void (*complete) (void);
+-      void (*process) (int is_first);
+ 
+       /* src mostly */
+       int sg_src_left;
+@@ -69,15 +77,34 @@ struct req_progress {
+       int crypt_len;
+       int hw_nbytes;
+       /* dst mostly */
+-      int copy_back;
+       int sg_dst_left;
+       int dst_start;
+       int hw_processed_bytes;
+ };
+ 
++struct sec_accel_sram {
++      struct sec_accel_config op;
++      union {
++              struct {
++                      u32 key[8];
++                      u32 iv[4];
++              } crypt;
++              struct {
++                      u32 ivi[5];
++                      u32 ivo[5];
++              } hash;
++      } type;
++#define sa_key        type.crypt.key
++#define sa_iv type.crypt.iv
++#define sa_ivi        type.hash.ivi
++#define sa_ivo        type.hash.ivo
++} __attribute__((packed));
++
+ struct crypto_priv {
++      struct device *dev;
+       void __iomem *reg;
+       void __iomem *sram;
++      u32 sram_phys;
+       int irq;
+       struct task_struct *queue_th;
+ 
+@@ -85,16 +112,25 @@ struct crypto_priv {
+       spinlock_t lock;
+       struct crypto_queue queue;
+       enum engine_status eng_st;
++      struct timer_list completion_timer;
+       struct crypto_async_request *cur_req;
+       struct req_progress p;
+       int max_req_size;
+       int sram_size;
+       int has_sha1;
+       int has_hmac_sha1;
++
++      struct sec_accel_sram sa_sram;
++      dma_addr_t sa_sram_dma;
++
++      struct dma_desclist desclist;
+ };
+ 
+ static struct crypto_priv *cpg;
+ 
++#define ITEM(x)               ((u32 *)DESCLIST_ITEM(cpg->desclist, x))
++#define ITEM_DMA(x)   DESCLIST_ITEM_DMA(cpg->desclist, x)
++
+ struct mv_ctx {
+       u8 aes_enc_key[AES_KEY_LEN];
+       u32 aes_dec_key[8];
+@@ -129,13 +165,69 @@ struct mv_req_hash_ctx {
+       u64 count;
+       u32 state[SHA1_DIGEST_SIZE / 4];
+       u8 buffer[SHA1_BLOCK_SIZE];
++      dma_addr_t buffer_dma;
+       int first_hash;         /* marks that we don't have previous state */
+       int last_chunk;         /* marks that this is the 'final' request */
+       int extra_bytes;        /* unprocessed bytes in buffer */
++      int digestsize;         /* size of the digest */
+       enum hash_op op;
+       int count_add;
++      dma_addr_t result_dma;
+ };
+ 
++static void mv_completion_timer_callback(unsigned long unused)
++{
++      int active = readl(cpg->reg + SEC_ACCEL_CMD) & SEC_CMD_EN_SEC_ACCL0;
++
++      printk(KERN_ERR MV_CESA
++             "completion timer expired (CESA %sactive), cleaning up.\n",
++             active ? "" : "in");
++
++      del_timer(&cpg->completion_timer);
++      writel(SEC_CMD_DISABLE_SEC, cpg->reg + SEC_ACCEL_CMD);
++      while(readl(cpg->reg + SEC_ACCEL_CMD) & SEC_CMD_DISABLE_SEC)
++              printk(KERN_INFO MV_CESA "%s: waiting for engine finishing\n", __func__);
++      cpg->eng_st = ENGINE_W_DEQUEUE;
++      wake_up_process(cpg->queue_th);
++}
++
++static void mv_setup_timer(void)
++{
++      setup_timer(&cpg->completion_timer, &mv_completion_timer_callback, 0);
++      mod_timer(&cpg->completion_timer,
++                      jiffies + msecs_to_jiffies(MV_CESA_EXPIRE));
++}
++
++static inline void mv_tdma_u32_copy(dma_addr_t dst, u32 val)
++{
++      if (unlikely(DESCLIST_FULL(cpg->desclist)) &&
++          set_dma_desclist_size(&cpg->desclist, cpg->desclist.length << 1)) {
++              printk(KERN_ERR MV_CESA "resizing poolsize to %lu failed\n",
++                              cpg->desclist.length << 1);
++              return;
++      }
++      *ITEM(cpg->desclist.usage) = val;
++      mv_tdma_memcpy(dst, ITEM_DMA(cpg->desclist.usage), sizeof(u32));
++      cpg->desclist.usage++;
++}
++
++static inline bool
++mv_dma_map_sg(struct scatterlist *sg, int nbytes, enum dma_data_direction dir)
++{
++      int nents = count_sgs(sg, nbytes);
++
++      if (nbytes && dma_map_sg(cpg->dev, sg, nents, dir) != nents)
++              return false;
++      return true;
++}
++
++static inline void
++mv_dma_unmap_sg(struct scatterlist *sg, int nbytes, enum dma_data_direction dir)
++{
++      if (nbytes)
++              dma_unmap_sg(cpg->dev, sg, count_sgs(sg, nbytes), dir);
++}
++
+ static void compute_aes_dec_key(struct mv_ctx *ctx)
+ {
+       struct crypto_aes_ctx gen_aes_key;
+@@ -185,19 +277,19 @@ static int mv_setkey_aes(struct crypto_a
+ 
+ static void copy_src_to_buf(struct req_progress *p, char *dbuf, int len)
+ {
+-      int ret;
+       void *sbuf;
+       int copy_len;
+ 
+       while (len) {
+               if (!p->sg_src_left) {
+-                      ret = sg_miter_next(&p->src_sg_it);
+-                      BUG_ON(!ret);
+-                      p->sg_src_left = p->src_sg_it.length;
++                      /* next sg please */
++                      p->src_sg = sg_next(p->src_sg);
++                      BUG_ON(!p->src_sg);
++                      p->sg_src_left = p->src_sg->length;
+                       p->src_start = 0;
+               }
+ 
+-              sbuf = p->src_sg_it.addr + p->src_start;
++              sbuf = sg_virt(p->src_sg) + p->src_start;
+ 
+               copy_len = min(p->sg_src_left, len);
+               memcpy(dbuf, sbuf, copy_len);
+@@ -210,73 +302,123 @@ static void copy_src_to_buf(struct req_p
+       }
+ }
+ 
++static void dma_copy_src_to_buf(struct req_progress *p, dma_addr_t dbuf, int len)
++{
++      dma_addr_t sbuf;
++      int copy_len;
++
++      while (len) {
++              if (!p->sg_src_left) {
++                      /* next sg please */
++                      p->src_sg = sg_next(p->src_sg);
++                      BUG_ON(!p->src_sg);
++                      p->sg_src_left = sg_dma_len(p->src_sg);
++                      p->src_start = 0;
++              }
++
++              sbuf = sg_dma_address(p->src_sg) + p->src_start;
++
++              copy_len = min(p->sg_src_left, len);
++              mv_tdma_memcpy(dbuf, sbuf, copy_len);
++
++              p->src_start += copy_len;
++              p->sg_src_left -= copy_len;
++
++              len -= copy_len;
++              dbuf += copy_len;
++      }
++}
++
++static void dma_copy_buf_to_dst(struct req_progress *p, dma_addr_t sbuf, int len)
++{
++      dma_addr_t dbuf;
++      int copy_len;
++
++      while (len) {
++              if (!p->sg_dst_left) {
++                      /* next sg please */
++                      p->dst_sg = sg_next(p->dst_sg);
++                      BUG_ON(!p->dst_sg);
++                      p->sg_dst_left = sg_dma_len(p->dst_sg);
++                      p->dst_start = 0;
++              }
++
++              dbuf = sg_dma_address(p->dst_sg) + p->dst_start;
++
++              copy_len = min(p->sg_dst_left, len);
++              mv_tdma_memcpy(dbuf, sbuf, copy_len);
++
++              p->dst_start += copy_len;
++              p->sg_dst_left -= copy_len;
++
++              len -= copy_len;
++              sbuf += copy_len;
++      }
++}
++
+ static void setup_data_in(void)
+ {
+       struct req_progress *p = &cpg->p;
+       int data_in_sram =
+           min(p->hw_nbytes - p->hw_processed_bytes, cpg->max_req_size);
+-      copy_src_to_buf(p, cpg->sram + SRAM_DATA_IN_START + p->crypt_len,
++      dma_copy_src_to_buf(p, cpg->sram_phys + SRAM_DATA_IN_START + p->crypt_len,
+                       data_in_sram - p->crypt_len);
+       p->crypt_len = data_in_sram;
+ }
+ 
+-static void mv_process_current_q(int first_block)
++static void mv_init_crypt_config(struct ablkcipher_request *req)
+ {
+-      struct ablkcipher_request *req = ablkcipher_request_cast(cpg->cur_req);
+       struct mv_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+       struct mv_req_ctx *req_ctx = ablkcipher_request_ctx(req);
+-      struct sec_accel_config op;
++      struct sec_accel_config *op = &cpg->sa_sram.op;
+ 
+       switch (req_ctx->op) {
+       case COP_AES_ECB:
+-              op.config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_ECB;
++              op->config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_ECB;
+               break;
+       case COP_AES_CBC:
+       default:
+-              op.config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_CBC;
+-              op.enc_iv = ENC_IV_POINT(SRAM_DATA_IV) |
++              op->config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_CBC;
++              op->enc_iv = ENC_IV_POINT(SRAM_DATA_IV) |
+                       ENC_IV_BUF_POINT(SRAM_DATA_IV_BUF);
+-              if (first_block)
+-                      memcpy(cpg->sram + SRAM_DATA_IV, req->info, 16);
++              memcpy(cpg->sa_sram.sa_iv, req->info, 16);
+               break;
+       }
+       if (req_ctx->decrypt) {
+-              op.config |= CFG_DIR_DEC;
+-              memcpy(cpg->sram + SRAM_DATA_KEY_P, ctx->aes_dec_key,
+-                              AES_KEY_LEN);
++              op->config |= CFG_DIR_DEC;
++              memcpy(cpg->sa_sram.sa_key, ctx->aes_dec_key, AES_KEY_LEN);
+       } else {
+-              op.config |= CFG_DIR_ENC;
+-              memcpy(cpg->sram + SRAM_DATA_KEY_P, ctx->aes_enc_key,
+-                              AES_KEY_LEN);
++              op->config |= CFG_DIR_ENC;
++              memcpy(cpg->sa_sram.sa_key, ctx->aes_enc_key, AES_KEY_LEN);
+       }
+ 
+       switch (ctx->key_len) {
+       case AES_KEYSIZE_128:
+-              op.config |= CFG_AES_LEN_128;
++              op->config |= CFG_AES_LEN_128;
+               break;
+       case AES_KEYSIZE_192:
+-              op.config |= CFG_AES_LEN_192;
++              op->config |= CFG_AES_LEN_192;
+               break;
+       case AES_KEYSIZE_256:
+-              op.config |= CFG_AES_LEN_256;
++              op->config |= CFG_AES_LEN_256;
+               break;
+       }
+-      op.enc_p = ENC_P_SRC(SRAM_DATA_IN_START) |
++      op->enc_p = ENC_P_SRC(SRAM_DATA_IN_START) |
+               ENC_P_DST(SRAM_DATA_OUT_START);
+-      op.enc_key_p = SRAM_DATA_KEY_P;
+-
+-      setup_data_in();
+-      op.enc_len = cpg->p.crypt_len;
+-      memcpy(cpg->sram + SRAM_CONFIG, &op,
+-                      sizeof(struct sec_accel_config));
++      op->enc_key_p = SRAM_DATA_KEY_P;
++      op->enc_len = cpg->p.crypt_len;
+ 
+-      /* GO */
+-      writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
++      dma_sync_single_for_device(cpg->dev, cpg->sa_sram_dma,
++                      sizeof(struct sec_accel_sram), DMA_TO_DEVICE);
++      mv_tdma_memcpy(cpg->sram_phys + SRAM_CONFIG, cpg->sa_sram_dma,
++                      sizeof(struct sec_accel_sram));
++}
+ 
+-      /*
+-       * XXX: add timer if the interrupt does not occur for some mystery
+-       * reason
+-       */
++static void mv_update_crypt_config(void)
++{
++      /* update the enc_len field only */
++      mv_tdma_u32_copy(cpg->sram_phys + SRAM_CONFIG + 2 * sizeof(u32),
++                      (u32)cpg->p.crypt_len);
+ }
+ 
+ static void mv_crypto_algo_completion(void)
+@@ -284,8 +426,12 @@ static void mv_crypto_algo_completion(vo
+       struct ablkcipher_request *req = ablkcipher_request_cast(cpg->cur_req);
+       struct mv_req_ctx *req_ctx = ablkcipher_request_ctx(req);
+ 
+-      sg_miter_stop(&cpg->p.src_sg_it);
+-      sg_miter_stop(&cpg->p.dst_sg_it);
++      if (req->src == req->dst) {
++              mv_dma_unmap_sg(req->src, req->nbytes, DMA_BIDIRECTIONAL);
++      } else {
++              mv_dma_unmap_sg(req->src, req->nbytes, DMA_TO_DEVICE);
++              mv_dma_unmap_sg(req->dst, req->nbytes, DMA_FROM_DEVICE);
++      }
+ 
+       if (req_ctx->op != COP_AES_CBC)
+               return ;
+@@ -293,37 +439,33 @@ static void mv_crypto_algo_completion(vo
+       memcpy(req->info, cpg->sram + SRAM_DATA_IV_BUF, 16);
+ }
+ 
+-static void mv_process_hash_current(int first_block)
++static void mv_init_hash_config(struct ahash_request *req)
+ {
+-      struct ahash_request *req = ahash_request_cast(cpg->cur_req);
+       const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm);
+       struct mv_req_hash_ctx *req_ctx = ahash_request_ctx(req);
+       struct req_progress *p = &cpg->p;
+-      struct sec_accel_config op = { 0 };
++      struct sec_accel_config *op = &cpg->sa_sram.op;
+       int is_last;
+ 
+       switch (req_ctx->op) {
+       case COP_SHA1:
+       default:
+-              op.config = CFG_OP_MAC_ONLY | CFG_MACM_SHA1;
++              op->config = CFG_OP_MAC_ONLY | CFG_MACM_SHA1;
+               break;
+       case COP_HMAC_SHA1:
+-              op.config = CFG_OP_MAC_ONLY | CFG_MACM_HMAC_SHA1;
+-              memcpy(cpg->sram + SRAM_HMAC_IV_IN,
++              op->config = CFG_OP_MAC_ONLY | CFG_MACM_HMAC_SHA1;
++              memcpy(cpg->sa_sram.sa_ivi,
+                               tfm_ctx->ivs, sizeof(tfm_ctx->ivs));
+               break;
+       }
+ 
+-      op.mac_src_p =
+-              MAC_SRC_DATA_P(SRAM_DATA_IN_START) | MAC_SRC_TOTAL_LEN((u32)
+-              req_ctx->
+-              count);
+-
+-      setup_data_in();
++      op->mac_src_p =
++              MAC_SRC_DATA_P(SRAM_DATA_IN_START) |
++              MAC_SRC_TOTAL_LEN((u32)req_ctx->count);
+ 
+-      op.mac_digest =
++      op->mac_digest =
+               MAC_DIGEST_P(SRAM_DIGEST_BUF) | MAC_FRAG_LEN(p->crypt_len);
+-      op.mac_iv =
++      op->mac_iv =
+               MAC_INNER_IV_P(SRAM_HMAC_IV_IN) |
+               MAC_OUTER_IV_P(SRAM_HMAC_IV_OUT);
+ 
+@@ -332,35 +474,59 @@ static void mv_process_hash_current(int
+               && (req_ctx->count <= MAX_HW_HASH_SIZE);
+       if (req_ctx->first_hash) {
+               if (is_last)
+-                      op.config |= CFG_NOT_FRAG;
++                      op->config |= CFG_NOT_FRAG;
+               else
+-                      op.config |= CFG_FIRST_FRAG;
++                      op->config |= CFG_FIRST_FRAG;
+ 
+               req_ctx->first_hash = 0;
+       } else {
+               if (is_last)
+-                      op.config |= CFG_LAST_FRAG;
++                      op->config |= CFG_LAST_FRAG;
+               else
+-                      op.config |= CFG_MID_FRAG;
++                      op->config |= CFG_MID_FRAG;
+ 
+-              if (first_block) {
+-                      writel(req_ctx->state[0], cpg->reg + DIGEST_INITIAL_VAL_A);
+-                      writel(req_ctx->state[1], cpg->reg + DIGEST_INITIAL_VAL_B);
+-                      writel(req_ctx->state[2], cpg->reg + DIGEST_INITIAL_VAL_C);
+-                      writel(req_ctx->state[3], cpg->reg + DIGEST_INITIAL_VAL_D);
+-                      writel(req_ctx->state[4], cpg->reg + DIGEST_INITIAL_VAL_E);
+-              }
++              writel(req_ctx->state[0], cpg->reg + DIGEST_INITIAL_VAL_A);
++              writel(req_ctx->state[1], cpg->reg + DIGEST_INITIAL_VAL_B);
++              writel(req_ctx->state[2], cpg->reg + DIGEST_INITIAL_VAL_C);
++              writel(req_ctx->state[3], cpg->reg + DIGEST_INITIAL_VAL_D);
++              writel(req_ctx->state[4], cpg->reg + DIGEST_INITIAL_VAL_E);
+       }
+ 
+-      memcpy(cpg->sram + SRAM_CONFIG, &op, sizeof(struct sec_accel_config));
++      dma_sync_single_for_device(cpg->dev, cpg->sa_sram_dma,
++                      sizeof(struct sec_accel_sram), DMA_TO_DEVICE);
++      mv_tdma_memcpy(cpg->sram_phys + SRAM_CONFIG, cpg->sa_sram_dma,
++                      sizeof(struct sec_accel_sram));
++}
+ 
+-      /* GO */
+-      writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
++static void mv_update_hash_config(struct ahash_request *req)
++{
++      struct mv_req_hash_ctx *req_ctx = ahash_request_ctx(req);
++      struct req_progress *p = &cpg->p;
++      int is_last;
++      u32 val;
++
++      /* update only the config (for changed fragment state) and
++       * mac_digest (for changed frag len) fields */
+ 
+-      /*
+-      * XXX: add timer if the interrupt does not occur for some mystery
+-      * reason
+-      */
++      switch (req_ctx->op) {
++      case COP_SHA1:
++      default:
++              val = CFG_OP_MAC_ONLY | CFG_MACM_SHA1;
++              break;
++      case COP_HMAC_SHA1:
++              val = CFG_OP_MAC_ONLY | CFG_MACM_HMAC_SHA1;
++              break;
++      }
++
++      is_last = req_ctx->last_chunk
++              && (p->hw_processed_bytes + p->crypt_len >= p->hw_nbytes)
++              && (req_ctx->count <= MAX_HW_HASH_SIZE);
++
++      val |= is_last ? CFG_LAST_FRAG : CFG_MID_FRAG;
++      mv_tdma_u32_copy(cpg->sram_phys + SRAM_CONFIG, val);
++
++      val = MAC_DIGEST_P(SRAM_DIGEST_BUF) | MAC_FRAG_LEN(p->crypt_len);
++      mv_tdma_u32_copy(cpg->sram_phys + SRAM_CONFIG + 6 * sizeof(u32), val);
+ }
+ 
+ static inline int mv_hash_import_sha1_ctx(const struct mv_req_hash_ctx *ctx,
+@@ -404,6 +570,15 @@ out:
+       return rc;
+ }
+ 
++static void mv_save_digest_state(struct mv_req_hash_ctx *ctx)
++{
++      ctx->state[0] = readl(cpg->reg + DIGEST_INITIAL_VAL_A);
++      ctx->state[1] = readl(cpg->reg + DIGEST_INITIAL_VAL_B);
++      ctx->state[2] = readl(cpg->reg + DIGEST_INITIAL_VAL_C);
++      ctx->state[3] = readl(cpg->reg + DIGEST_INITIAL_VAL_D);
++      ctx->state[4] = readl(cpg->reg + DIGEST_INITIAL_VAL_E);
++}
++
+ static void mv_hash_algo_completion(void)
+ {
+       struct ahash_request *req = ahash_request_cast(cpg->cur_req);
+@@ -411,72 +586,39 @@ static void mv_hash_algo_completion(void
+ 
+       if (ctx->extra_bytes)
+               copy_src_to_buf(&cpg->p, ctx->buffer, ctx->extra_bytes);
+-      sg_miter_stop(&cpg->p.src_sg_it);
+ 
+       if (likely(ctx->last_chunk)) {
+-              if (likely(ctx->count <= MAX_HW_HASH_SIZE)) {
+-                      memcpy(req->result, cpg->sram + SRAM_DIGEST_BUF,
+-                             crypto_ahash_digestsize(crypto_ahash_reqtfm
+-                                                     (req)));
+-              } else
++              dma_unmap_single(cpg->dev, ctx->result_dma,
++                              ctx->digestsize, DMA_FROM_DEVICE);
++
++              dma_unmap_single(cpg->dev, ctx->buffer_dma,
++                              SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
++
++              if (unlikely(ctx->count > MAX_HW_HASH_SIZE)) {
++                      mv_save_digest_state(ctx);
+                       mv_hash_final_fallback(req);
++              }
+       } else {
+-              ctx->state[0] = readl(cpg->reg + DIGEST_INITIAL_VAL_A);
+-              ctx->state[1] = readl(cpg->reg + DIGEST_INITIAL_VAL_B);
+-              ctx->state[2] = readl(cpg->reg + DIGEST_INITIAL_VAL_C);
+-              ctx->state[3] = readl(cpg->reg + DIGEST_INITIAL_VAL_D);
+-              ctx->state[4] = readl(cpg->reg + DIGEST_INITIAL_VAL_E);
++              mv_save_digest_state(ctx);
+       }
++
++      mv_dma_unmap_sg(req->src, req->nbytes, DMA_TO_DEVICE);
+ }
+ 
+ static void dequeue_complete_req(void)
+ {
+       struct crypto_async_request *req = cpg->cur_req;
+-      void *buf;
+-      int ret;
+-      cpg->p.hw_processed_bytes += cpg->p.crypt_len;
+-      if (cpg->p.copy_back) {
+-              int need_copy_len = cpg->p.crypt_len;
+-              int sram_offset = 0;
+-              do {
+-                      int dst_copy;
+-
+-                      if (!cpg->p.sg_dst_left) {
+-                              ret = sg_miter_next(&cpg->p.dst_sg_it);
+-                              BUG_ON(!ret);
+-                              cpg->p.sg_dst_left = cpg->p.dst_sg_it.length;
+-                              cpg->p.dst_start = 0;
+-                      }
+-
+-                      buf = cpg->p.dst_sg_it.addr;
+-                      buf += cpg->p.dst_start;
+-
+-                      dst_copy = min(need_copy_len, cpg->p.sg_dst_left);
+ 
+-                      memcpy(buf,
+-                             cpg->sram + SRAM_DATA_OUT_START + sram_offset,
+-                             dst_copy);
+-                      sram_offset += dst_copy;
+-                      cpg->p.sg_dst_left -= dst_copy;
+-                      need_copy_len -= dst_copy;
+-                      cpg->p.dst_start += dst_copy;
+-              } while (need_copy_len > 0);
+-      }
+-
+-      cpg->p.crypt_len = 0;
++      mv_tdma_clear();
++      cpg->desclist.usage = 0;
+ 
+       BUG_ON(cpg->eng_st != ENGINE_W_DEQUEUE);
+-      if (cpg->p.hw_processed_bytes < cpg->p.hw_nbytes) {
+-              /* process next scatter list entry */
+-              cpg->eng_st = ENGINE_BUSY;
+-              cpg->p.process(0);
+-      } else {
+-              cpg->p.complete();
+-              cpg->eng_st = ENGINE_IDLE;
+-              local_bh_disable();
+-              req->complete(req, 0);
+-              local_bh_enable();
+-      }
++
++      cpg->p.complete();
++      cpg->eng_st = ENGINE_IDLE;
++      local_bh_disable();
++      req->complete(req, 0);
++      local_bh_enable();
+ }
+ 
+ static int count_sgs(struct scatterlist *sl, unsigned int total_bytes)
+@@ -499,33 +641,68 @@ static int count_sgs(struct scatterlist
+ static void mv_start_new_crypt_req(struct ablkcipher_request *req)
+ {
+       struct req_progress *p = &cpg->p;
+-      int num_sgs;
+ 
+       cpg->cur_req = &req->base;
+       memset(p, 0, sizeof(struct req_progress));
+       p->hw_nbytes = req->nbytes;
+       p->complete = mv_crypto_algo_completion;
+-      p->process = mv_process_current_q;
+-      p->copy_back = 1;
+ 
+-      num_sgs = count_sgs(req->src, req->nbytes);
+-      sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG);
++      /* assume inplace request */
++      if (req->src == req->dst) {
++              if (!mv_dma_map_sg(req->src, req->nbytes, DMA_BIDIRECTIONAL))
++                      return;
++      } else {
++              if (!mv_dma_map_sg(req->src, req->nbytes, DMA_TO_DEVICE))
++                      return;
++
++              if (!mv_dma_map_sg(req->dst, req->nbytes, DMA_FROM_DEVICE)) {
++                      mv_dma_unmap_sg(req->src, req->nbytes, DMA_TO_DEVICE);
++                      return;
++              }
++      }
++
++      p->src_sg = req->src;
++      p->dst_sg = req->dst;
++      if (req->nbytes) {
++              BUG_ON(!req->src);
++              BUG_ON(!req->dst);
++              p->sg_src_left = sg_dma_len(req->src);
++              p->sg_dst_left = sg_dma_len(req->dst);
++      }
++
++      setup_data_in();
++      mv_init_crypt_config(req);
++      mv_tdma_separator();
++      dma_copy_buf_to_dst(&cpg->p, cpg->sram_phys + SRAM_DATA_OUT_START, cpg->p.crypt_len);
++      cpg->p.hw_processed_bytes += cpg->p.crypt_len;
++      while (cpg->p.hw_processed_bytes < cpg->p.hw_nbytes) {
++              cpg->p.crypt_len = 0;
++
++              setup_data_in();
++              mv_update_crypt_config();
++              mv_tdma_separator();
++              dma_copy_buf_to_dst(&cpg->p, cpg->sram_phys + SRAM_DATA_OUT_START, cpg->p.crypt_len);
++              cpg->p.hw_processed_bytes += cpg->p.crypt_len;
++      }
+ 
+-      num_sgs = count_sgs(req->dst, req->nbytes);
+-      sg_miter_start(&p->dst_sg_it, req->dst, num_sgs, SG_MITER_TO_SG);
+ 
+-      mv_process_current_q(1);
++      /* GO */
++      mv_setup_timer();
++      mv_tdma_trigger();
++      writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
+ }
+ 
+ static void mv_start_new_hash_req(struct ahash_request *req)
+ {
+       struct req_progress *p = &cpg->p;
+       struct mv_req_hash_ctx *ctx = ahash_request_ctx(req);
+-      int num_sgs, hw_bytes, old_extra_bytes, rc;
++      int hw_bytes, old_extra_bytes, rc;
++
+       cpg->cur_req = &req->base;
+       memset(p, 0, sizeof(struct req_progress));
+       hw_bytes = req->nbytes + ctx->extra_bytes;
+       old_extra_bytes = ctx->extra_bytes;
++      ctx->digestsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
+ 
+       ctx->extra_bytes = hw_bytes % SHA1_BLOCK_SIZE;
+       if (ctx->extra_bytes != 0
+@@ -534,25 +711,13 @@ static void mv_start_new_hash_req(struct
+       else
+               ctx->extra_bytes = 0;
+ 
+-      num_sgs = count_sgs(req->src, req->nbytes);
+-      sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG);
+-
+-      if (hw_bytes) {
+-              p->hw_nbytes = hw_bytes;
+-              p->complete = mv_hash_algo_completion;
+-              p->process = mv_process_hash_current;
+-
+-              if (unlikely(old_extra_bytes)) {
+-                      memcpy(cpg->sram + SRAM_DATA_IN_START, ctx->buffer,
+-                             old_extra_bytes);
+-                      p->crypt_len = old_extra_bytes;
++      if (unlikely(!hw_bytes)) { /* too little data for CESA */
++              if (req->nbytes) {
++                      p->src_sg = req->src;
++                      p->sg_src_left = req->src->length;
++                      copy_src_to_buf(p, ctx->buffer + old_extra_bytes,
++                                      req->nbytes);
+               }
+-
+-              mv_process_hash_current(1);
+-      } else {
+-              copy_src_to_buf(p, ctx->buffer + old_extra_bytes,
+-                              ctx->extra_bytes - old_extra_bytes);
+-              sg_miter_stop(&p->src_sg_it);
+               if (ctx->last_chunk)
+                       rc = mv_hash_final_fallback(req);
+               else
+@@ -561,7 +726,60 @@ static void mv_start_new_hash_req(struct
+               local_bh_disable();
+               req->base.complete(&req->base, rc);
+               local_bh_enable();
++              return;
++      }
++
++      if (likely(req->nbytes)) {
++              BUG_ON(!req->src);
++
++              if (!mv_dma_map_sg(req->src, req->nbytes, DMA_TO_DEVICE)) {
++                      printk(KERN_ERR "%s: out of memory\n", __func__);
++                      return;
++              }
++              p->sg_src_left = sg_dma_len(req->src);
++              p->src_sg = req->src;
++      }
++
++      p->hw_nbytes = hw_bytes;
++      p->complete = mv_hash_algo_completion;
++
++      if (unlikely(old_extra_bytes)) {
++              dma_sync_single_for_device(cpg->dev, ctx->buffer_dma,
++                              SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
++              mv_tdma_memcpy(cpg->sram_phys + SRAM_DATA_IN_START,
++                              ctx->buffer_dma, old_extra_bytes);
++              p->crypt_len = old_extra_bytes;
++      }
++
++      setup_data_in();
++      mv_init_hash_config(req);
++      mv_tdma_separator();
++      cpg->p.hw_processed_bytes += cpg->p.crypt_len;
++      while (cpg->p.hw_processed_bytes < cpg->p.hw_nbytes) {
++              cpg->p.crypt_len = 0;
++
++              setup_data_in();
++              mv_update_hash_config(req);
++              mv_tdma_separator();
++              cpg->p.hw_processed_bytes += cpg->p.crypt_len;
++      }
++      if (req->result) {
++              ctx->result_dma = dma_map_single(cpg->dev, req->result,
++                              ctx->digestsize, DMA_FROM_DEVICE);
++              mv_tdma_memcpy(ctx->result_dma,
++                              cpg->sram_phys + SRAM_DIGEST_BUF,
++                              ctx->digestsize);
++      } else {
++              /* XXX: this fixes some ugly register fuckup bug in the tdma engine
++               *      (no need to sync since the data is ignored anyway) */
++              mv_tdma_memcpy(cpg->sa_sram_dma,
++                              cpg->sram_phys + SRAM_CONFIG, 1);
+       }
++
++      /* GO */
++      mv_setup_timer();
++      mv_tdma_trigger();
++      writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
+ }
+ 
+ static int queue_manag(void *data)
+@@ -684,6 +902,8 @@ static void mv_init_hash_req_ctx(struct
+       ctx->first_hash = 1;
+       ctx->last_chunk = is_last;
+       ctx->count_add = count_add;
++      ctx->buffer_dma = dma_map_single(cpg->dev, ctx->buffer,
++                      SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+ }
+ 
+ static void mv_update_hash_req_ctx(struct mv_req_hash_ctx *ctx, int is_last,
+@@ -883,11 +1103,14 @@ irqreturn_t crypto_int(int irq, void *pr
+       u32 val;
+ 
+       val = readl(cpg->reg + SEC_ACCEL_INT_STATUS);
+-      if (!(val & SEC_INT_ACCEL0_DONE))
++      if (!(val & SEC_INT_ACC0_IDMA_DONE))
+               return IRQ_NONE;
+ 
+-      val &= ~SEC_INT_ACCEL0_DONE;
+-      writel(val, cpg->reg + FPGA_INT_STATUS);
++      if (!del_timer(&cpg->completion_timer)) {
++              printk(KERN_WARNING MV_CESA
++                     "got an interrupt but no pending timer?\n");
++      }
++      val &= ~SEC_INT_ACC0_IDMA_DONE;
+       writel(val, cpg->reg + SEC_ACCEL_INT_STATUS);
+       BUG_ON(cpg->eng_st != ENGINE_BUSY);
+       cpg->eng_st = ENGINE_W_DEQUEUE;
+@@ -1022,6 +1245,7 @@ static int mv_probe(struct platform_devi
+       }
+       cp->sram_size = resource_size(res);
+       cp->max_req_size = cp->sram_size - SRAM_CFG_SPACE;
++      cp->sram_phys = res->start;
+       cp->sram = ioremap(res->start, cp->sram_size);
+       if (!cp->sram) {
+               ret = -ENOMEM;
+@@ -1037,6 +1261,7 @@ static int mv_probe(struct platform_devi
+ 
+       platform_set_drvdata(pdev, cp);
+       cpg = cp;
++      cpg->dev = &pdev->dev;
+ 
+       cp->queue_th = kthread_run(queue_manag, cp, "mv_crypto");
+       if (IS_ERR(cp->queue_th)) {
+@@ -1049,15 +1274,30 @@ static int mv_probe(struct platform_devi
+       if (ret)
+               goto err_thread;
+ 
+-      writel(SEC_INT_ACCEL0_DONE, cpg->reg + SEC_ACCEL_INT_MASK);
+-      writel(SEC_CFG_STOP_DIG_ERR, cpg->reg + SEC_ACCEL_CFG);
++      writel(0, cpg->reg + SEC_ACCEL_INT_STATUS);
++      writel(SEC_INT_ACC0_IDMA_DONE, cpg->reg + SEC_ACCEL_INT_MASK);
++      writel((SEC_CFG_STOP_DIG_ERR | SEC_CFG_CH0_W_IDMA | SEC_CFG_MP_CHAIN |
++              SEC_CFG_ACT_CH0_IDMA), cpg->reg + SEC_ACCEL_CFG);
+       writel(SRAM_CONFIG, cpg->reg + SEC_ACCEL_DESC_P0);
+ 
++      cp->sa_sram_dma = dma_map_single(&pdev->dev, &cp->sa_sram,
++                      sizeof(struct sec_accel_sram), DMA_TO_DEVICE);
++
++      if (init_dma_desclist(&cpg->desclist, &pdev->dev,
++                              sizeof(u32), MV_DMA_ALIGN, 0)) {
++              ret = -ENOMEM;
++              goto err_mapping;
++      }
++      if (set_dma_desclist_size(&cpg->desclist, MV_DMA_INIT_POOLSIZE)) {
++              printk(KERN_ERR MV_CESA "failed to initialise poolsize\n");
++              goto err_pool;
++      }
++
+       ret = crypto_register_alg(&mv_aes_alg_ecb);
+       if (ret) {
+               printk(KERN_WARNING MV_CESA
+                      "Could not register aes-ecb driver\n");
+-              goto err_irq;
++              goto err_pool;
+       }
+ 
+       ret = crypto_register_alg(&mv_aes_alg_cbc);
+@@ -1084,7 +1324,11 @@ static int mv_probe(struct platform_devi
+       return 0;
+ err_unreg_ecb:
+       crypto_unregister_alg(&mv_aes_alg_ecb);
+-err_irq:
++err_pool:
++      fini_dma_desclist(&cpg->desclist);
++err_mapping:
++      dma_unmap_single(&pdev->dev, cpg->sa_sram_dma,
++                      sizeof(struct sec_accel_sram), DMA_TO_DEVICE);
+       free_irq(irq, cp);
+ err_thread:
+       kthread_stop(cp->queue_th);
+@@ -1111,6 +1355,9 @@ static int mv_remove(struct platform_dev
+               crypto_unregister_ahash(&mv_hmac_sha1_alg);
+       kthread_stop(cp->queue_th);
+       free_irq(cp->irq, cp);
++      dma_unmap_single(&pdev->dev, cpg->sa_sram_dma,
++                      sizeof(struct sec_accel_sram), DMA_TO_DEVICE);
++      fini_dma_desclist(&cpg->desclist);
+       memset(cp->sram, 0, cp->sram_size);
+       iounmap(cp->sram);
+       iounmap(cp->reg);
+--- a/drivers/crypto/mv_cesa.h
++++ b/drivers/crypto/mv_cesa.h
+@@ -24,18 +24,12 @@
+ #define SEC_CFG_CH1_W_IDMA    (1 << 8)
+ #define SEC_CFG_ACT_CH0_IDMA  (1 << 9)
+ #define SEC_CFG_ACT_CH1_IDMA  (1 << 10)
++#define SEC_CFG_MP_CHAIN      (1 << 11)
+ 
+ #define SEC_ACCEL_STATUS      0xde0c
+ #define SEC_ST_ACT_0          (1 << 0)
+ #define SEC_ST_ACT_1          (1 << 1)
+ 
+-/*
+- * FPGA_INT_STATUS looks like a FPGA leftover and is documented only in Errata
+- * 4.12. It looks like that it was part of an IRQ-controller in FPGA and
+- * someone forgot to remove  it while switching to the core and moving to
+- * SEC_ACCEL_INT_STATUS.
+- */
+-#define FPGA_INT_STATUS               0xdd68
+ #define SEC_ACCEL_INT_STATUS  0xde20
+ #define SEC_INT_AUTH_DONE     (1 << 0)
+ #define SEC_INT_DES_E_DONE    (1 << 1)
+--- /dev/null
++++ b/drivers/crypto/mv_tdma.c
+@@ -0,0 +1,340 @@
++/*
++ * Support for Marvell's TDMA engine found on Kirkwood chips,
++ * used exclusively by the CESA crypto accelerator.
++ *
++ * Based on unpublished code for IDMA written by Sebastian Siewior.
++ *
++ * Copyright (C) 2012 Phil Sutter <phil.sutter <at> viprinet.com>
++ * License: GPLv2
++ */
++
++#include <linux/delay.h>
++#include <linux/dma-mapping.h>
++#include <linux/dmapool.h>
++#include <linux/interrupt.h>
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/platform_device.h>
++
++#include "mv_tdma.h"
++#include "dma_desclist.h"
++
++#define MV_TDMA "MV-TDMA: "
++
++#define MV_DMA_INIT_POOLSIZE 16
++#define MV_DMA_ALIGN 16
++
++struct tdma_desc {
++      u32 count;
++      u32 src;
++      u32 dst;
++      u32 next;
++} __attribute__((packed));
++
++struct tdma_priv {
++      struct device *dev;
++      void __iomem *reg;
++      int irq;
++      /* protecting the dma descriptors and stuff */
++      spinlock_t lock;
++      struct dma_desclist desclist;
++} tpg;
++
++#define ITEM(x)               ((struct tdma_desc *)DESCLIST_ITEM(tpg.desclist, x))
++#define ITEM_DMA(x)   DESCLIST_ITEM_DMA(tpg.desclist, x)
++
++static inline void wait_for_tdma_idle(void)
++{
++      while (readl(tpg.reg + TDMA_CTRL) & TDMA_CTRL_ACTIVE)
++              mdelay(100);
++}
++
++static inline void switch_tdma_engine(bool state)
++{
++      u32 val = readl(tpg.reg + TDMA_CTRL);
++
++      val |=  ( state * TDMA_CTRL_ENABLE);
++      val &= ~(!state * TDMA_CTRL_ENABLE);
++
++      writel(val, tpg.reg + TDMA_CTRL);
++}
++
++static struct tdma_desc *get_new_last_desc(void)
++{
++      if (unlikely(DESCLIST_FULL(tpg.desclist)) &&
++          set_dma_desclist_size(&tpg.desclist, tpg.desclist.length << 1)) {
++              printk(KERN_ERR MV_TDMA "failed to increase DMA pool to %lu\n",
++                              tpg.desclist.length << 1);
++              return NULL;
++      }
++
++      if (likely(tpg.desclist.usage))
++              ITEM(tpg.desclist.usage - 1)->next =
++                      ITEM_DMA(tpg.desclist.usage);
++
++      return ITEM(tpg.desclist.usage++);
++}
++
++static inline void mv_tdma_desc_dump(void)
++{
++      struct tdma_desc *tmp;
++      int i;
++
++      if (!tpg.desclist.usage) {
++              printk(KERN_WARNING MV_TDMA "DMA descriptor list is empty\n");
++              return;
++      }
++
++      printk(KERN_WARNING MV_TDMA "DMA descriptor list:\n");
++      for (i = 0; i < tpg.desclist.usage; i++) {
++              tmp = ITEM(i);
++              printk(KERN_WARNING MV_TDMA "entry %d at 0x%x: dma addr 0x%x, "
++                     "src 0x%x, dst 0x%x, count %u, own %d, next 0x%x", i,
++                     (u32)tmp, ITEM_DMA(i) , tmp->src, tmp->dst,
++                     tmp->count & ~TDMA_OWN_BIT, !!(tmp->count & 
TDMA_OWN_BIT),
++                     tmp->next);
++      }
++}
++
++static inline void mv_tdma_reg_dump(void)
++{
++#define PRINTREG(offset) \
++      printk(KERN_WARNING MV_TDMA "tpg.reg + " #offset " = 0x%x\n", \
++                      readl(tpg.reg + offset))
++
++      PRINTREG(TDMA_CTRL);
++      PRINTREG(TDMA_BYTE_COUNT);
++      PRINTREG(TDMA_SRC_ADDR);
++      PRINTREG(TDMA_DST_ADDR);
++      PRINTREG(TDMA_NEXT_DESC);
++      PRINTREG(TDMA_CURR_DESC);
++
++#undef PRINTREG
++}
++
++void mv_tdma_clear(void)
++{
++      if (!tpg.dev)
++              return;
++
++      spin_lock(&tpg.lock);
++
++      /* make sure tdma is idle */
++      wait_for_tdma_idle();
++      switch_tdma_engine(0);
++      wait_for_tdma_idle();
++
++      /* clear descriptor registers */
++      writel(0, tpg.reg + TDMA_BYTE_COUNT);
++      writel(0, tpg.reg + TDMA_CURR_DESC);
++      writel(0, tpg.reg + TDMA_NEXT_DESC);
++
++      tpg.desclist.usage = 0;
++
++      switch_tdma_engine(1);
++
++      /* finally free system lock again */
++      spin_unlock(&tpg.lock);
++}
++EXPORT_SYMBOL_GPL(mv_tdma_clear);
++
++void mv_tdma_trigger(void)
++{
++      if (!tpg.dev)
++              return;
++
++      spin_lock(&tpg.lock);
++
++      writel(ITEM_DMA(0), tpg.reg + TDMA_NEXT_DESC);
++
++      spin_unlock(&tpg.lock);
++}
++EXPORT_SYMBOL_GPL(mv_tdma_trigger);
++
++void mv_tdma_separator(void)
++{
++      struct tdma_desc *tmp;
++
++      if (!tpg.dev)
++              return;
++
++      spin_lock(&tpg.lock);
++
++      tmp = get_new_last_desc();
++      memset(tmp, 0, sizeof(*tmp));
++
++      spin_unlock(&tpg.lock);
++}
++EXPORT_SYMBOL_GPL(mv_tdma_separator);
++
++void mv_tdma_memcpy(dma_addr_t dst, dma_addr_t src, unsigned int size)
++{
++      struct tdma_desc *tmp;
++
++      if (!tpg.dev)
++              return;
++
++      spin_lock(&tpg.lock);
++
++      tmp = get_new_last_desc();
++      tmp->count = size | TDMA_OWN_BIT;
++      tmp->src = src;
++      tmp->dst = dst;
++      tmp->next = 0;
++
++      spin_unlock(&tpg.lock);
++}
++EXPORT_SYMBOL_GPL(mv_tdma_memcpy);
++
++irqreturn_t tdma_int(int irq, void *priv)
++{
++      u32 val;
++
++      val = readl(tpg.reg + TDMA_ERR_CAUSE);
++
++      if (val & TDMA_INT_MISS)
++              printk(KERN_ERR MV_TDMA "%s: miss!\n", __func__);
++      if (val & TDMA_INT_DOUBLE_HIT)
++              printk(KERN_ERR MV_TDMA "%s: double hit!\n", __func__);
++      if (val & TDMA_INT_BOTH_HIT)
++              printk(KERN_ERR MV_TDMA "%s: both hit!\n", __func__);
++      if (val & TDMA_INT_DATA_ERROR)
++              printk(KERN_ERR MV_TDMA "%s: data error!\n", __func__);
++      if (val) {
++              mv_tdma_reg_dump();
++              mv_tdma_desc_dump();
++      }
++
++      switch_tdma_engine(0);
++      wait_for_tdma_idle();
++
++      /* clear descriptor registers */
++      writel(0, tpg.reg + TDMA_BYTE_COUNT);
++      writel(0, tpg.reg + TDMA_SRC_ADDR);
++      writel(0, tpg.reg + TDMA_DST_ADDR);
++      writel(0, tpg.reg + TDMA_CURR_DESC);
++
++      /* clear error cause register */
++      writel(0, tpg.reg + TDMA_ERR_CAUSE);
++
++      /* initialize control register (also enables engine) */
++      writel(TDMA_CTRL_INIT_VALUE, tpg.reg + TDMA_CTRL);
++      wait_for_tdma_idle();
++
++      return (val ? IRQ_HANDLED : IRQ_NONE);
++}
++
++static int mv_probe(struct platform_device *pdev)
++{
++      struct resource *res;
++      int rc;
++
++      if (tpg.dev) {
++              printk(KERN_ERR MV_TDMA "second TDMA device?!\n");
++              return -ENXIO;
++      }
++      tpg.dev = &pdev->dev;
++
++      res = platform_get_resource_byname(pdev,
++                      IORESOURCE_MEM, "regs control and error");
++      if (!res)
++              return -ENXIO;
++
++      if (!(tpg.reg = ioremap(res->start, resource_size(res))))
++              return -ENOMEM;
++
++      tpg.irq = platform_get_irq(pdev, 0);
++      if (tpg.irq < 0 || tpg.irq == NO_IRQ) {
++              rc = -ENXIO;
++              goto out_unmap_reg;
++      }
++
++      if (init_dma_desclist(&tpg.desclist, tpg.dev,
++                      sizeof(struct tdma_desc), MV_DMA_ALIGN, 0)) {
++              rc = -ENOMEM;
++              goto out_free_irq;
++      }
++      if (set_dma_desclist_size(&tpg.desclist, MV_DMA_INIT_POOLSIZE)) {
++              rc = -ENOMEM;
++              goto out_free_desclist;
++      }
++
++      platform_set_drvdata(pdev, &tpg);
++
++      switch_tdma_engine(0);
++      wait_for_tdma_idle();
++
++      /* clear descriptor registers */
++      writel(0, tpg.reg + TDMA_BYTE_COUNT);
++      writel(0, tpg.reg + TDMA_SRC_ADDR);
++      writel(0, tpg.reg + TDMA_DST_ADDR);
++      writel(0, tpg.reg + TDMA_CURR_DESC);
++
++      /* have an ear for occurring errors */
++      writel(TDMA_INT_ALL, tpg.reg + TDMA_ERR_MASK);
++      writel(0, tpg.reg + TDMA_ERR_CAUSE);
++
++      /* initialize control register (also enables engine) */
++      writel(TDMA_CTRL_INIT_VALUE, tpg.reg + TDMA_CTRL);
++      wait_for_tdma_idle();
++
++      if (request_irq(tpg.irq, tdma_int, IRQF_DISABLED,
++                              dev_name(tpg.dev), &tpg)) {
++              rc = -ENXIO;
++              goto out_free_all;
++      }
++
++      spin_lock_init(&tpg.lock);
++
++      printk(KERN_INFO MV_TDMA "up and running, IRQ %d\n", tpg.irq);
++      return 0;
++out_free_all:
++      switch_tdma_engine(0);
++      platform_set_drvdata(pdev, NULL);
++out_free_desclist:
++      fini_dma_desclist(&tpg.desclist);
++out_free_irq:
++      free_irq(tpg.irq, &tpg);
++out_unmap_reg:
++      iounmap(tpg.reg);
++      tpg.dev = NULL;
++      return rc;
++}
++
++static int mv_remove(struct platform_device *pdev)
++{
++      switch_tdma_engine(0);
++      platform_set_drvdata(pdev, NULL);
++      fini_dma_desclist(&tpg.desclist);
++      free_irq(tpg.irq, &tpg);
++      iounmap(tpg.reg);
++      tpg.dev = NULL;
++      return 0;
++}
++
++static struct platform_driver marvell_tdma = {
++      .probe          = mv_probe,
++      .remove         = mv_remove,
++      .driver         = {
++              .owner  = THIS_MODULE,
++              .name   = "mv_tdma",
++      },
++};
++MODULE_ALIAS("platform:mv_tdma");
++
++static int __init mv_tdma_init(void)
++{
++      return platform_driver_register(&marvell_tdma);
++}
++module_init(mv_tdma_init);
++
++static void __exit mv_tdma_exit(void)
++{
++      platform_driver_unregister(&marvell_tdma);
++}
++module_exit(mv_tdma_exit);
++
++MODULE_AUTHOR("Phil Sutter <phil.sutter <at> viprinet.com>");
++MODULE_DESCRIPTION("Support for Marvell's TDMA engine");
++MODULE_LICENSE("GPL");
++
+--- /dev/null
++++ b/drivers/crypto/mv_tdma.h
+@@ -0,0 +1,50 @@
++#ifndef _MV_TDMA_H
++#define _MV_TDMA_H
++
++/* TDMA_CTRL register bits */
++#define TDMA_CTRL_DST_BURST(x)        (x)
++#define TDMA_CTRL_DST_BURST_32        TDMA_CTRL_DST_BURST(3)
++#define TDMA_CTRL_DST_BURST_128       TDMA_CTRL_DST_BURST(4)
++#define TDMA_CTRL_OUTST_RD_EN (1 << 4)
++#define TDMA_CTRL_SRC_BURST(x)        (x << 6)
++#define TDMA_CTRL_SRC_BURST_32        TDMA_CTRL_SRC_BURST(3)
++#define TDMA_CTRL_SRC_BURST_128       TDMA_CTRL_SRC_BURST(4)
++#define TDMA_CTRL_NO_CHAIN_MODE       (1 << 9)
++#define TDMA_CTRL_NO_BYTE_SWAP        (1 << 11)
++#define TDMA_CTRL_ENABLE      (1 << 12)
++#define TDMA_CTRL_FETCH_ND    (1 << 13)
++#define TDMA_CTRL_ACTIVE      (1 << 14)
++
++#define TDMA_CTRL_INIT_VALUE ( \
++      TDMA_CTRL_DST_BURST_128 | TDMA_CTRL_SRC_BURST_128 | \
++      TDMA_CTRL_NO_BYTE_SWAP | TDMA_CTRL_ENABLE \
++)
++
++/* TDMA_ERR_CAUSE bits */
++#define TDMA_INT_MISS         (1 << 0)
++#define TDMA_INT_DOUBLE_HIT   (1 << 1)
++#define TDMA_INT_BOTH_HIT     (1 << 2)
++#define TDMA_INT_DATA_ERROR   (1 << 3)
++#define TDMA_INT_ALL          0x0f
++
++/* offsets of registers, starting at "regs control and error" */
++#define TDMA_BYTE_COUNT               0x00
++#define TDMA_SRC_ADDR         0x10
++#define TDMA_DST_ADDR         0x20
++#define TDMA_NEXT_DESC                0x30
++#define TDMA_CTRL             0x40
++#define TDMA_CURR_DESC                0x70
++#define TDMA_ERR_CAUSE                0xc8
++#define TDMA_ERR_MASK         0xcc
++
++/* Owner bit in TDMA_BYTE_COUNT and descriptors' count field, used
++ * to signal TDMA in descriptor chain when input data is complete. */
++#define TDMA_OWN_BIT          (1 << 31)
++
++extern void mv_tdma_memcpy(dma_addr_t, dma_addr_t, unsigned int);
++extern void mv_tdma_separator(void);
++extern void mv_tdma_clear(void);
++extern void mv_tdma_trigger(void);
++
++
++#endif /* _MV_TDMA_H */
Index: package/kernel/modules/crypto.mk
===================================================================
--- package/kernel/modules/crypto.mk    (revision 32032)
+++ package/kernel/modules/crypto.mk    (working copy)
@@ -476,14 +476,16 @@
   $(call AddDepends/crypto)
 endef
 
-$(eval $(call KernelPackage,crypto-xts))
 
 define KernelPackage/crypto-mv-cesa
   TITLE:=Marvell crypto engine
   DEPENDS:=+kmod-crypto-manager +kmod-crypto-aes @TARGET_kirkwood||TARGET_orion
   KCONFIG:=CONFIG_CRYPTO_DEV_MV_CESA
-  FILES:=$(LINUX_DIR)/drivers/crypto/mv_cesa.ko
-  AUTOLOAD:=$(call AutoLoad,09,mv_cesa)
+  FILES:=$(LINUX_DIR)/drivers/crypto/mv_cesa.ko \
+       $(LINUX_DIR)/drivers/crypto/mv_tdma.ko
+  AUTOLOAD:=$(call AutoLoad,09, \
+       mv_tdma \
+       mv_cesa)
   $(call AddDepends/crypto)
 endef
 

bye
  MM
--
A: Because it messes up the order in which people normally read text.
Q: Why is top-posting such a bad thing?
A: Top-posting.
Q: What is the most annoying thing in e-mail?