Re: [dpdk-dev] [PATCH] ipsec: include high order bytes of esn in pkt len

2019-05-19 Thread Ananyev, Konstantin


Hi Lukasz,
Thanks for clarifications.
Looks good in general.
Few small comments below.
Konstantin
 
> When esn is used then high-order 32 bits are included in ICV
> calculation however are not transmitted. Update packet length
> to be consistent with auth data offset and length before crypto
> operation. High-order 32 bits of esn will be removed from packet
> length in crypto post processing.
> 
> Change-Id: I5ba50e341059a8d6a5e4ce7c626dfe7b9173740b
> Signed-off-by: Lukasz Bartosik 
> ---
>  lib/librte_ipsec/esp_inb.c  | 56 
> +
>  lib/librte_ipsec/esp_outb.c | 31 +
>  lib/librte_ipsec/sa.c   |  4 ++--
>  lib/librte_ipsec/sa.h   |  8 +++
>  4 files changed, 87 insertions(+), 12 deletions(-)
> 
> diff --git a/lib/librte_ipsec/esp_inb.c b/lib/librte_ipsec/esp_inb.c
> index 4e0e12a..eb899e3 100644
> --- a/lib/librte_ipsec/esp_inb.c
> +++ b/lib/librte_ipsec/esp_inb.c
> @@ -16,7 +16,8 @@
>  #include "pad.h"
> 
>  typedef uint16_t (*esp_inb_process_t)(const struct rte_ipsec_sa *sa,
> - struct rte_mbuf *mb[], uint32_t sqn[], uint32_t dr[], uint16_t num);
> + struct rte_mbuf *mb[], uint32_t sqn[], uint32_t dr[], uint16_t num,
> + uint8_t is_inline);
> 
>  /*
>   * helper function to fill crypto_sym op for cipher+auth algorithms.
> @@ -181,6 +182,15 @@ inb_pkt_prepare(const struct rte_ipsec_sa *sa, const 
> struct replay_sqn *rsn,
>   icv->va = rte_pktmbuf_mtod_offset(ml, void *, icv_ofs);
>   icv->pa = rte_pktmbuf_iova_offset(ml, icv_ofs);
> 
> + /*
> +  * if esn is used then high-order 32 bits are also used in ICV
> +  * calculation but are not transmitted, update packet length
> +  * to be consistent with auth data length and offset, this will
> +  * be subtracted from packet length in post crypto processing

Here and in several other comments below - you repeat basically the same thing.
Seems a bit excessive. I suppose just to put in in one place, or probably even
in patch description will be enough.

> +  */
> + mb->pkt_len += sa->sqh_len;
> + ml->data_len += sa->sqh_len;
> +
>   inb_pkt_xprepare(sa, sqn, icv);
>   return plen;
>  }
> @@ -373,14 +383,20 @@ tun_process_step3(struct rte_mbuf *mb, uint64_t 
> txof_msk, uint64_t txof_val)
>   */
>  static inline uint16_t
>  tun_process(const struct rte_ipsec_sa *sa, struct rte_mbuf *mb[],
> - uint32_t sqn[], uint32_t dr[], uint16_t num)
> + uint32_t sqn[], uint32_t dr[], uint16_t num, uint8_t is_inline)
>  {
>   uint32_t adj, i, k, tl;
>   uint32_t hl[num];
>   struct esp_tail espt[num];
>   struct rte_mbuf *ml[num];
> 
> - const uint32_t tlen = sa->icv_len + sizeof(espt[0]);
> + /*
> +  * remove high-order 32 bits of esn from packet length
> +  * which was added before crypto processing, this doesn't
> +  * apply to inline case
> +  */

This comment seems a bit misleading, as we have remove not only sqh,
but also icv, espt, padding.

> + const uint32_t tlen = sa->icv_len + sizeof(espt[0]) +
> + (is_inline ? 0 : sa->sqh_len);
>   const uint32_t cofs = sa->ctp.cipher.offset;
> 
>   /*
> @@ -420,7 +436,7 @@ tun_process(const struct rte_ipsec_sa *sa, struct 
> rte_mbuf *mb[],
>   */
>  static inline uint16_t
>  trs_process(const struct rte_ipsec_sa *sa, struct rte_mbuf *mb[],
> - uint32_t sqn[], uint32_t dr[], uint16_t num)
> + uint32_t sqn[], uint32_t dr[], uint16_t num, uint8_t is_inline)
>  {
>   char *np;
>   uint32_t i, k, l2, tl;
> @@ -428,7 +444,13 @@ trs_process(const struct rte_ipsec_sa *sa, struct 
> rte_mbuf *mb[],
>   struct esp_tail espt[num];
>   struct rte_mbuf *ml[num];
> 
> - const uint32_t tlen = sa->icv_len + sizeof(espt[0]);
> + /*
> +  * remove high-order 32 bits of esn from packet length
> +  * which was added before crypto processing, this doesn't
> +  * apply to inline case
> +  */
> + const uint32_t tlen = sa->icv_len + sizeof(espt[0]) +
> + (is_inline ? 0 : sa->sqh_len);
>   const uint32_t cofs = sa->ctp.cipher.offset;
> 
>   /*
> @@ -496,8 +518,8 @@ esp_inb_rsn_update(struct rte_ipsec_sa *sa, const 
> uint32_t sqn[],
>   * process group of ESP inbound packets.
>   */
>  static inline uint16_t
> -esp_inb_pkt_process(const struct rte_ipsec_session *ss,
> - struct rte_mbuf *mb[], uint16_t num, esp_inb_process_t process)
> +esp_inb_pkt_process(const struct rte_ipsec_session *ss, struct rte_mbuf 
> *mb[],
> + uint16_t num, uint8_t is_inline, esp_inb_process_t process)
>  {
>   uint32_t k, n;
>   struct rte_ipsec_sa *sa;
> @@ -507,7 +529,7 @@ esp_inb_pkt_process(const struct rte_ipsec_session *ss,
>   sa = ss->sa;
> 
>   /* process packets, extract seq numbers */
> - k = process(sa, mb, sqn, dr, num);
> + k = process(sa, mb, sqn, dr, num, is_inline);
> 
>   /* handle unprocessed mbufs */
>   if 

Re: [dpdk-dev] [PATCH v1] lib/ipsec: add support for header construction

2019-05-19 Thread Ananyev, Konstantin
Hi,

> 
> Add support for RFC 4301(5.1.2) to update of
> Type of service field and Traffic class field
> bits inside ipv4/ipv6 packets for outbound cases
> and inbound cases which deals with the update of
> the DSCP/ENC bits inside each of the fields.
> 
> Signed-off-by: Marko Kovacevic 
> ---
>  examples/ipsec-secgw/sa.c  |   2 +
>  lib/librte_ipsec/esp_inb.c |  14 -
>  lib/librte_ipsec/esp_outb.c|   4 +-
>  lib/librte_ipsec/iph.h | 119 
> +++--
>  lib/librte_ipsec/rte_ipsec_sa.h|  25 
>  lib/librte_ipsec/sa.c  |  17 ++
>  lib/librte_ipsec/sa.h  |   2 +
>  lib/librte_net/rte_ip.h|   8 +++
>  lib/librte_security/rte_security.h |   9 +++
>  9 files changed, 191 insertions(+), 9 deletions(-)

Looks good in general, some generic comments:
- I think it is better to split the patch into few sub-pathces:
  One for rte_security, second for rte_net, third - rte_ipsec, forth - 
examples/ipsec-secgw
- Would be good to add support for other options too (ttl, etc.)
- Would be good to add new test-case for it into examples/ipsec-secgw/test/

Plus few nits in the code below.
Konstantin

> 
> diff --git a/examples/ipsec-secgw/sa.c b/examples/ipsec-secgw/sa.c
> index b850e9839..4d85d09df 100644
> --- a/examples/ipsec-secgw/sa.c
> +++ b/examples/ipsec-secgw/sa.c
> @@ -991,6 +991,8 @@ fill_ipsec_sa_prm(struct rte_ipsec_sa_prm *prm, const 
> struct ipsec_sa *ss,
>   prm->ipsec_xform.mode = (ss->flags == TRANSPORT) ?
>   RTE_SECURITY_IPSEC_SA_MODE_TRANSPORT :
>   RTE_SECURITY_IPSEC_SA_MODE_TUNNEL;
> + prm->ipsec_xform.options.ecn = 1;
> + prm->ipsec_xform.options.copy_dscp = 1;
> 
>   if (ss->flags == IP4_TUNNEL) {
>   prm->ipsec_xform.tunnel.type = RTE_SECURITY_IPSEC_TUNNEL_IPV4;
> diff --git a/lib/librte_ipsec/esp_inb.c b/lib/librte_ipsec/esp_inb.c
> index 4e0e12a85..8a3cb8a15 100644
> --- a/lib/librte_ipsec/esp_inb.c
> +++ b/lib/librte_ipsec/esp_inb.c
> @@ -377,9 +377,10 @@ tun_process(const struct rte_ipsec_sa *sa, struct 
> rte_mbuf *mb[],
>  {
>   uint32_t adj, i, k, tl;
>   uint32_t hl[num];
> + void *inner_h;
> + const void *outter_h;
>   struct esp_tail espt[num];
>   struct rte_mbuf *ml[num];
> -
>   const uint32_t tlen = sa->icv_len + sizeof(espt[0]);
>   const uint32_t cofs = sa->ctp.cipher.offset;
> 
> @@ -400,9 +401,16 @@ tun_process(const struct rte_ipsec_sa *sa, struct 
> rte_mbuf *mb[],
>   if (tun_process_check(mb[i], ml[i], espt[i], adj, tl,
>   sa->proto) == 0) {
> 
> + outter_h = rte_pktmbuf_mtod_offset(mb[i], uint8_t *,
> + mb[i]->l2_len);
> +
>   /* modify packet's layout */
> - tun_process_step2(mb[i], ml[i], hl[i], adj,
> - tl, sqn + k);
> + inner_h = tun_process_step2(mb[i], ml[i], hl[i], adj,
> + tl, sqn + k);
> +
> + if ((sa->type & INB_TUN_HDR_MSK) != 0)
> + update_inb_tun_l3_hdr(sa, inner_h, outter_h);
> +
>   /* update mbuf's metadata */
>   tun_process_step3(mb[i], sa->tx_offload.msk,
>   sa->tx_offload.val);
> diff --git a/lib/librte_ipsec/esp_outb.c b/lib/librte_ipsec/esp_outb.c
> index c798bc4c4..a71164e0c 100644
> --- a/lib/librte_ipsec/esp_outb.c
> +++ b/lib/librte_ipsec/esp_outb.c
> @@ -152,8 +152,8 @@ outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t 
> sqc,
>   rte_memcpy(ph, sa->hdr, sa->hdr_len);
> 
>   /* update original and new ip header fields */
> - update_tun_l3hdr(sa, ph + sa->hdr_l3_off, mb->pkt_len, sa->hdr_l3_off,
> - sqn_low16(sqc));
> + update_outb_tun_l3hdr(sa, ph + sa->hdr_l3_off, ph + hlen, mb->pkt_len,
> + sa->hdr_l3_off, sqn_low16(sqc));
> 
>   /* update spi, seqn and iv */
>   esph = (struct esp_hdr *)(ph + sa->hdr_len);
> diff --git a/lib/librte_ipsec/iph.h b/lib/librte_ipsec/iph.h
> index 58930cf18..f45db5d4a 100644
> --- a/lib/librte_ipsec/iph.h
> +++ b/lib/librte_ipsec/iph.h
> @@ -11,6 +11,11 @@
>   * used internally by ipsec library.
>   */
> 
> +#define IPV6_DSCP_MASK   (DSCP_MASK << IPV6_HDR_TC_SHIFT)
> +#define IPV6_ECN_MASK(ECN_MASK << IPV6_HDR_TC_SHIFT)
> +#define IPV6_TOS_MASK(IPV6_ECN_MASK | IPV6_DSCP_MASK)
> +#define IPV6_ECN_CE  IPV6_ECN_MASK
> +
>  /*
>   * Move preceding (L3) headers down to remove ESP header and IV.
>   */
> @@ -35,6 +40,26 @@ insert_esph(char *np, char *op, uint32_t hlen)
>   np[i] = op[i];
>  }
> 
> +static inline uint8_t
> +get_ipv6_tos(rte_be32_t vtc_flow)
> +{
> + uint32_t v;
> +
> + v = rte_be_to_cpu_32(vtc_flow);
> + return v >> IPV6_HDR_TC_SHIFT;
> +}
> +
> +static inline 

Re: [dpdk-dev] [PATCH 1/6] eal: replace libc-based random number generation with LFSR

2019-05-19 Thread Neil Horman
On Fri, May 17, 2019 at 11:10:25PM +0200, Mattias Rönnblom wrote:
> On 2019-05-17 21:27, Neil Horman wrote:
> 
> > > > > diff --git a/lib/librte_eal/common/rte_random.c 
> > > > > b/lib/librte_eal/common/rte_random.c
> > > > > new file mode 100644
> > > > > index 0..4d3cf5226
> > > > > --- /dev/null
> > > > > +++ b/lib/librte_eal/common/rte_random.c
> > > > > @@ -0,0 +1,139 @@
> > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > + * Copyright(c) 2019 Ericsson AB
> > > > > + */
> > > > > +
> > > > > +#include 
> > > > > +
> > > > > +#include 
> > > > > +#include 
> > > > > +#include 
> > > > > +#include 
> > > > > +#include 
> > > > > +#include 
> > > > > +
> > > > > +struct rte_rand_state {
> > > > > + uint64_t z1;
> > > > > + uint64_t z2;
> > > > > + uint64_t z3;
> > > > > + uint64_t z4;
> > > > > + uint64_t z5;
> > > > > +} __rte_cache_aligned;
> > > > > +
> > > > > +static struct rte_rand_state rand_states[RTE_MAX_LCORE];
> > > > > +
> > > > 
> > > > It just occured to me that this variable embodies all the state of the 
> > > > rng.
> > > > Whats to stop an attacker from digging through ram to get this info and
> > > > predicting what the output will be?  I understand that this rng probably
> > > > shouldn't be considered secure for cryptographic use, but it is used in 
> > > > the
> > > > ipsec test example, so it could be mistaken for an rng that is.
> > > > 
> > > 
> > > rte_rand() was never safe for use in cryptography, and now it's spelled 
> > > out
> > > in the API documentation.
> > > 
> > > If the IPsec example uses rte_rand() for anything security-related, it's a
> > > bug or at least should be accompanied by a comment, in my opinion.
> > > 
> > I would agree with that, but the fact remains that the examples use 
> > rte_rand in
> > a context that is explicitly in violation of what you are documenting, which
> > certainly seems confusing.
> > 
> > > That said, if you have an attacker who's already broken into your DPDK
> > > process' virtual memory, I'm not sure I understand why he would care much
> > > about the state of the PRNG. Wouldn't he just read your private keys, your
> > > messages in clear text or whatever other secrets you might have in memory?
> > > 
> > Because the term "Breaking in" is a misnomer here.  In unix system, IIRC, 
> > global
> > variables are shared accross processes.
> 
> Mutable data, like the BSS section where the PRNG state goes, is not shared.
> 
Yes, you and bruce are right, It is shared, but marked copy on write, so
its safe from the common case of multiple programs loading the object.

Neil

> 


Re: [dpdk-dev] Instability of port ids

2019-05-19 Thread Jay Rolette
On Fri, May 17, 2019 at 1:46 PM Stephen Hemminger <
step...@networkplumber.org> wrote:

> Several customers have reported similar issues with how the owned/stack
> device model
> works in DPDK. With failsafe/tap and VF or netvsc and VF there are DPDK
> ports which
> are marked as owned and therefore not visible.
>
> The problem is the application has to guess and workaround these port
> values in
> the port mask that gets passed in on command line. This means a working
> application
> has to modify its startup script to run on Azure. Worse the actual port
> values
> change based on the number of NIC's configured.
>
> Overall this is a nuisance for users. The whole DPDK port index concept is
> a bad
> design. In Linux/BSD there is ifindex, but few applications care, they all
> use names
> which is better. Very very few application care that eth1 is ifindex 4.
>
> The whole assignment of ports is a mess as well since it is based on probe
> order
> and that is based on PCI order, and not anything dependable. It gets worse
> with
> command line arguments, vdev, owned devices etc.
>
> All I can think of is that:
>   * DPDK network devices need to have human readable names. current PCI is
> not good.
>   * The names need to be repeatable/persistent. udev names are probably
> better than anything so far.
> Or bsd style names but they end up being device dependent.
>   * The API to get from name to port needs to easy to use and the
> preferred method.
>   * All examples and documentation should avoid using port index directly.
> You need port for fast rx/tx but setup should be by name.
>

+1 agree completely


Re: [dpdk-dev] [PATCH v4] vhost: support inflight share memory protocol feature

2019-05-19 Thread Tiwei Bie
On Fri, May 17, 2019 at 05:47:07PM +0200, Maxime Coquelin wrote:
> On 5/5/19 11:02 AM, Li Lin wrote:
[...]
> >   /**
> >* Device structure contains all configuration information relating
> >* to the device.
> > @@ -303,6 +314,7 @@ struct virtio_net {
> > uint32_tnr_vring;
> > int dequeue_zero_copy;
> > struct vhost_virtqueue  *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
> > +   struct inflight_mem_info inflight_info;
> >   #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
> > charifname[IF_NAME_SZ];
> > uint64_tlog_size;
> 
> Do you have some code example using these new APIs?
> It would help for reviewing the patch.

+1


Re: [dpdk-dev] [PATCH v2 1/5] vhost: un-inline dirty pages logging functions

2019-05-19 Thread Tiwei Bie
On Fri, May 17, 2019 at 05:06:09PM +0200, Maxime Coquelin wrote:
[...]
> +void
> +__vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + uint64_t addr, uint64_t len)
> +{
> + uint64_t page;
> +
> + if (unlikely(!dev->log_base || !len))
> + return;
> +
> + if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
> + return;
> +
> + page = addr / VHOST_LOG_PAGE;
> + while (page * VHOST_LOG_PAGE < addr + len) {
> + vhost_log_cache_page(dev, vq, page);
> + page += 1;
> + }
> +}
> +
> +

Just need one empty line here.

For the rest,
Reviewed-by: Tiwei Bie 

>  void
>  cleanup_vq(struct vhost_virtqueue *vq, int destroy)
>  {


Re: [dpdk-dev] [PATCH v2 2/5] vhost: do not inline packed and split functions

2019-05-19 Thread Tiwei Bie
On Fri, May 17, 2019 at 05:06:10PM +0200, Maxime Coquelin wrote:
> At runtime either packed Tx/Rx functions will always be called,
> or split Tx/Rx functions will always be called.
> 
> This patch removes the forced inlining in order to reduce
> the I-cache pressure.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  lib/librte_vhost/virtio_net.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index a6a33a1013..8aeb180016 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -771,7 +771,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct 
> vhost_virtqueue *vq,
>   return error;
>  }
>  
> -static __rte_always_inline uint32_t
> +static __rte_noinline uint32_t
>  virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   struct rte_mbuf **pkts, uint32_t count)
>  {
> @@ -830,7 +830,7 @@ virtio_dev_rx_split(struct virtio_net *dev, struct 
> vhost_virtqueue *vq,
>   return pkt_idx;
>  }
>  
> -static __rte_always_inline uint32_t
> +static __rte_noinline uint32_t
>  virtio_dev_rx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   struct rte_mbuf **pkts, uint32_t count)
>  {
> @@ -1300,7 +1300,7 @@ get_zmbuf(struct vhost_virtqueue *vq)
>   return NULL;
>  }
>  
> -static __rte_always_inline uint16_t
> +static __rte_noinline uint16_t
>  virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
>  {
> @@ -1422,7 +1422,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct 
> vhost_virtqueue *vq,
>   return i;
>  }
>  
> -static __rte_always_inline uint16_t
> +static __rte_noinline uint16_t
>  virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
>  {
> -- 
> 2.21.0

Reviewed-by: Tiwei Bie 


Re: [dpdk-dev] [PATCH v2 3/5] vhost: do not inline unlikely fragmented buffers code

2019-05-19 Thread Tiwei Bie
On Fri, May 17, 2019 at 05:06:11PM +0200, Maxime Coquelin wrote:
[...]
>  
> +static void
> +copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
> + struct buf_vector *buf_vec)
> +{
> + uint64_t len;
> + uint64_t remain = sizeof(struct virtio_net_hdr);
> + uint64_t src;
> + uint64_t dst = (uint64_t)(uintptr_t)&hdr;

typo: s/&hdr/hdr/

> +
> + /*
> +  * No luck, the virtio-net header doesn't fit
> +  * in a contiguous virtual area.
> +  */
> + while (remain) {
> + len = RTE_MIN(remain, buf_vec->buf_len);
> + src = buf_vec->buf_addr;
> + rte_memcpy((void *)(uintptr_t)dst,
> + (void *)(uintptr_t)src, len);
> +
> + remain -= len;
> + dst += len;
> + buf_vec++;
> + }
> +}
> +
>  static __rte_always_inline int
>  copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
> struct buf_vector *buf_vec, uint16_t nr_vec,
> @@ -1094,28 +1126,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
> vhost_virtqueue *vq,
>  
>   if (virtio_net_with_host_offload(dev)) {
>   if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
> - uint64_t len;
> - uint64_t remain = sizeof(struct virtio_net_hdr);
> - uint64_t src;
> - uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
> - uint16_t hdr_vec_idx = 0;
> -
> - /*
> -  * No luck, the virtio-net header doesn't fit
> -  * in a contiguous virtual area.
> -  */

It's better to not move above comments.

For the rest,
Reviewed-by: Tiwei Bie 

> - while (remain) {
> - len = RTE_MIN(remain,
> - buf_vec[hdr_vec_idx].buf_len);
> - src = buf_vec[hdr_vec_idx].buf_addr;
> - rte_memcpy((void *)(uintptr_t)dst,
> -(void *)(uintptr_t)src, len);
> -
> - remain -= len;
> - dst += len;
> - hdr_vec_idx++;
> - }
> -
> + copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
>   hdr = &tmp_hdr;
>   } else {
>   hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
> -- 
> 2.21.0
>