Re: [dpdk-dev] [PATCH] ipsec: include high order bytes of esn in pkt len
Hi Lukasz, Thanks for clarifications. Looks good in general. Few small comments below. Konstantin > When esn is used then high-order 32 bits are included in ICV > calculation however are not transmitted. Update packet length > to be consistent with auth data offset and length before crypto > operation. High-order 32 bits of esn will be removed from packet > length in crypto post processing. > > Change-Id: I5ba50e341059a8d6a5e4ce7c626dfe7b9173740b > Signed-off-by: Lukasz Bartosik > --- > lib/librte_ipsec/esp_inb.c | 56 > + > lib/librte_ipsec/esp_outb.c | 31 + > lib/librte_ipsec/sa.c | 4 ++-- > lib/librte_ipsec/sa.h | 8 +++ > 4 files changed, 87 insertions(+), 12 deletions(-) > > diff --git a/lib/librte_ipsec/esp_inb.c b/lib/librte_ipsec/esp_inb.c > index 4e0e12a..eb899e3 100644 > --- a/lib/librte_ipsec/esp_inb.c > +++ b/lib/librte_ipsec/esp_inb.c > @@ -16,7 +16,8 @@ > #include "pad.h" > > typedef uint16_t (*esp_inb_process_t)(const struct rte_ipsec_sa *sa, > - struct rte_mbuf *mb[], uint32_t sqn[], uint32_t dr[], uint16_t num); > + struct rte_mbuf *mb[], uint32_t sqn[], uint32_t dr[], uint16_t num, > + uint8_t is_inline); > > /* > * helper function to fill crypto_sym op for cipher+auth algorithms. > @@ -181,6 +182,15 @@ inb_pkt_prepare(const struct rte_ipsec_sa *sa, const > struct replay_sqn *rsn, > icv->va = rte_pktmbuf_mtod_offset(ml, void *, icv_ofs); > icv->pa = rte_pktmbuf_iova_offset(ml, icv_ofs); > > + /* > + * if esn is used then high-order 32 bits are also used in ICV > + * calculation but are not transmitted, update packet length > + * to be consistent with auth data length and offset, this will > + * be subtracted from packet length in post crypto processing Here and in several other comments below - you repeat basically the same thing. Seems a bit excessive. I suppose just to put in in one place, or probably even in patch description will be enough. > + */ > + mb->pkt_len += sa->sqh_len; > + ml->data_len += sa->sqh_len; > + > inb_pkt_xprepare(sa, sqn, icv); > return plen; > } > @@ -373,14 +383,20 @@ tun_process_step3(struct rte_mbuf *mb, uint64_t > txof_msk, uint64_t txof_val) > */ > static inline uint16_t > tun_process(const struct rte_ipsec_sa *sa, struct rte_mbuf *mb[], > - uint32_t sqn[], uint32_t dr[], uint16_t num) > + uint32_t sqn[], uint32_t dr[], uint16_t num, uint8_t is_inline) > { > uint32_t adj, i, k, tl; > uint32_t hl[num]; > struct esp_tail espt[num]; > struct rte_mbuf *ml[num]; > > - const uint32_t tlen = sa->icv_len + sizeof(espt[0]); > + /* > + * remove high-order 32 bits of esn from packet length > + * which was added before crypto processing, this doesn't > + * apply to inline case > + */ This comment seems a bit misleading, as we have remove not only sqh, but also icv, espt, padding. > + const uint32_t tlen = sa->icv_len + sizeof(espt[0]) + > + (is_inline ? 0 : sa->sqh_len); > const uint32_t cofs = sa->ctp.cipher.offset; > > /* > @@ -420,7 +436,7 @@ tun_process(const struct rte_ipsec_sa *sa, struct > rte_mbuf *mb[], > */ > static inline uint16_t > trs_process(const struct rte_ipsec_sa *sa, struct rte_mbuf *mb[], > - uint32_t sqn[], uint32_t dr[], uint16_t num) > + uint32_t sqn[], uint32_t dr[], uint16_t num, uint8_t is_inline) > { > char *np; > uint32_t i, k, l2, tl; > @@ -428,7 +444,13 @@ trs_process(const struct rte_ipsec_sa *sa, struct > rte_mbuf *mb[], > struct esp_tail espt[num]; > struct rte_mbuf *ml[num]; > > - const uint32_t tlen = sa->icv_len + sizeof(espt[0]); > + /* > + * remove high-order 32 bits of esn from packet length > + * which was added before crypto processing, this doesn't > + * apply to inline case > + */ > + const uint32_t tlen = sa->icv_len + sizeof(espt[0]) + > + (is_inline ? 0 : sa->sqh_len); > const uint32_t cofs = sa->ctp.cipher.offset; > > /* > @@ -496,8 +518,8 @@ esp_inb_rsn_update(struct rte_ipsec_sa *sa, const > uint32_t sqn[], > * process group of ESP inbound packets. > */ > static inline uint16_t > -esp_inb_pkt_process(const struct rte_ipsec_session *ss, > - struct rte_mbuf *mb[], uint16_t num, esp_inb_process_t process) > +esp_inb_pkt_process(const struct rte_ipsec_session *ss, struct rte_mbuf > *mb[], > + uint16_t num, uint8_t is_inline, esp_inb_process_t process) > { > uint32_t k, n; > struct rte_ipsec_sa *sa; > @@ -507,7 +529,7 @@ esp_inb_pkt_process(const struct rte_ipsec_session *ss, > sa = ss->sa; > > /* process packets, extract seq numbers */ > - k = process(sa, mb, sqn, dr, num); > + k = process(sa, mb, sqn, dr, num, is_inline); > > /* handle unprocessed mbufs */ > if
Re: [dpdk-dev] [PATCH v1] lib/ipsec: add support for header construction
Hi, > > Add support for RFC 4301(5.1.2) to update of > Type of service field and Traffic class field > bits inside ipv4/ipv6 packets for outbound cases > and inbound cases which deals with the update of > the DSCP/ENC bits inside each of the fields. > > Signed-off-by: Marko Kovacevic > --- > examples/ipsec-secgw/sa.c | 2 + > lib/librte_ipsec/esp_inb.c | 14 - > lib/librte_ipsec/esp_outb.c| 4 +- > lib/librte_ipsec/iph.h | 119 > +++-- > lib/librte_ipsec/rte_ipsec_sa.h| 25 > lib/librte_ipsec/sa.c | 17 ++ > lib/librte_ipsec/sa.h | 2 + > lib/librte_net/rte_ip.h| 8 +++ > lib/librte_security/rte_security.h | 9 +++ > 9 files changed, 191 insertions(+), 9 deletions(-) Looks good in general, some generic comments: - I think it is better to split the patch into few sub-pathces: One for rte_security, second for rte_net, third - rte_ipsec, forth - examples/ipsec-secgw - Would be good to add support for other options too (ttl, etc.) - Would be good to add new test-case for it into examples/ipsec-secgw/test/ Plus few nits in the code below. Konstantin > > diff --git a/examples/ipsec-secgw/sa.c b/examples/ipsec-secgw/sa.c > index b850e9839..4d85d09df 100644 > --- a/examples/ipsec-secgw/sa.c > +++ b/examples/ipsec-secgw/sa.c > @@ -991,6 +991,8 @@ fill_ipsec_sa_prm(struct rte_ipsec_sa_prm *prm, const > struct ipsec_sa *ss, > prm->ipsec_xform.mode = (ss->flags == TRANSPORT) ? > RTE_SECURITY_IPSEC_SA_MODE_TRANSPORT : > RTE_SECURITY_IPSEC_SA_MODE_TUNNEL; > + prm->ipsec_xform.options.ecn = 1; > + prm->ipsec_xform.options.copy_dscp = 1; > > if (ss->flags == IP4_TUNNEL) { > prm->ipsec_xform.tunnel.type = RTE_SECURITY_IPSEC_TUNNEL_IPV4; > diff --git a/lib/librte_ipsec/esp_inb.c b/lib/librte_ipsec/esp_inb.c > index 4e0e12a85..8a3cb8a15 100644 > --- a/lib/librte_ipsec/esp_inb.c > +++ b/lib/librte_ipsec/esp_inb.c > @@ -377,9 +377,10 @@ tun_process(const struct rte_ipsec_sa *sa, struct > rte_mbuf *mb[], > { > uint32_t adj, i, k, tl; > uint32_t hl[num]; > + void *inner_h; > + const void *outter_h; > struct esp_tail espt[num]; > struct rte_mbuf *ml[num]; > - > const uint32_t tlen = sa->icv_len + sizeof(espt[0]); > const uint32_t cofs = sa->ctp.cipher.offset; > > @@ -400,9 +401,16 @@ tun_process(const struct rte_ipsec_sa *sa, struct > rte_mbuf *mb[], > if (tun_process_check(mb[i], ml[i], espt[i], adj, tl, > sa->proto) == 0) { > > + outter_h = rte_pktmbuf_mtod_offset(mb[i], uint8_t *, > + mb[i]->l2_len); > + > /* modify packet's layout */ > - tun_process_step2(mb[i], ml[i], hl[i], adj, > - tl, sqn + k); > + inner_h = tun_process_step2(mb[i], ml[i], hl[i], adj, > + tl, sqn + k); > + > + if ((sa->type & INB_TUN_HDR_MSK) != 0) > + update_inb_tun_l3_hdr(sa, inner_h, outter_h); > + > /* update mbuf's metadata */ > tun_process_step3(mb[i], sa->tx_offload.msk, > sa->tx_offload.val); > diff --git a/lib/librte_ipsec/esp_outb.c b/lib/librte_ipsec/esp_outb.c > index c798bc4c4..a71164e0c 100644 > --- a/lib/librte_ipsec/esp_outb.c > +++ b/lib/librte_ipsec/esp_outb.c > @@ -152,8 +152,8 @@ outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t > sqc, > rte_memcpy(ph, sa->hdr, sa->hdr_len); > > /* update original and new ip header fields */ > - update_tun_l3hdr(sa, ph + sa->hdr_l3_off, mb->pkt_len, sa->hdr_l3_off, > - sqn_low16(sqc)); > + update_outb_tun_l3hdr(sa, ph + sa->hdr_l3_off, ph + hlen, mb->pkt_len, > + sa->hdr_l3_off, sqn_low16(sqc)); > > /* update spi, seqn and iv */ > esph = (struct esp_hdr *)(ph + sa->hdr_len); > diff --git a/lib/librte_ipsec/iph.h b/lib/librte_ipsec/iph.h > index 58930cf18..f45db5d4a 100644 > --- a/lib/librte_ipsec/iph.h > +++ b/lib/librte_ipsec/iph.h > @@ -11,6 +11,11 @@ > * used internally by ipsec library. > */ > > +#define IPV6_DSCP_MASK (DSCP_MASK << IPV6_HDR_TC_SHIFT) > +#define IPV6_ECN_MASK(ECN_MASK << IPV6_HDR_TC_SHIFT) > +#define IPV6_TOS_MASK(IPV6_ECN_MASK | IPV6_DSCP_MASK) > +#define IPV6_ECN_CE IPV6_ECN_MASK > + > /* > * Move preceding (L3) headers down to remove ESP header and IV. > */ > @@ -35,6 +40,26 @@ insert_esph(char *np, char *op, uint32_t hlen) > np[i] = op[i]; > } > > +static inline uint8_t > +get_ipv6_tos(rte_be32_t vtc_flow) > +{ > + uint32_t v; > + > + v = rte_be_to_cpu_32(vtc_flow); > + return v >> IPV6_HDR_TC_SHIFT; > +} > + > +static inline
Re: [dpdk-dev] [PATCH 1/6] eal: replace libc-based random number generation with LFSR
On Fri, May 17, 2019 at 11:10:25PM +0200, Mattias Rönnblom wrote: > On 2019-05-17 21:27, Neil Horman wrote: > > > > > > diff --git a/lib/librte_eal/common/rte_random.c > > > > > b/lib/librte_eal/common/rte_random.c > > > > > new file mode 100644 > > > > > index 0..4d3cf5226 > > > > > --- /dev/null > > > > > +++ b/lib/librte_eal/common/rte_random.c > > > > > @@ -0,0 +1,139 @@ > > > > > +/* SPDX-License-Identifier: BSD-3-Clause > > > > > + * Copyright(c) 2019 Ericsson AB > > > > > + */ > > > > > + > > > > > +#include > > > > > + > > > > > +#include > > > > > +#include > > > > > +#include > > > > > +#include > > > > > +#include > > > > > +#include > > > > > + > > > > > +struct rte_rand_state { > > > > > + uint64_t z1; > > > > > + uint64_t z2; > > > > > + uint64_t z3; > > > > > + uint64_t z4; > > > > > + uint64_t z5; > > > > > +} __rte_cache_aligned; > > > > > + > > > > > +static struct rte_rand_state rand_states[RTE_MAX_LCORE]; > > > > > + > > > > > > > > It just occured to me that this variable embodies all the state of the > > > > rng. > > > > Whats to stop an attacker from digging through ram to get this info and > > > > predicting what the output will be? I understand that this rng probably > > > > shouldn't be considered secure for cryptographic use, but it is used in > > > > the > > > > ipsec test example, so it could be mistaken for an rng that is. > > > > > > > > > > rte_rand() was never safe for use in cryptography, and now it's spelled > > > out > > > in the API documentation. > > > > > > If the IPsec example uses rte_rand() for anything security-related, it's a > > > bug or at least should be accompanied by a comment, in my opinion. > > > > > I would agree with that, but the fact remains that the examples use > > rte_rand in > > a context that is explicitly in violation of what you are documenting, which > > certainly seems confusing. > > > > > That said, if you have an attacker who's already broken into your DPDK > > > process' virtual memory, I'm not sure I understand why he would care much > > > about the state of the PRNG. Wouldn't he just read your private keys, your > > > messages in clear text or whatever other secrets you might have in memory? > > > > > Because the term "Breaking in" is a misnomer here. In unix system, IIRC, > > global > > variables are shared accross processes. > > Mutable data, like the BSS section where the PRNG state goes, is not shared. > Yes, you and bruce are right, It is shared, but marked copy on write, so its safe from the common case of multiple programs loading the object. Neil >
Re: [dpdk-dev] Instability of port ids
On Fri, May 17, 2019 at 1:46 PM Stephen Hemminger < step...@networkplumber.org> wrote: > Several customers have reported similar issues with how the owned/stack > device model > works in DPDK. With failsafe/tap and VF or netvsc and VF there are DPDK > ports which > are marked as owned and therefore not visible. > > The problem is the application has to guess and workaround these port > values in > the port mask that gets passed in on command line. This means a working > application > has to modify its startup script to run on Azure. Worse the actual port > values > change based on the number of NIC's configured. > > Overall this is a nuisance for users. The whole DPDK port index concept is > a bad > design. In Linux/BSD there is ifindex, but few applications care, they all > use names > which is better. Very very few application care that eth1 is ifindex 4. > > The whole assignment of ports is a mess as well since it is based on probe > order > and that is based on PCI order, and not anything dependable. It gets worse > with > command line arguments, vdev, owned devices etc. > > All I can think of is that: > * DPDK network devices need to have human readable names. current PCI is > not good. > * The names need to be repeatable/persistent. udev names are probably > better than anything so far. > Or bsd style names but they end up being device dependent. > * The API to get from name to port needs to easy to use and the > preferred method. > * All examples and documentation should avoid using port index directly. > You need port for fast rx/tx but setup should be by name. > +1 agree completely
Re: [dpdk-dev] [PATCH v4] vhost: support inflight share memory protocol feature
On Fri, May 17, 2019 at 05:47:07PM +0200, Maxime Coquelin wrote: > On 5/5/19 11:02 AM, Li Lin wrote: [...] > > /** > >* Device structure contains all configuration information relating > >* to the device. > > @@ -303,6 +314,7 @@ struct virtio_net { > > uint32_tnr_vring; > > int dequeue_zero_copy; > > struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; > > + struct inflight_mem_info inflight_info; > > #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) > > charifname[IF_NAME_SZ]; > > uint64_tlog_size; > > Do you have some code example using these new APIs? > It would help for reviewing the patch. +1
Re: [dpdk-dev] [PATCH v2 1/5] vhost: un-inline dirty pages logging functions
On Fri, May 17, 2019 at 05:06:09PM +0200, Maxime Coquelin wrote: [...] > +void > +__vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq, > + uint64_t addr, uint64_t len) > +{ > + uint64_t page; > + > + if (unlikely(!dev->log_base || !len)) > + return; > + > + if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) > + return; > + > + page = addr / VHOST_LOG_PAGE; > + while (page * VHOST_LOG_PAGE < addr + len) { > + vhost_log_cache_page(dev, vq, page); > + page += 1; > + } > +} > + > + Just need one empty line here. For the rest, Reviewed-by: Tiwei Bie > void > cleanup_vq(struct vhost_virtqueue *vq, int destroy) > {
Re: [dpdk-dev] [PATCH v2 2/5] vhost: do not inline packed and split functions
On Fri, May 17, 2019 at 05:06:10PM +0200, Maxime Coquelin wrote: > At runtime either packed Tx/Rx functions will always be called, > or split Tx/Rx functions will always be called. > > This patch removes the forced inlining in order to reduce > the I-cache pressure. > > Signed-off-by: Maxime Coquelin > --- > lib/librte_vhost/virtio_net.c | 8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c > index a6a33a1013..8aeb180016 100644 > --- a/lib/librte_vhost/virtio_net.c > +++ b/lib/librte_vhost/virtio_net.c > @@ -771,7 +771,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct > vhost_virtqueue *vq, > return error; > } > > -static __rte_always_inline uint32_t > +static __rte_noinline uint32_t > virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, > struct rte_mbuf **pkts, uint32_t count) > { > @@ -830,7 +830,7 @@ virtio_dev_rx_split(struct virtio_net *dev, struct > vhost_virtqueue *vq, > return pkt_idx; > } > > -static __rte_always_inline uint32_t > +static __rte_noinline uint32_t > virtio_dev_rx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, > struct rte_mbuf **pkts, uint32_t count) > { > @@ -1300,7 +1300,7 @@ get_zmbuf(struct vhost_virtqueue *vq) > return NULL; > } > > -static __rte_always_inline uint16_t > +static __rte_noinline uint16_t > virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, > struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) > { > @@ -1422,7 +1422,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct > vhost_virtqueue *vq, > return i; > } > > -static __rte_always_inline uint16_t > +static __rte_noinline uint16_t > virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, > struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) > { > -- > 2.21.0 Reviewed-by: Tiwei Bie
Re: [dpdk-dev] [PATCH v2 3/5] vhost: do not inline unlikely fragmented buffers code
On Fri, May 17, 2019 at 05:06:11PM +0200, Maxime Coquelin wrote: [...] > > +static void > +copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, > + struct buf_vector *buf_vec) > +{ > + uint64_t len; > + uint64_t remain = sizeof(struct virtio_net_hdr); > + uint64_t src; > + uint64_t dst = (uint64_t)(uintptr_t)&hdr; typo: s/&hdr/hdr/ > + > + /* > + * No luck, the virtio-net header doesn't fit > + * in a contiguous virtual area. > + */ > + while (remain) { > + len = RTE_MIN(remain, buf_vec->buf_len); > + src = buf_vec->buf_addr; > + rte_memcpy((void *)(uintptr_t)dst, > + (void *)(uintptr_t)src, len); > + > + remain -= len; > + dst += len; > + buf_vec++; > + } > +} > + > static __rte_always_inline int > copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, > struct buf_vector *buf_vec, uint16_t nr_vec, > @@ -1094,28 +1126,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct > vhost_virtqueue *vq, > > if (virtio_net_with_host_offload(dev)) { > if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) { > - uint64_t len; > - uint64_t remain = sizeof(struct virtio_net_hdr); > - uint64_t src; > - uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr; > - uint16_t hdr_vec_idx = 0; > - > - /* > - * No luck, the virtio-net header doesn't fit > - * in a contiguous virtual area. > - */ It's better to not move above comments. For the rest, Reviewed-by: Tiwei Bie > - while (remain) { > - len = RTE_MIN(remain, > - buf_vec[hdr_vec_idx].buf_len); > - src = buf_vec[hdr_vec_idx].buf_addr; > - rte_memcpy((void *)(uintptr_t)dst, > -(void *)(uintptr_t)src, len); > - > - remain -= len; > - dst += len; > - hdr_vec_idx++; > - } > - > + copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec); > hdr = &tmp_hdr; > } else { > hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr); > -- > 2.21.0 >