Ho Huichao,

> According to RFC791,the options may appear or not in datagrams.
> They must be implemented by all IP modules (host and gateways).
> What is optional is their transmission in any particular datagram,
> not their implementation.So we have to deal with it during the
> fragmenting process.Add some test data for the IPv4 header optional
> field fragmenting.

Apologies for delay in getting back to you.
LGTM in general, just few extra questions/nits/suggestions below.

> 
> Signed-off-by: Huichao Cai <chcch...@163.com>
> ---
>  app/test/test_ipfrag.c               | 263 
> ++++++++++++++++++++++++++++++++---
>  lib/ip_frag/rte_ipv4_fragmentation.c |  77 +++++++++-
>  2 files changed, 318 insertions(+), 22 deletions(-)
> 
> diff --git a/app/test/test_ipfrag.c b/app/test/test_ipfrag.c
> index 1ced25a..996130d 100644
> --- a/app/test/test_ipfrag.c
> +++ b/app/test/test_ipfrag.c
> @@ -18,10 +18,96 @@
>  #define NUM_MBUFS 128
>  #define BURST 32
> 
> +/* IP options */
> +#define RTE_IPOPT_EOL                                0
> +#define RTE_IPOPT_NOP                                1
> +#define RTE_IPOPT_COPIED(v)                  ((v) & 0x80)
> +#define RTE_IPOPT_MAX_LEN                    40

These macros are dups for what we have in rte_ipv4_fragmentation.c
Would probably make sense to name them RTE_IPV4_IPOPT_... and put them 
in some public .h to avoid duplication.

> +#define RTE_IPOPT_MANUAL

Could you clarify what this macro does?
BTW, I assume it is a local one?
If so, no need for RTE_ prefix.

> +#ifdef RTE_IPOPT_MANUAL
> +uint8_t expected_first_frag_ipv4_opts[] = {
> +     0x07, 0x0b, 0x04, 0x00,
> +     0x00, 0x00, 0x00, 0x00,
> +     0x00, 0x00, 0x00, 0x83,
> +     0x07, 0x04, 0xc0, 0xa8,
> +     0xe3, 0x96, 0x00, 0x00,
> +};
> +
> +uint8_t expected_sub_frag_ipv4_opts[] = {
> +     0x83, 0x07, 0x04, 0xc0,
> +     0xa8, 0xe3, 0x96, 0x00,
> +};
> +#else
> +/**
> + * IPv4 Options
> + */
> +struct test_ipv4_opt {
> +     __extension__
> +     union {
> +             uint8_t type;               /**< option type */
> +             struct {
> +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> +                     uint8_t number:5;   /**< option number */
> +                     uint8_t category:2; /**< option class */
> +                     uint8_t copied:1;   /**< option copy flag */
> +#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> +                     uint8_t copied:1;   /**< option copy flag */
> +                     uint8_t category:2; /**< option class */
> +                     uint8_t number:5;   /**< option number */
> +#endif
> +             } s_type;
> +     };
> +     uint8_t length;                     /**< option length */
> +     uint8_t pointer;                    /**< option pointer */
> +     uint8_t data[37];                   /**< option data */
> +} __rte_packed;
> +
> +struct test_ipv4_opt test_ipv4_opts[] = {
> +     {
> +             .s_type.copied = 0,
> +             .s_type.category = 0,
> +             .s_type.number = 7,
> +             .length = 11,
> +             .pointer = 4,
> +     },
> +     {
> +             .s_type.copied = 1,
> +             .s_type.category = 0,
> +             .s_type.number = 3,
> +             .length = 7,
> +             .pointer = 4,
> +             .data[0] = 0xc0,
> +             .data[1] = 0xa8,
> +             .data[2] = 0xe3,
> +             .data[3] = 0x96,
> +     },
> +};
> +#endif
> +
> +struct test_opt_data {
> +     bool is_first_frag;              /**< offset is 0 */
> +     uint16_t len;                    /**< option data len */
> +     uint8_t data[RTE_IPOPT_MAX_LEN]; /**< option data */
> +};
> +
>  static struct rte_mempool *pkt_pool,
>                         *direct_pool,
>                         *indirect_pool;
> 
> +static inline void
> +hex_to_str(uint8_t *hex, uint16_t len, char *str)
> +{
> +     int i;
> +
> +     for (i = 0; i < len; i++) {
> +             sprintf(str, "%02x", hex[i]);
> +             str += 2;
> +     }
> +     *str = 0;
> +}
> +
>  static int
>  setup_buf_pool(void)
>  {
> @@ -88,23 +174,78 @@ static void ut_teardown(void)
>  {
>  }
> 
> +static inline void
> +test_get_ipv4_opt(bool is_first_frag,
> +     struct test_opt_data *expected_opt)
> +{
> +#ifdef RTE_IPOPT_MANUAL
> +     if (is_first_frag) {
> +             expected_opt->len = sizeof(expected_first_frag_ipv4_opts);
> +             rte_memcpy(expected_opt->data, expected_first_frag_ipv4_opts,
> +                     sizeof(expected_first_frag_ipv4_opts));
> +     } else {
> +             expected_opt->len = sizeof(expected_sub_frag_ipv4_opts);
> +             rte_memcpy(expected_opt->data, expected_sub_frag_ipv4_opts,
> +                     sizeof(expected_sub_frag_ipv4_opts));
> +     }
> +#else
> +     uint16_t i;
> +     uint16_t pos = 0;
> +     expected_opt->len = 0;
> +
> +     for (i = 0; i < RTE_DIM(test_ipv4_opts); i++) {
> +             if (unlikely(pos + test_ipv4_opts[i].length >
> +                             RTE_IPOPT_MAX_LEN))
> +                     return;
> +
> +             if (is_first_frag) {
> +                     rte_memcpy(expected_opt->data + pos, &test_ipv4_opts[i],
> +                             test_ipv4_opts[i].length);
> +                     expected_opt->len += test_ipv4_opts[i].length;
> +                     pos += test_ipv4_opts[i].length;
> +             } else {
> +                     if (test_ipv4_opts[i].s_type.copied) {
> +                             rte_memcpy(expected_opt->data + pos,
> +                                     &test_ipv4_opts[i],
> +                                     test_ipv4_opts[i].length);
> +                             expected_opt->len += test_ipv4_opts[i].length;
> +                             pos += test_ipv4_opts[i].length;
> +                     }
> +             }
> +     }
> +
> +     expected_opt->len = RTE_ALIGN_CEIL(expected_opt->len, 4);
> +     memset(expected_opt->data + pos, RTE_IPOPT_EOL,
> +             expected_opt->len - pos);
> +#endif
> +}
> +
>  static void
> -v4_allocate_packet_of(struct rte_mbuf *b, int fill,
> -                   size_t s, int df, uint8_t mf, uint16_t off,
> -                   uint8_t ttl, uint8_t proto, uint16_t pktid)
> +v4_allocate_packet_of(struct rte_mbuf *b, int fill, size_t s,
> +     int df, uint8_t mf, uint16_t off, uint8_t ttl, uint8_t proto,
> +     uint16_t pktid, bool have_opt, bool is_first_frag)
>  {
>       /* Create a packet, 2k bytes long */
>       b->data_off = 0;
>       char *data = rte_pktmbuf_mtod(b, char *);
> -     rte_be16_t fragment_offset = 0; /**< fragmentation offset */
> +     rte_be16_t fragment_offset = 0; /* fragmentation offset */
> +     uint16_t iph_len;
> +     struct test_opt_data opt;
> 
> -     memset(data, fill, sizeof(struct rte_ipv4_hdr) + s);
> +     opt.len = 0;
> +
> +     if (have_opt)
> +             test_get_ipv4_opt(is_first_frag, &opt);
> +
> +     iph_len = sizeof(struct rte_ipv4_hdr) + opt.len;
> +     memset(data, fill, iph_len + s);
> 
>       struct rte_ipv4_hdr *hdr = (struct rte_ipv4_hdr *)data;
> 
> -     hdr->version_ihl = 0x45; /* standard IP header... */
> +     hdr->version_ihl = 0x40; /* ipv4 */
> +     hdr->version_ihl += (iph_len / 4);
>       hdr->type_of_service = 0;
> -     b->pkt_len = s + sizeof(struct rte_ipv4_hdr);
> +     b->pkt_len = s + iph_len;
>       b->data_len = b->pkt_len;
>       hdr->total_length = rte_cpu_to_be_16(b->pkt_len);
>       hdr->packet_id = rte_cpu_to_be_16(pktid);
> @@ -131,6 +272,8 @@ static void ut_teardown(void)
>       hdr->hdr_checksum = 0;
>       hdr->src_addr = rte_cpu_to_be_32(0x8080808);
>       hdr->dst_addr = rte_cpu_to_be_32(0x8080404);
> +
> +     rte_memcpy(hdr + 1, opt.data, opt.len);
>  }
> 
>  static void
> @@ -187,6 +330,43 @@ static void ut_teardown(void)
>       }
>  }
> 
> +static inline void
> +test_get_frag_opt(struct rte_mbuf **mb, int32_t num,
> +     struct test_opt_data *opt, int ipv)
> +{
> +     int32_t i;
> +
> +     for (i = 0; i < num; i++) {
> +             if (ipv == 4) {
> +                     struct rte_ipv4_hdr *iph =
> +                         rte_pktmbuf_mtod(mb[i], struct rte_ipv4_hdr *);
> +                     uint16_t header_len = (iph->version_ihl &
> +                             RTE_IPV4_HDR_IHL_MASK) *
> +                             RTE_IPV4_IHL_MULTIPLIER;
> +                     uint16_t opt_len = header_len -
> +                             sizeof(struct rte_ipv4_hdr);
> +
> +                     if ((rte_be_to_cpu_16(iph->fragment_offset) &
> +                                 RTE_IPV4_HDR_OFFSET_MASK) == 0)
> +                             opt->is_first_frag = true;
> +                     else
> +                             opt->is_first_frag = false;
> +
> +                     if (opt_len && (opt_len <= RTE_IPOPT_MAX_LEN)) {
> +                             char *iph_opt = rte_pktmbuf_mtod_offset(mb[i],
> +                                 char *, sizeof(struct rte_ipv4_hdr));
> +                             opt->len = opt_len;
> +                             rte_memcpy(opt->data, iph_opt, opt_len);
> +                     } else {
> +                             opt->len = RTE_IPOPT_MAX_LEN;
> +                             memset(opt->data, RTE_IPOPT_EOL,
> +                                 sizeof(opt->data));
> +                     }
> +                     opt++;
> +             }
> +     }
> +}
> +
>  static int
>  test_ip_frag(void)
>  {
> @@ -206,32 +386,42 @@ static void ut_teardown(void)
>               uint16_t pkt_id;
>               int      expected_frags;
>               uint16_t expected_fragment_offset[BURST];
> +             bool have_opt;
> +             bool is_first_frag;
>       } tests[] = {
>                {4, 1280, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID,       2,
> -               {0x2000, 0x009D}},
> +               {0x2000, 0x009D}, 0},
>                {4, 1280, 1400, 0, 0, 0, 64, IPPROTO_ICMP, 0,            2,
> -               {0x2000, 0x009D}},
> +               {0x2000, 0x009D}, 0},
>                {4,  600, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID,       3,
> -               {0x2000, 0x2048, 0x0090}},
> +               {0x2000, 0x2048, 0x0090}, 0},
>                {4, 4, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID,    -EINVAL},
>                {4, 600, 1400, 1, 0, 0, 64, IPPROTO_ICMP, RND_ID, -ENOTSUP},
>                {4, 600, 1400, 0, 0, 0, 0, IPPROTO_ICMP, RND_ID,         3,
> -               {0x2000, 0x2048, 0x0090}},
> +               {0x2000, 0x2046, 0x008C}, 1, 1},
> +              /* The first fragment */
> +              {4, 68, 104, 0, 1, 0, 0, IPPROTO_ICMP, RND_ID,           5,
> +               {0x2000, 0x2003, 0x2006, 0x2009, 0x200C}, 1, 1},
> +              /* The middle fragment */
>                {4, 68, 104, 0, 1, 13, 0, IPPROTO_ICMP, RND_ID,          3,
> -               {0x200D, 0x2013, 0x2019}},
> -
> +               {0x200D, 0x2012, 0x2017}, 1, 0},
> +              /* The last fragment */
> +              {4, 68, 104, 0, 0, 26, 0, IPPROTO_ICMP, RND_ID,          3,
> +               {0x201A, 0x201F, 0x0024}, 1, 0},
>                {6, 1280, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID,       2,
> -               {0x0001, 0x04D0}},
> +               {0x0001, 0x04D0}, 0},
>                {6, 1300, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID,       2,
> -               {0x0001, 0x04E0}},
> +               {0x0001, 0x04E0}, 0},
>                {6, 4, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID,    -EINVAL},
>                {6, 1300, 1400, 0, 0, 0, 0, IPPROTO_ICMP, RND_ID,        2,
> -               {0x0001, 0x04E0}},
> +               {0x0001, 0x04E0}, 0},
>       };
> 
>       for (i = 0; i < RTE_DIM(tests); i++) {
>               int32_t len = 0;
>               uint16_t fragment_offset[BURST];
> +             struct test_opt_data opt_res[BURST];
> +             struct test_opt_data opt_exp;
>               uint16_t pktid = tests[i].pkt_id;
>               struct rte_mbuf *pkts_out[BURST];
>               struct rte_mbuf *b = rte_pktmbuf_alloc(pkt_pool);
> @@ -250,7 +440,9 @@ static void ut_teardown(void)
>                                             tests[i].set_of,
>                                             tests[i].ttl,
>                                             tests[i].proto,
> -                                           pktid);
> +                                           pktid,
> +                                           tests[i].have_opt,
> +                                           tests[i].is_first_frag);
>               } else if (tests[i].ipv == 6) {
>                       v6_allocate_packet_of(b, 0x41414141,
>                                             tests[i].pkt_size,
> @@ -275,17 +467,21 @@ static void ut_teardown(void)
>               if (len > 0) {
>                       test_get_offset(pkts_out, len,
>                           fragment_offset, tests[i].ipv);
> +                     if (tests[i].have_opt)
> +                             test_get_frag_opt(pkts_out, len,
> +                                     opt_res,
> +                                     tests[i].ipv);
>                       test_free_fragments(pkts_out, len);
>               }
> 
> -             printf("%zd: checking %d with %d\n", i, len,
> +             printf("[check frag number]%zd: checking %d with %d\n", i, len,
>                      tests[i].expected_frags);
>               RTE_TEST_ASSERT_EQUAL(len, tests[i].expected_frags,
>                                     "Failed case %zd.\n", i);
> 
>               if (len > 0) {
>                       for (j = 0; j < (size_t)len; j++) {
> -                             printf("%zd-%zd: checking %d with %d\n",
> +                             printf("[check offset]%zd-%zd: checking %d with 
> %d\n",
>                                   i, j, fragment_offset[j],
>                                   rte_cpu_to_be_16(
>                                       tests[i].expected_fragment_offset[j]));
> @@ -294,6 +490,35 @@ static void ut_teardown(void)
>                                       tests[i].expected_fragment_offset[j]),
>                                   "Failed case %zd.\n", i);
>                       }
> +
> +                     if (tests[i].have_opt && (tests[i].ipv == 4)) {
> +                             for (j = 0; j < (size_t)len; j++) {
> +                                     char opt_res_str[2 *
> +                                             RTE_IPOPT_MAX_LEN + 1];
> +                                     char opt_exp_str[2 *
> +                                             RTE_IPOPT_MAX_LEN + 1];
> +
> +                                     test_get_ipv4_opt(
> +                                             opt_res[j].is_first_frag,
> +                                             &opt_exp);
> +                                     hex_to_str(opt_res[j].data,
> +                                             opt_res[j].len,
> +                                             opt_res_str);
> +                                     hex_to_str(opt_exp.data,
> +                                             opt_exp.len,
> +                                             opt_exp_str);
> +
> +                                     printf(
> +                                             "[check ipv4 option]%zd-%zd: 
> checking (%u)%s with (%u)%s\n",
> +                                             i, j,
> +                                             opt_res[j].len, opt_res_str,
> +                                             opt_exp.len, opt_exp_str);
> +                                             RTE_TEST_ASSERT_SUCCESS(
> +                                                     strcmp(opt_res_str,
> +                                                             opt_exp_str),
> +                                             "Failed case %zd.\n", i);
> +                             }
> +                     }
>               }
> 
>       }
> diff --git a/lib/ip_frag/rte_ipv4_fragmentation.c 
> b/lib/ip_frag/rte_ipv4_fragmentation.c
> index 2e7739d..57b8bc1 100644
> --- a/lib/ip_frag/rte_ipv4_fragmentation.c
> +++ b/lib/ip_frag/rte_ipv4_fragmentation.c
> @@ -12,6 +12,12 @@
> 
>  #include "ip_frag_common.h"
> 
> +/* IP options */
> +#define RTE_IPOPT_EOL                                0
> +#define RTE_IPOPT_NOP                                1
> +#define RTE_IPOPT_COPIED(v)                  ((v) & 0x80)
> +#define RTE_IPOPT_MAX_LEN                    40
> +
>  /* Fragment Offset */
>  #define      RTE_IPV4_HDR_DF_SHIFT                   14
>  #define      RTE_IPV4_HDR_MF_SHIFT                   13
> @@ -22,6 +28,8 @@
> 
>  #define      IPV4_HDR_FO_ALIGN                       (1 << 
> RTE_IPV4_HDR_FO_SHIFT)
> 
> +#define      RTE_IPV4_HDR_MAX_LEN            60
> +
>  static inline void __fill_ipv4hdr_frag(struct rte_ipv4_hdr *dst,
>               const struct rte_ipv4_hdr *src, uint16_t header_len,
>               uint16_t len, uint16_t fofs, uint16_t dofs, uint32_t mf)
> @@ -41,6 +49,50 @@ static inline void __free_fragments(struct rte_mbuf *mb[], 
> uint32_t num)
>               rte_pktmbuf_free(mb[i]);
>  }
> 
> +static inline void __create_ipopt_frag_hdr(uint8_t *iph,
> +     uint16_t *ipopt_len, uint8_t *ipopt_frag_hdr)
> +{


Instead of returning void and having out parameter (ipopt_len),
why just not make it a return value?
static inline uint16_t
__create_ipopt_frag_hdr(const uint8_t *iph, uint8_t * ipopt_frag_hdr, uint16_t 
len)
{
        ....
        return ipopt_len;
}

> +     uint16_t len = *ipopt_len;
> +     struct rte_ipv4_hdr *iph_opt = (struct rte_ipv4_hdr *)ipopt_frag_hdr;
> +
> +     *ipopt_len = 0;
> +     rte_memcpy(ipopt_frag_hdr, iph, sizeof(struct rte_ipv4_hdr));
> +     iph_opt->ihl = sizeof(struct rte_ipv4_hdr) / RTE_IPV4_IHL_MULTIPLIER;

We probably can update ihl once at the very end of this function.

> +     ipopt_frag_hdr += sizeof(struct rte_ipv4_hdr);
> +
> +     if (unlikely(len > RTE_IPOPT_MAX_LEN))
> +             return;
> +
> +     uint8_t *p_opt = iph + sizeof(struct rte_ipv4_hdr);
> +
> +     while (len > 0) {
> +             if (unlikely(*p_opt == RTE_IPOPT_NOP)) {
> +                     len--;
> +                     p_opt++;
> +                     continue;
> +             } else if (unlikely(*p_opt == RTE_IPOPT_EOL))
> +                     break;
> +
> +             if (unlikely(p_opt[1] < 2 || p_opt[1] > len))
> +                     break;
> +
> +             if (RTE_IPOPT_COPIED(*p_opt)) {
> +                     rte_memcpy(ipopt_frag_hdr + *ipopt_len,
> +                             p_opt, p_opt[1]);
> +                     *ipopt_len += p_opt[1];
> +             }
> +
> +             len -= p_opt[1];
> +             p_opt += p_opt[1];
> +     }
> +
> +     len = RTE_ALIGN_CEIL(*ipopt_len, RTE_IPV4_IHL_MULTIPLIER);
> +     memset(ipopt_frag_hdr + *ipopt_len,
> +             RTE_IPOPT_EOL, len - *ipopt_len);
> +     *ipopt_len = len;
> +     iph_opt->ihl += len / RTE_IPV4_IHL_MULTIPLIER;
> +}
> +
>  /**
>   * IPv4 fragmentation.
>   *
> @@ -76,6 +128,8 @@ static inline void __free_fragments(struct rte_mbuf *mb[], 
> uint32_t num)
>       uint32_t more_in_segs;
>       uint16_t fragment_offset, flag_offset, frag_size, header_len;
>       uint16_t frag_bytes_remaining;
> +     uint8_t ipopt_frag_hdr[RTE_IPV4_HDR_MAX_LEN];
> +     uint16_t ipopt_len;
> 
>       /*
>        * Formal parameter checking.
> @@ -117,6 +171,7 @@ static inline void __free_fragments(struct rte_mbuf 
> *mb[], uint32_t num)
>       in_seg_data_pos = header_len;
>       out_pkt_pos = 0;
>       fragment_offset = 0;
> +     ipopt_len = header_len - sizeof(struct rte_ipv4_hdr);
> 
>       more_in_segs = 1;
>       while (likely(more_in_segs)) {
> @@ -188,10 +243,26 @@ static inline void __free_fragments(struct rte_mbuf 
> *mb[], uint32_t num)
>                   (uint16_t)out_pkt->pkt_len,
>                   flag_offset, fragment_offset, more_in_segs);
> 
> -             fragment_offset = (uint16_t)(fragment_offset +
> -                 out_pkt->pkt_len - header_len);
> +             /* Create a separate IP header to handle frag options. */
> +             if (unlikely((fragment_offset == 0) &&
> +                         ((flag_offset & RTE_IPV4_HDR_OFFSET_MASK) == 0) &&
> +                         (ipopt_len))) {
> +                     __create_ipopt_frag_hdr((uint8_t *)in_hdr,
> +                             &ipopt_len, ipopt_frag_hdr);

Can we probably do that before the loop (as we have to do it only once anyway?
Something like:

....
ipopt_len = header_len - sizeof(struct rte_ipv4_hdr);
if (ipopt_len > RTE_IPOPT_MAX_LEN)
        return -EINVAL;
if (ipopt_len != 0)
        ipopt_len = __create_ipopt_frag_hdr((in_hdr, ipopt_frag_hdr, ipopt_len);
....

And then:
while (likely(more_in_segs)) {
        ...
        if (ipopt_len ! = 0)
                in_hdr = (struct rte_ipv4_hdr *)ipopt_frag_hdr; 
}

> +
> +                     fragment_offset = (uint16_t)(fragment_offset +
> +                             out_pkt->pkt_len - header_len);
> 
> -             out_pkt->l3_len = header_len;
> +                     out_pkt->l3_len = header_len;
> +
> +                     header_len = sizeof(struct rte_ipv4_hdr) + ipopt_len;
> +                     in_hdr = (struct rte_ipv4_hdr *)ipopt_frag_hdr;
> +             } else {
> +                     fragment_offset = (uint16_t)(fragment_offset +
> +                         out_pkt->pkt_len - header_len);
> +
> +                     out_pkt->l3_len = header_len;
> +             }
> 
>               /* Write the fragment to the output list */
>               pkts_out[out_pkt_pos] = out_pkt;
> --
> 1.8.3.1

Reply via email to