rte_pcapng_write_packets() fails when it is asked to write more packets
than IOV_MAX. The failure comes from the writev() system call: its iovcnt
argument is only valid if it is greater than 0 and less than or equal to
IOV_MAX, as defined in <limits.h>.

To avoid this problem, split the iovec array into chunks of at most
IOV_MAX entries and write them sequentially by calling writev()
repeatedly.

Fixes: 8d23ce8f5ee9 ("pcapng: add new library for writing pcapng files")
Cc: step...@networkplumber.org

Signed-off-by: Mário Kuka <k...@cesnet.cz>
---
 app/test/test_pcapng.c  | 42 ++++++++++++++++++++++++++++-
 lib/pcapng/rte_pcapng.c | 58 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
index 320dacea34..7f51946fff 100644
--- a/app/test/test_pcapng.c
+++ b/app/test/test_pcapng.c
@@ -110,7 +110,7 @@ test_setup(void)
        }
 
        /* Make a pool for cloned packets */
-       mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", NUM_PACKETS,
+       mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", IOV_MAX + 
NUM_PACKETS,
                                            0, 0,
                                            rte_pcapng_mbuf_size(pkt_len),
                                            SOCKET_ID_ANY, "ring_mp_sc");
@@ -237,6 +237,45 @@ test_validate(void)
        return ret;
 }
 
+/*
+ * Hand IOV_MAX + NUM_PACKETS packets to rte_pcapng_write_packets() in a
+ * single call and check that the write succeeds.
+ *
+ * Returns 0 on success, -1 if a packet copy or the write fails.
+ */
+static int
+test_write_over_limit_iov_max(void)
+{
+       struct rte_mbuf *orig;
+       struct rte_mbuf *clones[IOV_MAX + NUM_PACKETS] = { };
+       struct dummy_mbuf mbfs;
+       unsigned int i;
+       ssize_t len;
+
+       /* make a dummy packet */
+       mbuf1_prepare(&mbfs, pkt_len);
+
+       /* clone it IOV_MAX + NUM_PACKETS times */
+       orig  = &mbfs.mb[0];
+       for (i = 0; i < IOV_MAX + NUM_PACKETS; i++) {
+               struct rte_mbuf *mc;
+
+               mc = rte_pcapng_copy(port_id, 0, orig, mp, pkt_len,
+                               rte_get_tsc_cycles(), 0);
+               if (mc == NULL) {
+                       fprintf(stderr, "Cannot copy packet\n");
+                       /* free the clones made so far, do not leak them */
+                       rte_pktmbuf_free_bulk(clones, i);
+                       return -1;
+               }
+               clones[i] = mc;
+       }
+
+       /* write it to capture file */
+       len = rte_pcapng_write_packets(pcapng, clones, IOV_MAX + NUM_PACKETS);
+
+       /* the clones are no longer needed whether or not the write worked */
+       rte_pktmbuf_free_bulk(clones, IOV_MAX + NUM_PACKETS);
+
+       if (len <= 0) {
+               fprintf(stderr, "Write of packets failed\n");
+               return -1;
+       }
+
+       return 0;
+}
+
 static void
 test_cleanup(void)
 {
@@ -256,6 +295,7 @@ unit_test_suite test_pcapng_suite  = {
                TEST_CASE(test_write_packets),
                TEST_CASE(test_write_stats),
                TEST_CASE(test_validate),
+               TEST_CASE(test_write_over_limit_iov_max),
                TEST_CASES_END()
        }
 };
diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
index 06ad712bd1..5762f89cb9 100644
--- a/lib/pcapng/rte_pcapng.c
+++ b/lib/pcapng/rte_pcapng.c
@@ -567,6 +567,62 @@ mbuf_burst_segs(struct rte_mbuf *pkts[], unsigned int n)
        return iovcnt;
 }
 
+/*
+ * Consume 'written' bytes from the front of iov[0..count) after a writev().
+ * Entries that were written completely are zeroed; a partially written
+ * entry is advanced so that it describes only its unwritten tail.
+ *
+ * Returns the index of the first entry that still holds unwritten data, or
+ * count if every entry was consumed. A short write that ends exactly on an
+ * entry boundary returns the index of the next non-empty entry (not count),
+ * so the caller never skips buffers that were not written.
+ */
+static int
+pcapng_update_iov(struct iovec *iov, const int count, size_t written)
+{
+       for (int i = 0; i < count; ++i) {
+               if (written < iov[i].iov_len) {
+                       /* first entry with data left; written may be 0 here */
+                       iov[i].iov_base = RTE_PTR_ADD(iov[i].iov_base, written);
+                       iov[i].iov_len -= written;
+                       return i;
+               }
+
+               /* entry fully written (or empty): account for it and clear */
+               written -= iov[i].iov_len;
+               iov[i].iov_base = NULL;
+               iov[i].iov_len = 0;
+       }
+       return count;
+}
+
+/*
+ * Write every buffer in iov[0..count) to the file descriptor fd, in order.
+ *
+ * writev() accepts a limited number of iovec entries per call; the limit is
+ * advertised as IOV_MAX in <limits.h>. The array is therefore submitted in
+ * windows of at most IOV_MAX entries. After each call pcapng_update_iov()
+ * reports how far into the window to move, and the next window starts there.
+ *
+ * Returns the total number of bytes written, or the negative value returned
+ * by writev() as soon as one call fails.
+ */
+static ssize_t
+pcapng_writev(int fd, struct iovec *iov, const int count)
+{
+       size_t nbytes = 0;
+       int pos;
+
+       for (pos = 0; pos < count; ) {
+               struct iovec *win = iov + pos;
+               const int win_len = RTE_MIN(count - pos, IOV_MAX);
+               const ssize_t rc = writev(fd, win, win_len);
+
+               if (unlikely(rc < 0))
+                       return rc;
+
+               nbytes += rc;
+               pos += pcapng_update_iov(win, win_len, rc);
+       }
+
+       return nbytes;
+}
+
 /* Write pre-formatted packets to file. */
 ssize_t
 rte_pcapng_write_packets(rte_pcapng_t *self,
@@ -601,7 +657,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self,
                } while ((m = m->next));
        }
 
-       ret = writev(self->outfd, iov, iovcnt);
+       ret = pcapng_writev(self->outfd, iov, iovcnt);
        if (unlikely(ret < 0))
                rte_errno = errno;
        return ret;
-- 
2.31.1

Reply via email to