While measuring the latency of a latency-critical workload running
on the mlx5 PMD, we noticed that high latency can occur due to a
delayed flush of the doorbell record update.

This can be reproduced using the simple program [1]
against testpmd macswap fwd mode. This utility sends
a raw ethernet frame to the dpdk port and measures the
time between send and the received mirrored frame.

This patchset guarantees immediate visibility of doorbell
updates by mapping the doorbell as non-cacheable memory.
In addition, we relax the memory barrier for DMA-able
memory.

Without this fix the tsc delta was 3550760-5993019 cycles
(which translates to 2-6 ms on 1.7 GHz processor).

With the fix applied the tsc delta is reduced to 17740-29663 cycles
(which translates to 9-17 us).

Shahaf Shuler (2):
  net/mlx5: replace memory barrier type
  net/mlx5: don't map doorbell register to write combining

 drivers/net/mlx5/mlx5.c      | 2 ++
 drivers/net/mlx5/mlx5_rxtx.h | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

[1]:
/*
 * compiling: gcc test.c -o test
 * run using: ./test <local_iface> <dest_mac> 
 */
#include <errno.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <net/if.h>
#include <netinet/ether.h>
#include <sys/ioctl.h>
#include <sys/socket.h>

#define BUF_SIZ         1024

/*
 * Read the x86 time-stamp counter.
 *
 * RDTSC returns the low 32 bits of the counter in EAX and the high 32 bits
 * in EDX; the two halves are combined into a single 64-bit value.
 *
 * __asm__/__volatile__ are used instead of the plain asm keyword, and the
 * halves are merged with a shift rather than through an anonymous-struct
 * union, so the function also builds in strict ISO modes (e.g. -std=c11).
 *
 * Returns the current counter value.
 */
static inline uint64_t rte_rdtsc(void)
{
        uint32_t lo_32;
        uint32_t hi_32;

        __asm__ __volatile__("rdtsc" :
                             "=a" (lo_32),
                             "=d" (hi_32));
        return ((uint64_t)hi_32 << 32) | lo_32;
}

/*
 * Send one raw Ethernet frame out of <local_iface> to <dest_mac> and print
 * the number of TSC cycles between the send completing and the arrival of
 * the mirrored frame (source/destination MAC swapped, same ethertype and
 * payload).
 *
 * argv[1]: name of the local interface to send on.
 * argv[2]: destination MAC address as six ':'-separated hex bytes.
 *
 * Returns 0 on success, -1 on bad arguments or on a socket/ioctl/receive
 * failure, and -2 if the frame could not be sent.
 */
int main(int argc, char *argv[])
{
        int sockfd = -1;
        int rc = -1;
        struct ifreq if_idx;
        struct ifreq if_mac;
        size_t tx_len = 0;
        size_t ifname_len;
        char sendbuf[BUF_SIZ];
        struct ether_header *eh = (struct ether_header *) sendbuf;
        struct sockaddr_ll socket_address;
        unsigned int values[ETH_ALEN];
        struct ether_header expected;
        uint64_t payload = 0xB16B00B5;
        uint8_t buffer[BUF_SIZ];
        int result;
        uint64_t before_rcv;
        uint64_t after_rcv;
        uint64_t delta;
        ssize_t numbytes;
        int i;

        if (argc != 3) {
                fprintf(stderr, "device name and dest mac\n");
                return -1;
        }

        /* Reject names that do not fit ifr_name (including its NUL). */
        ifname_len = strlen(argv[1]);
        if (ifname_len >= IFNAMSIZ) {
                fprintf(stderr, "interface name too long\n");
                return -1;
        }

        /* %x stores into unsigned int objects. */
        result = sscanf(argv[2], "%x:%x:%x:%x:%x:%x",
                        &values[0], &values[1], &values[2],
                        &values[3], &values[4], &values[5]);
        if (result != ETH_ALEN) {
                fprintf(stderr, "invalid mac\n");
                return -1;
        }
        for (i = 0; i < ETH_ALEN; i++) {
                /* Each field must fit in a single MAC byte. */
                if (values[i] > 0xff) {
                        fprintf(stderr, "invalid mac\n");
                        return -1;
                }
        }

        /* Open RAW socket to send on (requires CAP_NET_RAW) */
        sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
        if (sockfd == -1) {
                perror("socket");
                return -1;
        }

        /* Get the index of the interface to send on */
        memset(&if_idx, 0, sizeof(if_idx));
        memcpy(if_idx.ifr_name, argv[1], ifname_len);
        if (ioctl(sockfd, SIOCGIFINDEX, &if_idx) < 0) {
                perror("SIOCGIFINDEX");
                goto out;
        }

        /* Get the MAC address of the interface to send on */
        memset(&if_mac, 0, sizeof(if_mac));
        memcpy(if_mac.ifr_name, argv[1], ifname_len);
        if (ioctl(sockfd, SIOCGIFHWADDR, &if_mac) < 0) {
                perror("SIOCGIFHWADDR");
                goto out;
        }

        /*
         * Construct the frame and the link-layer destination address.
         * The address is zeroed first so that no field is passed to the
         * kernel uninitialized.
         */
        memset(sendbuf, 0, sizeof(sendbuf));
        memset(&socket_address, 0, sizeof(socket_address));

        /* Ethernet header: our MAC as source, requested MAC as destination */
        memcpy(eh->ether_shost, if_mac.ifr_hwaddr.sa_data, ETH_ALEN);
        for (i = 0; i < ETH_ALEN; i++) {
                eh->ether_dhost[i] = (uint8_t)values[i];
                socket_address.sll_addr[i] = (uint8_t)values[i];
        }
        /* Ethertype field */
        eh->ether_type = htons(ETH_P_IP);
        tx_len += sizeof(struct ether_header);

        memcpy(&sendbuf[tx_len], &payload, sizeof(payload));
        tx_len += sizeof(payload);

        socket_address.sll_family = AF_PACKET;
        /* Same ethertype as the frame, already in network byte order */
        socket_address.sll_protocol = eh->ether_type;
        /* Index of the network device */
        socket_address.sll_ifindex = if_idx.ifr_ifindex;
        /* Address length */
        socket_address.sll_halen = ETH_ALEN;

        /* The mirrored frame comes back with the MAC addresses swapped. */
        memcpy(&expected.ether_dhost, &eh->ether_shost, ETH_ALEN);
        memcpy(&expected.ether_shost, &eh->ether_dhost, ETH_ALEN);
        expected.ether_type = eh->ether_type;

        /* Send packet */
        if (sendto(sockfd, sendbuf, tx_len, 0,
                   (struct sockaddr *)&socket_address,
                   sizeof(socket_address)) < 0) {
                printf("Send failed\n");
                rc = -2;
                goto out;
        }

        before_rcv = rte_rdtsc();
        for (;;) {
                numbytes = recvfrom(sockfd, buffer, sizeof(buffer), 0,
                                    NULL, NULL);
                if (numbytes < 0) {
                        /* Retry only when interrupted by a signal. */
                        if (errno == EINTR)
                                continue;
                        perror("recvfrom");
                        goto out;
                }
                after_rcv = rte_rdtsc();

                /* Never compare bytes that were not actually received. */
                if ((size_t)numbytes < sizeof(expected) + sizeof(payload))
                        continue;

                if (memcmp(&expected, buffer, sizeof(expected)) != 0)
                        continue;

                if (memcmp(&payload, &buffer[sizeof(expected)],
                           sizeof(payload)) == 0)
                        break;
        }

        delta = after_rcv - before_rcv;
        printf("RTT is %" PRIu64 " tsc \n", delta);
        rc = 0;

out:
        close(sockfd);
        return rc;
}
-- 
2.7.4

Reply via email to