Greetings, it appears that policy-based routing behaves rather strangely for IPv6 when I try to make it select a routing table based on the IPv6 traffic class. In the case of IPv4, you can add a rule like
    ip rule add tos 8 table 111
and all the packets from sockets with IP_TOS set to 8 will be routed according to table 111. In the case of IPv6, the IPv6 Traffic Class seems to be very similar to TOS; however, “ip rule” does not have a separate “tclass” option. From the kernel sources (fib6_rule_configure() in net/ipv6/fib6_rules.c) it seems that it sets tclass to the value specified for tos, and a rule like
    ip -6 rule add tos 8 table 111
should work (if instead of IP_TOS we set IPV6_TCLASS to 8 for the IPv6 socket). However, I discovered that if table 111 contains a route to some destination and there is no other route to that destination in any other table, this really works fine only for sending data with sendto() from an unconnected UDP IPv6 socket. connect() for a UDP IPv6 socket to the same destination fails with ENETUNREACH, and TCP connection establishment does not work either. I’ve seen this while using the 5.0.0 kernel. In the kernel sources I see that in fib6_rule_match() tclass is compared with ip6_tclass(fl6->flowlabel). However, ip6_make_flowinfo(), which is used to add tclass to the flowlabel, is used only in udpv6_sendmsg() defined in net/ipv6/udp.c (the other places where it can be seen do not seem to be directly related to TCP/UDP). This may explain the strange behavior described above. To make reproducing the issue simpler, I have attached two C files, ipv6_tclass_server.c and ipv6_tclass_client.c. I have two hosts, host A and host B, connected with interfaces enp1s0f1np1 (on host A) and eth3 (on host B).
1. On host B I did the following:
    sudo ip -6 addr add fec1:34::5555 dev eth3
    sudo ip -6 route add fec1:33::5555/128 dev eth3
    gcc ipv6_tclass_server.c -o ipv6_tclass_server
    ./ipv6_tclass_server fec1:34::5555 8
    [output]
    fec1:34::5555 8 5743 5743
In the last command the second argument (8) is the value of the IPV6_TCLASS option set on the sockets. Its output should be fed to ipv6_tclass_client in the second step (after the source address as the first argument).
2. 
On host A I did the following:
    sudo ip -6 addr add fec1:33::5555 dev enp1s0f1np1
    sudo ip -6 route add fec1:34::5555/128 dev enp1s0f1np1 table 111
    sudo ip -6 rule add tos 8 table 111
    gcc ipv6_tclass_client.c -o ipv6_tclass_client
    ./ipv6_tclass_client fec1:33::5555 fec1:34::5555 8 5743 5743
This ipv6_tclass_client tries to check four things: UDP connect(), UDP sendto(), TCP accept(), TCP connect(). Resulting output - only sendto() from the UDP socket is successful (but not connect() to the same destination from the same UDP socket):
    Connecting UDP socket to fec1:34::5555 failed with errno 101 (Network is unreachable)
    Sending from UDP socket to fec1:34::5555 succeeded
    TCP listener did not become readable within 1 second
    Nonblocking connect() failed with unexpected errno
    errno=101 (Network is unreachable)
If I add a route to fec1:34::5555 in the main table on host A:
    sudo ip -6 route add fec1:34::5555/128 dev enp1s0f1np1
everything works fine:
    ./ipv6_tclass_client fec1:33::5555 fec1:34::5555 8 5743 5743
    Connecting UDP socket to fec1:34::5555 succeeded
    Sending from UDP socket to fec1:34::5555 succeeded
    TCP connection was accepted successfully from fec1:34::5555
    Active TCP connection was successfully established
Regards, Dmitry Izbitsky
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
#include <fcntl.h>
#include <poll.h>

/*
 * Print the current errno (number and description) to stderr and
 * terminate with EXIT_FAILURE. Wrapped in do/while (0) so that the
 * macro expands to exactly one statement and stays safe under an
 * unbraced if/else.
 */
#define ERRNO_EXIT \
    do { \
        fprintf(stderr, "errno=%d (%s)\n", errno, strerror(errno)); \
        exit(EXIT_FAILURE); \
    } while (0)

/*
 * Client side of the IPv6 traffic class reproducer.
 *
 * Usage: ipv6_tclass_client src_ipv6 dst_ipv6 tclass udp_port tcp_port
 *
 * Creates one UDP socket, one TCP listener and one active TCP socket,
 * sets IPV6_TCLASS to <tclass> on each of them, binds all three to
 * <src_ipv6> and then reports the outcome of four checks:
 *   1. connect() of the UDP socket to <dst_ipv6>:<udp_port>;
 *   2. sendto() from the same UDP socket to the same destination;
 *   3. whether the TCP listener gets an incoming connection within
 *      one second;
 *   4. nonblocking connect() of the TCP socket to <dst_ipv6>:<tcp_port>.
 *
 * Failures of the checks themselves are reported and do not stop the
 * program (except an immediate error from the nonblocking connect());
 * failures while setting the sockets up are fatal.
 *
 * Returns 0 when the end of the scenario is reached.
 */
int main(int argc, const char *argv[])
{
    struct sockaddr_in6 src_addr;
    struct sockaddr_in6 dst_addr;
    struct sockaddr_in6 addr;
    socklen_t addr_len;
    int tclass;
    int tcp_port;
    int udp_port;
    int listener_port;
    char port_str[200];
    char addr_str[200];
    int rc;
    struct pollfd fds;
    int tcp_listener_s;
    int tcp_conn_s;
    int acc_s;
    int udp_s;

    if (argc < 6) {
        fprintf(stderr, "Usage: ./ipv6_tclass_client src_ipv6 dst_ipv6 "
                "tclass udp_port tcp_port\n");
        exit(EXIT_FAILURE);
    }

    memset(&src_addr, 0, sizeof(src_addr));
    memset(&dst_addr, 0, sizeof(dst_addr));
    src_addr.sin6_family = AF_INET6;
    dst_addr.sin6_family = AF_INET6;

    /*
     * inet_pton() returns 0 for a malformed address without touching
     * errno, so errno is only reported when the call returned -1.
     */
    rc = inet_pton(AF_INET6, argv[1], &src_addr.sin6_addr);
    if (rc != 1) {
        fprintf(stderr, "Failed to convert '%s' to source IPv6 address\n",
                argv[1]);
        if (rc < 0) {
            ERRNO_EXIT;
        }
        exit(EXIT_FAILURE);
    }

    rc = inet_pton(AF_INET6, argv[2], &dst_addr.sin6_addr);
    if (rc != 1) {
        fprintf(stderr, "Failed to convert '%s' to destination IPv6 "
                "address\n", argv[2]);
        if (rc < 0) {
            ERRNO_EXIT;
        }
        exit(EXIT_FAILURE);
    }

    tclass = atoi(argv[3]);
    udp_port = atoi(argv[4]);
    tcp_port = atoi(argv[5]);

    udp_s = socket(AF_INET6, SOCK_DGRAM, 0);
    if (udp_s < 0) {
        fprintf(stderr, "Failed to create UDP socket\n");
        ERRNO_EXIT;
    }

    tcp_listener_s = socket(AF_INET6, SOCK_STREAM, 0);
    if (tcp_listener_s < 0) {
        fprintf(stderr, "Failed to create TCP listener socket\n");
        ERRNO_EXIT;
    }

    tcp_conn_s = socket(AF_INET6, SOCK_STREAM, 0);
    if (tcp_conn_s < 0) {
        fprintf(stderr, "Failed to create TCP socket\n");
        ERRNO_EXIT;
    }

    /* The same traffic class is applied to every socket under test. */
    if (setsockopt(udp_s, SOL_IPV6, IPV6_TCLASS,
                   &tclass, sizeof(tclass)) < 0) {
        fprintf(stderr, "setsockopt(IPV6_TCLASS) failed for UDP socket\n");
        ERRNO_EXIT;
    }

    if (setsockopt(tcp_listener_s, SOL_IPV6, IPV6_TCLASS,
                   &tclass, sizeof(tclass)) < 0) {
        fprintf(stderr, "setsockopt(IPV6_TCLASS) failed for TCP listener "
                "socket\n");
        ERRNO_EXIT;
    }

    if (setsockopt(tcp_conn_s, SOL_IPV6, IPV6_TCLASS,
                   &tclass, sizeof(tclass)) < 0) {
        fprintf(stderr, "setsockopt(IPV6_TCLASS) failed for TCP socket\n");
        ERRNO_EXIT;
    }

    /* src_addr has port 0, so every socket gets its own kernel-chosen port. */
    if (bind(udp_s, (struct sockaddr *)&src_addr, sizeof(src_addr)) < 0) {
        fprintf(stderr, "Failed to bind UDP socket\n");
        ERRNO_EXIT;
    }

    if (bind(tcp_listener_s, (struct sockaddr *)&src_addr,
             sizeof(src_addr)) < 0) {
        fprintf(stderr, "Failed to bind TCP listener socket\n");
        ERRNO_EXIT;
    }

    if (bind(tcp_conn_s, (struct sockaddr *)&src_addr,
             sizeof(src_addr)) < 0) {
        fprintf(stderr, "Failed to bind TCP socket\n");
        ERRNO_EXIT;
    }

    /* Learn which port the listener was given; it is sent to the peer below. */
    addr_len = sizeof(addr);
    if (getsockname(tcp_listener_s, (struct sockaddr *)&addr,
                    &addr_len) < 0) {
        fprintf(stderr, "getsockname() failed for TCP listener socket\n");
        ERRNO_EXIT;
    }
    listener_port = ntohs(addr.sin6_port);

    if (listen(tcp_listener_s, 1) < 0) {
        fprintf(stderr, "listen() failed for TCP listener socket\n");
        ERRNO_EXIT;
    }

    /* Check 1: connect() on the UDP socket. A failure is only reported. */
    dst_addr.sin6_port = htons(udp_port);
    if (connect(udp_s, (struct sockaddr *)&dst_addr, sizeof(dst_addr)) < 0) {
        fprintf(stderr, "Connecting UDP socket to %s failed with errno "
                "%d (%s)\n", argv[2], errno, strerror(errno));
    } else {
        printf("Connecting UDP socket to %s succeeded\n", argv[2]);
    }

    /*
     * Check 2: sendto() with an explicit destination from the same UDP
     * socket. The payload is the listener port as a NUL-terminated
     * decimal string (the terminator is included in the length).
     */
    snprintf(port_str, sizeof(port_str), "%d", listener_port);
    rc = sendto(udp_s, port_str, strlen(port_str) + 1, 0,
                (struct sockaddr *)&dst_addr, sizeof(dst_addr));
    if (rc < 0) {
        fprintf(stderr, "Sending from UDP socket to %s failed with errno "
                "%d (%s)\n", argv[2], errno, strerror(errno));
    } else {
        printf("Sending from UDP socket to %s succeeded\n", argv[2]);
    }

    /* Check 3: wait up to one second for an incoming TCP connection. */
    memset(&fds, 0, sizeof(fds));
    fds.fd = tcp_listener_s;
    fds.events = POLLIN;
    rc = poll(&fds, 1, 1000);
    if (rc < 0) {
        /* Report a poll() failure as such instead of as "events 0x0". */
        fprintf(stderr, "poll() failed with errno %d (%s)\n",
                errno, strerror(errno));
    } else if (rc == 0) {
        fprintf(stderr, "TCP listener did not become readable within "
                "1 second\n");
    } else if (fds.revents != POLLIN) {
        fprintf(stderr, "Unexpected events 0x%x for TCP listener\n",
                fds.revents);
    } else {
        addr_len = sizeof(addr);
        acc_s = accept(tcp_listener_s, (struct sockaddr *)&addr, &addr_len);
        if (acc_s < 0) {
            fprintf(stderr, "accept() failed with errno %d (%s)\n",
                    errno, strerror(errno));
        } else {
            if (inet_ntop(AF_INET6, &addr.sin6_addr, addr_str,
                          sizeof(addr_str)) == NULL) {
                fprintf(stderr, "Failed to convert source address "
                        "of accepted connection to string\n");
            } else {
                printf("TCP connection was accepted successfully "
                       "from %s\n", addr_str);
            }
            close(acc_s);
        }
    }

    /* Check 4: active TCP connection, started in nonblocking mode. */
    if (fcntl(tcp_conn_s, F_SETFL, O_NONBLOCK) < 0) {
        fprintf(stderr, "Failed to make TCP socket non-blocking\n");
        ERRNO_EXIT;
    }

    dst_addr.sin6_port = htons(tcp_port);
    rc = connect(tcp_conn_s, (struct sockaddr *)&dst_addr, sizeof(dst_addr));
    if (rc < 0 && errno != EINPROGRESS) {
        fprintf(stderr, "Nonblocking connect() failed with "
                "unexpected errno\n");
        ERRNO_EXIT;
    }

    /*
     * Give the handshake one second, then call connect() again: success
     * or EISCONN is taken as "the connection is established".
     */
    sleep(1);
    rc = connect(tcp_conn_s, (struct sockaddr *)&dst_addr, sizeof(dst_addr));
    if (rc < 0 && errno != EISCONN) {
        fprintf(stderr, "Active TCP connection was not established in "
                "1 second\n");
    } else {
        printf("Active TCP connection was successfully established\n");
    }

    close(tcp_conn_s);
    close(tcp_listener_s);
    close(udp_s);

    return 0;
}
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
#include <fcntl.h>
#include <poll.h>

/*
 * Print the current errno (number and description) to stderr and
 * terminate with EXIT_FAILURE. Wrapped in do/while (0) so that the
 * macro expands to exactly one statement and stays safe under an
 * unbraced if/else.
 */
#define ERRNO_EXIT \
    do { \
        fprintf(stderr, "errno=%d (%s)\n", errno, strerror(errno)); \
        exit(EXIT_FAILURE); \
    } while (0)

/*
 * Server side of the IPv6 traffic class reproducer.
 *
 * Usage: ipv6_tclass_server ipv6_addr tclass
 *
 * Binds a UDP socket and a TCP listener to <ipv6_addr> on kernel-chosen
 * ports, sets IPV6_TCLASS to <tclass> on both and prints
 * "<ipv6_addr> <tclass> <udp_port> <tcp_port>" on stdout. It then loops
 * forever:
 *   - incoming TCP connections are accepted, logged and closed;
 *   - every UDP datagram must carry a NUL-terminated decimal port
 *     number; the server opens a new TCP socket (same traffic class,
 *     same local address) and tries to connect to the datagram's
 *     sender on that port, waiting one second for the result.
 *
 * The loop ends only when poll() reports an unexpected event; most
 * other failures terminate the process through ERRNO_EXIT/exit().
 */
int main(int argc, const char *argv[])
{
    struct sockaddr_in6 addr;
    struct sockaddr_in6 bound_addr;
    socklen_t addr_len;
    struct pollfd fds[2];
    char addr_str[200];
    char port_str[200];
    int udp_s;
    int tcp_s;
    int acc_s;
    int tcp_conn_s;
    int udp_port;
    int tcp_port;
    int tclass;
    int rc;

    if (argc < 3) {
        fprintf(stderr, "Usage: ./ipv6_tclass_server ipv6_addr tclass\n");
        exit(EXIT_FAILURE);
    }

    tclass = atoi(argv[2]);

    /*
     * Zero the whole address (value and size were swapped before, which
     * left port, flowinfo and scope id uninitialized when bind() ran).
     */
    memset(&addr, 0, sizeof(addr));

    /*
     * inet_pton() returns 0 for a malformed address without touching
     * errno, so errno is only reported when the call returned -1.
     */
    rc = inet_pton(AF_INET6, argv[1], &addr.sin6_addr);
    if (rc != 1) {
        fprintf(stderr, "Failed to convert '%s' to IPv6 address\n", argv[1]);
        if (rc < 0) {
            ERRNO_EXIT;
        }
        exit(EXIT_FAILURE);
    }
    addr.sin6_family = AF_INET6;

    udp_s = socket(AF_INET6, SOCK_DGRAM, 0);
    if (udp_s < 0) {
        fprintf(stderr, "Failed to create UDP socket\n");
        ERRNO_EXIT;
    }

    tcp_s = socket(AF_INET6, SOCK_STREAM, 0);
    if (tcp_s < 0) {
        fprintf(stderr, "Failed to create TCP socket\n");
        ERRNO_EXIT;
    }

    if (setsockopt(udp_s, SOL_IPV6, IPV6_TCLASS,
                   &tclass, sizeof(tclass)) < 0) {
        fprintf(stderr, "setsockopt(IPV6_TCLASS) failed for UDP socket\n");
        ERRNO_EXIT;
    }

    if (setsockopt(tcp_s, SOL_IPV6, IPV6_TCLASS,
                   &tclass, sizeof(tclass)) < 0) {
        fprintf(stderr, "setsockopt(IPV6_TCLASS) failed for TCP socket\n");
        ERRNO_EXIT;
    }

    /* Port 0 in addr: the kernel picks a free port for each socket. */
    if (bind(udp_s, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        fprintf(stderr, "Failed to bind UDP socket\n");
        ERRNO_EXIT;
    }

    if (bind(tcp_s, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        fprintf(stderr, "Failed to bind TCP socket\n");
        ERRNO_EXIT;
    }

    addr_len = sizeof(bound_addr);
    if (getsockname(udp_s, (struct sockaddr *)&bound_addr, &addr_len) < 0) {
        fprintf(stderr, "getsockname() failed for UDP socket\n");
        ERRNO_EXIT;
    }
    udp_port = ntohs(bound_addr.sin6_port);

    addr_len = sizeof(bound_addr);
    if (getsockname(tcp_s, (struct sockaddr *)&bound_addr, &addr_len) < 0) {
        fprintf(stderr, "getsockname() failed for TCP socket\n");
        ERRNO_EXIT;
    }
    tcp_port = ntohs(bound_addr.sin6_port);

    /*
     * This line is what the user passes on to the client; flush it so it
     * is visible even when stdout is not a terminal, since the program
     * blocks in poll() right after.
     */
    printf("%s %d %d %d\n", argv[1], tclass, udp_port, tcp_port);
    fflush(stdout);

    if (listen(tcp_s, 1) < 0) {
        fprintf(stderr, "listen() failed for TCP socket\n");
        ERRNO_EXIT;
    }

    memset(fds, 0, sizeof(fds));
    fds[0].fd = tcp_s;
    fds[0].events = POLLIN;
    fds[1].fd = udp_s;
    fds[1].events = POLLIN;

    while (1) {
        rc = poll(fds, 2, -1);
        if (rc < 0) {
            fprintf(stderr, "poll() failed\n");
            ERRNO_EXIT;
        }

        /* TCP listener accepts connection */
        if (fds[0].revents == POLLIN) {
            addr_len = sizeof(addr);
            acc_s = accept(tcp_s, (struct sockaddr *)&addr, &addr_len);
            if (acc_s < 0) {
                fprintf(stderr, "accept() failed\n");
                ERRNO_EXIT;
            }

            if (inet_ntop(AF_INET6, &addr.sin6_addr, addr_str,
                          sizeof(addr_str)) == NULL) {
                fprintf(stderr, "Failed to convert source address "
                        "of accepted connection to string\n");
                ERRNO_EXIT;
            }

            /* Newline added so each accepted connection gets its own line. */
            printf("Accepted connection from %s %d\n", addr_str,
                   (int)ntohs(addr.sin6_port));

            close(acc_s);
        } else if (fds[0].revents != 0) {
            fprintf(stderr, "Unexpected events 0x%x for TCP listener\n",
                    fds[0].revents);
            break;
        }

        /* UDP socket received data */
        if (fds[1].revents == POLLIN) {
            addr_len = sizeof(addr);
            rc = recvfrom(udp_s, port_str, sizeof(port_str), 0,
                          (struct sockaddr *)&addr, &addr_len);
            if (rc < 0) {
                fprintf(stderr, "recvfrom() failed\n");
                ERRNO_EXIT;
            } else if (rc == 0) {
                fprintf(stderr, "recvfrom() returned zero\n");
                exit(EXIT_FAILURE);
            }

            if (inet_ntop(AF_INET6, &addr.sin6_addr, addr_str,
                          sizeof(addr_str)) == NULL) {
                fprintf(stderr, "Failed to convert source address "
                        "of received data to string\n");
                ERRNO_EXIT;
            }

            printf("Received data from %s %d\n", addr_str,
                   (int)ntohs(addr.sin6_port));

            /*
             * Via UDP socket we receive port number to which
             * we try to establish TCP connection, so that
             * passive TCP connection establishment will be
             * checked by the client.
             */

            /* rc > 0 here, so rc - 1 is a valid index into port_str. */
            if (port_str[rc - 1] != '\0') {
                fprintf(stderr, "Not null-terminated string was received "
                        "from UDP socket\n");
                exit(EXIT_FAILURE);
            }

            printf("TCP socket will try to connect to %s %s\n",
                   addr_str, port_str);

            /* Reuse the datagram's source address, replacing only the port. */
            addr.sin6_port = htons(atoi(port_str));

            tcp_conn_s = socket(AF_INET6, SOCK_STREAM, 0);
            if (tcp_conn_s < 0) {
                fprintf(stderr, "Failed to create TCP socket\n");
                ERRNO_EXIT;
            }

            if (setsockopt(tcp_conn_s, SOL_IPV6, IPV6_TCLASS,
                           &tclass, sizeof(tclass)) < 0) {
                fprintf(stderr, "setsockopt(IPV6_TCLASS) failed for "
                        "TCP socket\n");
                ERRNO_EXIT;
            }

            /*
             * bound_addr still holds the listener's local address from
             * getsockname(); clearing the port lets the kernel choose one
             * for the outgoing socket.
             */
            bound_addr.sin6_port = 0;
            if (bind(tcp_conn_s, (struct sockaddr *)&bound_addr,
                     sizeof(bound_addr)) < 0) {
                fprintf(stderr, "Failed to bind TCP socket\n");
                ERRNO_EXIT;
            }

            if (fcntl(tcp_conn_s, F_SETFL, O_NONBLOCK) < 0) {
                fprintf(stderr, "Failed to make TCP socket non-blocking\n");
                ERRNO_EXIT;
            }

            rc = connect(tcp_conn_s, (struct sockaddr *)&addr, sizeof(addr));
            if (rc < 0 && errno != EINPROGRESS) {
                fprintf(stderr, "Nonblocking connect() failed with "
                        "unexpected errno\n");
                ERRNO_EXIT;
            }

            /*
             * Give the handshake one second, then call connect() again:
             * success or EISCONN is taken as "connection established".
             */
            sleep(1);
            rc = connect(tcp_conn_s, (struct sockaddr *)&addr, sizeof(addr));
            if (rc < 0 && errno != EISCONN) {
                fprintf(stderr, "TCP connection was not established in "
                        "1 second\n");
            }

            close(tcp_conn_s);
        } else if (fds[1].revents != 0) {
            fprintf(stderr, "Unexpected events 0x%x for UDP socket\n",
                    fds[1].revents);
            break;
        }
    }

    /*
     * tcp_conn_s is not closed here: it is closed at the end of every
     * UDP iteration and is unset if no datagram was ever handled.
     */
    close(tcp_s);
    close(udp_s);

    return 0;
}