From: Ishaan Gandhi <ishaangan...@gmail.com>

When sending ICMP Time Exceeded, Destination Unreachable, and Parameter
Problem messages, this patch identifies the interface on which the offending
packet arrived, in accordance with RFC 5837.
It was tested by pinging a machine with a ttl of 1, and observing the
response in Wireshark.

Signed-off-by: Ishaan Gandhi <ishaangan...@gmail.com>
---
Review fixups folded into icmp_identify_arrival_interface():
 - drop the device reference taken by dev_get_by_index()
 - initialise all sub-object state so a failed device lookup is safe
 - handle datagrams longer than 'room' (no unsigned underflow; the quoted
   datagram is truncated/trimmed so the extension is the last thing quoted)
 - set the RFC 4884 length field to the real extension offset, and only
   once the extension has actually been written
 - check skb_linearize(), refuse shared skbs, unclone before writing
 - make the function static, use u8 for the C-Type, kernel comment style
Hunk offsets and the diffstat were updated by hand; the index line of
net/ipv4/icmp.c still carries the original blob ids.

 include/uapi/linux/icmp.h |  21 ++++++
 net/ipv4/icmp.c           | 159 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 180 insertions(+)

diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h
index fb169a50895e..5e5a75abe0a4 100644
--- a/include/uapi/linux/icmp.h
+++ b/include/uapi/linux/icmp.h
@@ -118,4 +118,25 @@ struct icmp_extobj_hdr {
 	__u8		class_type;
 };
 
+/* RFC 5837 Bitmasks */
+#define ICMP_5837_MTU_CTYPE		(1 << 0)
+#define ICMP_5837_NAME_CTYPE		(1 << 1)
+#define ICMP_5837_IP_ADDR_CTYPE		(1 << 2)
+#define ICMP_5837_IF_INDEX_CTYPE	(1 << 3)
+
+#define ICMP_5837_ARRIVAL_ROLE_CTYPE	(0 << 6)
+#define ICMP_5837_SUB_IP_ROLE_CTYPE	(1 << 6)
+#define ICMP_5837_FORWARD_ROLE_CTYPE	(2 << 6)
+#define ICMP_5837_NEXT_HOP_ROLE_CTYPE	(3 << 6)
+
+#define ICMP_5837_MIN_ORIG_LEN		128
+#define ICMP_5837_MAX_NAME_LEN		63
+
+/* RFC 5837 Interface IP Address sub-object */
+struct interface_ipv4_addr_sub_obj {
+	__be16 afi;
+	__be16 reserved;
+	__be32 addr;
+};
+
 #endif /* _UAPI_LINUX_ICMP_H */
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 793aebf07c2a..3abae84a6e9e 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -555,6 +555,161 @@ static struct rtable *icmp_route_lookup(struct net *net,
 	return ERR_PTR(err);
 }
 
+/* Appends an RFC 5837 Interface Information Object to the original datagram
+ * so that the ICMP error quoting it identifies the interface on which the
+ * datagram arrived.
+ *
+ * @skb:   offending datagram; linearized and rewritten in place so that it
+ *         ends with zero padding followed by the extension structure
+ * @net:   network namespace used to look up the arrival interface
+ * @room:  maximum number of bytes of @skb the ICMP error is allowed to quote
+ * @icmph: ICMP header being built; receives the RFC 4884 length field
+ *
+ * Should only be called on the following messages
+ *  - ICMPv4 Time Exceeded
+ *  - ICMPv4 Destination Unreachable
+ *  - ICMPv4 Parameter Problem
+ *
+ * Best effort: if the skb is shared, memory is short, or @room cannot hold
+ * 128 quoted bytes plus the extension, @icmph is left untouched and no
+ * extension structure is written.
+ */
+static void icmp_identify_arrival_interface(struct sk_buff *skb,
+					    struct net *net, int room,
+					    struct icmphdr *icmph)
+{
+	unsigned int ext_len, orig_len, offset, new_len, tailroom, max_len;
+	unsigned int name_len = 0, name_subobj_len = 0, mtu = 0;
+	struct interface_ipv4_addr_sub_obj ip_addr = {};
+	struct icmp_extobj_hdr *iio_hdr;
+	struct icmp_ext_hdr *ext_hdr;
+	struct net_device *dev;
+	u8 *subobj, ctype;
+	__be32 word;
+	int if_index;
+
+	/* pskb_expand_head() must not be called on a shared skb */
+	if (room <= 0 || skb_shared(skb) || skb_linearize(skb))
+		return;
+
+	max_len = room;
+	if_index = inet_iif(skb);
+	orig_len = skb->len - skb_network_offset(skb);
+
+	/* The ifIndex sub-object is always present */
+	ctype = ICMP_5837_ARRIVAL_ROLE_CTYPE | ICMP_5837_IF_INDEX_CTYPE;
+	ext_len = sizeof(*ext_hdr) + sizeof(*iio_hdr) + sizeof(word);
+
+	/* Try to append IP address, name, and MTU */
+	dev = dev_get_by_index(net, if_index);
+	if (dev) {
+		ip_addr.addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+		if (ip_addr.addr) {
+			ip_addr.afi = htons(1);
+			ctype |= ICMP_5837_IP_ADDR_CTYPE;
+			ext_len += sizeof(ip_addr);
+		}
+
+		name_len = min_t(unsigned int, strlen(dev->name),
+				 ICMP_5837_MAX_NAME_LEN);
+		if (name_len) {
+			/* Length octet plus name, NUL padded to 32 bits */
+			name_subobj_len = (name_len + 1 + 3) & ~0x03U;
+			ctype |= ICMP_5837_NAME_CTYPE;
+			ext_len += name_subobj_len;
+		}
+
+		mtu = dev->mtu;
+		if (mtu) {
+			ctype |= ICMP_5837_MTU_CTYPE;
+			ext_len += sizeof(word);
+		}
+	}
+
+	/* RFC 4884: the quoted datagram is zero padded to a 32-bit boundary
+	 * and to at least 128 bytes; the extension structure follows it.
+	 */
+	offset = max_t(unsigned int, (orig_len + 3) & ~0x03U,
+		       ICMP_5837_MIN_ORIG_LEN);
+	if (offset + ext_len > max_len) {
+		/* Quote less of the datagram so that the extension fits */
+		if (max_len < ICMP_5837_MIN_ORIG_LEN + ext_len)
+			goto out_put;
+		offset = (max_len - ext_len) & ~0x03U;
+	}
+	new_len = offset + ext_len;
+
+	/* Get a private linear buffer that is large enough before writing */
+	tailroom = skb_tailroom(skb);
+	if (new_len > orig_len && new_len - orig_len > tailroom) {
+		if (pskb_expand_head(skb, 0, new_len - orig_len - tailroom,
+				     GFP_ATOMIC))
+			goto out_put;
+	} else if (skb_cloned(skb)) {
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+			goto out_put;
+	}
+
+	/* Make the datagram end exactly where the extension structure ends */
+	if (new_len > orig_len)
+		skb_put(skb, new_len - orig_len);
+	else
+		skb_trim(skb, skb_network_offset(skb) + new_len);
+
+	/* Zero the gap between the quoted datagram and the extension */
+	if (offset > orig_len)
+		memset(skb_network_header(skb) + orig_len, 0,
+		       offset - orig_len);
+
+	ext_hdr = (struct icmp_ext_hdr *)(skb_network_header(skb) + offset);
+	iio_hdr = (struct icmp_extobj_hdr *)(ext_hdr + 1);
+	subobj = (u8 *)(iio_hdr + 1);
+
+	ext_hdr->reserved1 = 0;
+	ext_hdr->reserved2 = 0;
+	ext_hdr->version = 2;
+	ext_hdr->checksum = 0;
+
+	/* Object length excludes the 4-byte extension header */
+	iio_hdr->length = htons(ext_len - sizeof(*ext_hdr));
+	iio_hdr->class_num = 2;
+	iio_hdr->class_type = ctype;
+
+	word = htonl(if_index);
+	memcpy(subobj, &word, sizeof(word));
+	subobj += sizeof(word);
+
+	if (ctype & ICMP_5837_IP_ADDR_CTYPE) {
+		memcpy(subobj, &ip_addr, sizeof(ip_addr));
+		subobj += sizeof(ip_addr);
+	}
+
+	/* The name and MTU bits are only ever set when dev is non-NULL */
+	if (ctype & ICMP_5837_NAME_CTYPE) {
+		*subobj = name_subobj_len;
+		memcpy(subobj + 1, dev->name, name_len);
+		memset(subobj + 1 + name_len, 0,
+		       name_subobj_len - 1 - name_len);
+		subobj += name_subobj_len;
+	}
+
+	if (ctype & ICMP_5837_MTU_CTYPE) {
+		word = htonl(mtu);
+		memcpy(subobj, &word, sizeof(word));
+	}
+
+	ext_hdr->checksum =
+		csum_fold(skb_checksum(skb, skb_network_offset(skb) + offset,
+				       ext_len, 0));
+
+	/* RFC 4884 length: padded original datagram, in 32-bit words */
+	icmph->un.reserved[1] = offset / 4;
+
+out_put:
+	if (dev)
+		dev_put(dev);
+}
+
 /*
  *	Send an ICMP message in response to a situation
  *
@@ -731,6 +886,10 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
 		room = 576;
 	room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
 	room -= sizeof(struct icmphdr);
+	if (type == ICMP_DEST_UNREACH || type == ICMP_TIME_EXCEEDED ||
+	    type == ICMP_PARAMETERPROB) {
+		icmp_identify_arrival_interface(skb_in, net, room, &icmp_param.data.icmph);
+	}
 
 	icmp_param.data_len = skb_in->len - icmp_param.offset;
 	if (icmp_param.data_len > room)
-- 
2.25.1