This is not intended to go to mainline; it is provided just for those who are interested in the algorithm internals during a test.
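
Roughly, the counters break down like this: TCP_SACK0..TCP_SACK4 histogram how many SACK blocks were used per ACK. TCP_WALKEDSKBS and TCP_WALKEDDSACKS count skbs tagged by the sacktag walker (the latter within DSACK blocks), while TCP_SKIPPEDSKBS counts skbs stepped over without tagging. TCP_NOCACHE fires once per ACK when recv_sack_cache cannot be used. The per-block *WALK, *SKIP, and *_TOHIGH counters classify how much work the cache saved for each SACK block (full walk, head-only walk, tail walk past a cached middle, full/tail/head skip, or a jump straight past the highest SACKed skb). TCP_CACHEREMAINING counts ACKs that finished with unused cache entries left over.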
Signed-off-by: Ilpo Järvinen <[EMAIL PROTECTED]>
---
 include/linux/snmp.h |   19 +++++++++++++++++++
 net/ipv4/proc.c      |   19 +++++++++++++++++++
 net/ipv4/tcp_input.c |   50 ++++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 89f0c2b..fbcd62d 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -214,6 +214,25 @@ enum
 	LINUX_MIB_TCPDSACKIGNOREDOLD,		/* TCPSACKIgnoredOld */
 	LINUX_MIB_TCPDSACKIGNOREDNOUNDO,	/* TCPSACKIgnoredNoUndo */
 	LINUX_MIB_TCPSPURIOUSRTOS,		/* TCPSpuriousRTOs */
+	LINUX_MIB_TCP_SACK0,
+	LINUX_MIB_TCP_SACK1,
+	LINUX_MIB_TCP_SACK2,
+	LINUX_MIB_TCP_SACK3,
+	LINUX_MIB_TCP_SACK4,
+	LINUX_MIB_TCP_WALKEDSKBS,
+	LINUX_MIB_TCP_WALKEDDSACKS,
+	LINUX_MIB_TCP_SKIPPEDSKBS,
+	LINUX_MIB_TCP_NOCACHE,
+	LINUX_MIB_TCP_HEADWALK,
+	LINUX_MIB_TCP_FULLSKIP,
+	LINUX_MIB_TCP_TAILSKIP,
+	LINUX_MIB_TCP_HEADSKIP_TOHIGH,
+	LINUX_MIB_TCP_TAIL_TOHIGH,
+	LINUX_MIB_TCP_HEADSKIP,
+	LINUX_MIB_TCP_NEWSKIP,
+	LINUX_MIB_TCP_FULLWALK,
+	LINUX_MIB_TCP_TAILWALK,
+	LINUX_MIB_TCP_CACHEREMAINING,
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ce34b28..a5e842d 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -227,6 +227,25 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD),
 	SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO),
 	SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS),
+	SNMP_MIB_ITEM("TCP_SACK0", LINUX_MIB_TCP_SACK0),
+	SNMP_MIB_ITEM("TCP_SACK1", LINUX_MIB_TCP_SACK1),
+	SNMP_MIB_ITEM("TCP_SACK2", LINUX_MIB_TCP_SACK2),
+	SNMP_MIB_ITEM("TCP_SACK3", LINUX_MIB_TCP_SACK3),
+	SNMP_MIB_ITEM("TCP_SACK4", LINUX_MIB_TCP_SACK4),
+	SNMP_MIB_ITEM("TCP_WALKEDSKBS", LINUX_MIB_TCP_WALKEDSKBS),
+	SNMP_MIB_ITEM("TCP_WALKEDDSACKS", LINUX_MIB_TCP_WALKEDDSACKS),
+	SNMP_MIB_ITEM("TCP_SKIPPEDSKBS", LINUX_MIB_TCP_SKIPPEDSKBS),
+	SNMP_MIB_ITEM("TCP_NOCACHE", LINUX_MIB_TCP_NOCACHE),
+	SNMP_MIB_ITEM("TCP_FULLWALK", LINUX_MIB_TCP_FULLWALK),
+	SNMP_MIB_ITEM("TCP_HEADWALK", LINUX_MIB_TCP_HEADWALK),
+	SNMP_MIB_ITEM("TCP_TAILWALK", LINUX_MIB_TCP_TAILWALK),
+	SNMP_MIB_ITEM("TCP_FULLSKIP", LINUX_MIB_TCP_FULLSKIP),
+	SNMP_MIB_ITEM("TCP_TAILSKIP", LINUX_MIB_TCP_TAILSKIP),
+	SNMP_MIB_ITEM("TCP_HEADSKIP", LINUX_MIB_TCP_HEADSKIP),
+	SNMP_MIB_ITEM("TCP_HEADSKIP_TOHIGH", LINUX_MIB_TCP_HEADSKIP_TOHIGH),
+	SNMP_MIB_ITEM("TCP_TAIL_TOHIGH", LINUX_MIB_TCP_TAIL_TOHIGH),
+	SNMP_MIB_ITEM("TCP_NEWSKIP", LINUX_MIB_TCP_NEWSKIP),
+	SNMP_MIB_ITEM("TCP_CACHEREMAINING", LINUX_MIB_TCP_CACHEREMAINING),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5833b01..87ab327 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1370,6 +1370,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 		*flag |= tcp_sacktag_one(skb, tp, reord, dup_sack, *fack_count);
 
 		*fack_count += tcp_skb_pcount(skb);
+
+		NET_INC_STATS_BH(LINUX_MIB_TCP_WALKEDSKBS);
+		if (dup_sack)
+			NET_INC_STATS_BH(LINUX_MIB_TCP_WALKEDDSACKS);
 	}
 	return skb;
 }
@@ -1386,6 +1390,8 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
 
 		if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
 			break;
+
+		NET_INC_STATS_BH(LINUX_MIB_TCP_SKIPPEDSKBS);
 	}
 	return skb;
 }
@@ -1434,6 +1440,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
 	int fack_count;
 	int i, j;
 	int first_sack_index;
+	int fullwalk = 1;
 
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
@@ -1523,6 +1530,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
 		cache++;
 	}
 
+	switch (used_sacks) {
+	case 0: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK0); break;
+	case 1: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK1); break;
+	case 2: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK2); break;
+	case 3: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK3); break;
+	case 4: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK4); break;
+	}
+
+	if (!tcp_sack_cache_ok(tp, cache))
+		NET_INC_STATS_BH(LINUX_MIB_TCP_NOCACHE);
+
 	while (i < used_sacks) {
 		u32 start_seq = sp[i].start_seq;
 		u32 end_seq = sp[i].end_seq;
@@ -1544,6 +1562,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
 		/* Can skip some work by looking recv_sack_cache? */
 		if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
 		    after(end_seq, cache->start_seq)) {
+			int headskip = 0;
+
+			fullwalk = 0;
 
 			/* Head todo? */
 			if (before(start_seq, cache->start_seq)) {
@@ -1551,12 +1572,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
 				skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq,
 						       cache->start_seq, dup_sack,
 						       &fack_count, &reord, &flag);
-			}
+				NET_INC_STATS_BH(LINUX_MIB_TCP_HEADWALK);
+			} else
+				headskip = 1;
 
 			/* Rest of the block already fully processed? */
 			if (!after(end_seq, cache->end_seq)) {
 				skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq,
 							       &fack_count, &reord, &flag);
+				if (headskip)
+					NET_INC_STATS_BH(LINUX_MIB_TCP_FULLSKIP);
+				else
+					NET_INC_STATS_BH(LINUX_MIB_TCP_TAILSKIP);
 				goto advance_sp;
 			}
@@ -1571,24 +1598,37 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
 				skb = tcp_write_queue_next(sk, tp->highest_sack);
 				fack_count = tp->fackets_out;
 				cache++;
+
+				if (headskip)
+					NET_INC_STATS_BH(LINUX_MIB_TCP_HEADSKIP_TOHIGH);
+				else
+					NET_INC_STATS_BH(LINUX_MIB_TCP_TAIL_TOHIGH);
 				goto walk;
 			}
 
 			skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
 			/* Check overlap against next cached too (past this one already) */
 			cache++;
+
+			if (headskip)
+				NET_INC_STATS_BH(LINUX_MIB_TCP_HEADSKIP);
 			continue;
 		}
 
 		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
 			skb = tcp_write_queue_next(sk, tp->highest_sack);
 			fack_count = tp->fackets_out;
+			NET_INC_STATS_BH(LINUX_MIB_TCP_NEWSKIP);
 		}
 		skb = tcp_sacktag_skip(skb, sk, start_seq);
 
 walk:
 		skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
 				       dup_sack, &fack_count, &reord, &flag);
+		if (fullwalk)
+			NET_INC_STATS_BH(LINUX_MIB_TCP_FULLWALK);
+		else
+			NET_INC_STATS_BH(LINUX_MIB_TCP_TAILWALK);
 
 advance_sp:
 		/* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
@@ -1598,15 +1638,21 @@ advance_sp:
 		flag &= ~FLAG_ONLY_ORIG_SACKED;
 
 		i++;
+		fullwalk = 1;
 	}
 
+	if (tcp_sack_cache_ok(tp, cache))
+		NET_INC_STATS_BH(LINUX_MIB_TCP_CACHEREMAINING);
+
 	/* Clear the head of the cache sack blocks so we can skip it next time */
 	for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
 		tp->recv_sack_cache[i].start_seq = 0;
 		tp->recv_sack_cache[i].end_seq = 0;
 	}
-	for (j = 0; j < used_sacks; j++)
+	for (j = 0; j < used_sacks; j++) {
+		WARN_ON(i >= ARRAY_SIZE(tp->recv_sack_cache));
 		tp->recv_sack_cache[i++] = sp[j];
+	}
 
 	flag |= tcp_mark_lost_retrans(sk);
-- 
1.5.0.6
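
For collecting these during a test: the counters are exported through the normal TcpExt machinery, so they show up in /proc/net/netstat. Below is a minimal sketch of a userspace dumper (a hypothetical helper, not part of the patch; the file name is made up). It pairs up the name and value lines of the proc file and prints only the TCP_ prefixed entries; run it before and after the test and diff the output. An older netstat(8) may not know these names, so reading the proc file directly avoids depending on net-tools.

/* tcpsackstats.c -- hypothetical test helper, not part of the patch.
 * Dumps the TCP_* debug counters by pairing up the name and value
 * lines of /proc/net/netstat. Assumes the usual layout: each
 * "TcpExt:" header line is immediately followed by a "TcpExt:"
 * value line carrying one number per name.
 */
#define _POSIX_C_SOURCE 200112L
#include <stdio.h>
#include <string.h>

int main(void)
{
	char names[8192], vals[8192];
	FILE *f = fopen("/proc/net/netstat", "r");

	if (!f) {
		perror("/proc/net/netstat");
		return 1;
	}

	/* Walk name/value line pairs; only the counters added by this
	 * patch carry the TCP_ prefix (stock TcpExt counters spell TCP
	 * without the underscore), so a prefix match isolates them. */
	while (fgets(names, sizeof(names), f) &&
	       fgets(vals, sizeof(vals), f)) {
		char *np, *vp;
		char *n = strtok_r(names, " \n", &np);
		char *v = strtok_r(vals, " \n", &vp);

		while (n && v) {
			if (!strncmp(n, "TCP_", 4))
				printf("%-24s %s\n", n, v);
			n = strtok_r(NULL, " \n", &np);
			v = strtok_r(NULL, " \n", &vp);
		}
	}

	fclose(f);
	return 0;
}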