Simplify the TCP congestion control infrastructure. The count of newly
acked packets can be folded into the cong_avoid hook, which makes the
separate pkts_acked hook (and its extra indirect call per ACK) unnecessary.
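As an illustration of the new interface, here is a minimal sketch of a
congestion control module written against it. The module name "example",
the EX_ACK_RATIO_SHIFT constant, and the delayed-ACK bookkeeping are
hypothetical, modelled on what the tcp_bic hunk below now does inline;
nothing in this sketch is part of the patch itself.

	#include <linux/module.h>
	#include <net/tcp.h>

	/* Hypothetical constant, mirrors ACK_RATIO_SHIFT in tcp_bic.c */
	#define EX_ACK_RATIO_SHIFT 4

	struct example_ca {
		u32 delayed_ack;	/* delayed-ACK ratio << EX_ACK_RATIO_SHIFT */
	};

	static void example_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
				       u32 in_flight, u32 pkts_acked)
	{
		struct example_ca *ca = inet_csk_ca(sk);

		/* Per-ACK accounting that previously needed a separate
		 * .pkts_acked hook can now run here, before the cwnd
		 * update: sliding window average,
		 * ratio = (15*ratio + sample) / 16, as in tcp_bic.
		 */
		if (pkts_acked && inet_csk(sk)->icsk_ca_state == TCP_CA_Open) {
			pkts_acked -= ca->delayed_ack >> EX_ACK_RATIO_SHIFT;
			ca->delayed_ack += pkts_acked;
		}

		/* Standard Reno response for the actual cwnd update
		 * (Reno ignores the pkts_acked argument). */
		tcp_reno_cong_avoid(sk, ack, rtt, in_flight, pkts_acked);
	}

	static struct tcp_congestion_ops example = {
		.ssthresh	= tcp_reno_ssthresh,
		.cong_avoid	= example_cong_avoid,
		.min_cwnd	= tcp_reno_min_cwnd,
		/* no .pkts_acked member any more */
		.owner		= THIS_MODULE,
		.name		= "example",
	};

The point is that modules needing the acked count (bic, htcp, westwood)
now read it from the cong_avoid argument instead of registering a second
callback.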
Signed-off-by: Stephen Hemminger <[EMAIL PROTECTED]>

--- net-2.6.orig/include/net/tcp.h
+++ net-2.6/include/net/tcp.h
@@ -679,7 +679,7 @@ struct tcp_congestion_ops {
 	u32 (*min_cwnd)(struct sock *sk);
 	/* do new cwnd calculation (required) */
 	void (*cong_avoid)(struct sock *sk, u32 ack,
-			   u32 rtt, u32 in_flight, int good_ack);
+			   u32 rtt, u32 in_flight, u32 pkts_acked);
 	/* round trip time sample per acked packet (optional) */
 	void (*rtt_sample)(struct sock *sk, u32 usrtt);
 	/* call before changing ca_state (optional) */
@@ -688,8 +688,6 @@ struct tcp_congestion_ops {
 	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
 	/* new value of cwnd after loss (optional) */
 	u32 (*undo_cwnd)(struct sock *sk);
-	/* hook for packet ack accounting (optional) */
-	void (*pkts_acked)(struct sock *sk, u32 num_acked);
 	/* get info for inet_diag (optional) */
 	void (*get_info)(struct sock *sk, u32 ext,
 			 struct sk_buff *skb);
@@ -709,7 +707,7 @@ extern int tcp_set_congestion_control(st
 extern struct tcp_congestion_ops tcp_init_congestion_ops;
 
 extern u32 tcp_reno_ssthresh(struct sock *sk);
 extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack,
-				u32 rtt, u32 in_flight, int flag);
+				u32 rtt, u32 in_flight, u32 pkts_acked);
 extern u32 tcp_reno_min_cwnd(struct sock *sk);
 extern struct tcp_congestion_ops tcp_reno;
--- net-2.6.orig/net/ipv4/tcp_bic.c
+++ net-2.6/net/ipv4/tcp_bic.c
@@ -156,7 +156,7 @@ static inline void bictcp_update(struct
 
 
 /* Detect low utilization in congestion avoidance */
-static inline void bictcp_low_utilization(struct sock *sk, int flag)
+static inline void bictcp_low_utilization(struct sock *sk, u32 pkts_acked)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
@@ -167,7 +167,7 @@ static inline void bictcp_low_utilizatio
 	    /* Discard delay samples right after fast recovery */
 	    tcp_time_stamp < ca->epoch_start + HZ ||
 	    /* this delay samples may not be accurate */
-	    flag == 0) {
+	    pkts_acked == 0) {
 		ca->last_delay = 0;
 		goto notlow;
 	}
@@ -210,12 +210,18 @@ static inline void bictcp_low_utilizatio
 }
 
 static void bictcp_cong_avoid(struct sock *sk, u32 ack,
-			      u32 seq_rtt, u32 in_flight, int data_acked)
+			      u32 seq_rtt, u32 in_flight, u32 count)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	bictcp_low_utilization(sk, data_acked);
+	bictcp_low_utilization(sk, count);
+
+	/* Track delayed ack ratio */
+	if (count && inet_csk(sk)->icsk_ca_state == TCP_CA_Open) {
+		count -= ca->delayed_ack >> ACK_RATIO_SHIFT;
+		ca->delayed_ack += count;
+	}
 
 	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
@@ -291,21 +297,6 @@ static void bictcp_state(struct sock *sk
 		bictcp_reset(inet_csk_ca(sk));
 }
 
-/* Track delayed acknowledgement ratio using sliding window
- * ratio = (15*ratio + sample) / 16
- */
-static void bictcp_acked(struct sock *sk, u32 cnt)
-{
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-
-	if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
-		struct bictcp *ca = inet_csk_ca(sk);
-		cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
-		ca->delayed_ack += cnt;
-	}
-}
-
-
 static struct tcp_congestion_ops bictcp = {
 	.init		= bictcp_init,
 	.ssthresh	= bictcp_recalc_ssthresh,
@@ -313,7 +304,6 @@ static struct tcp_congestion_ops bictcp
 	.set_state	= bictcp_state,
 	.undo_cwnd	= bictcp_undo_cwnd,
 	.min_cwnd	= bictcp_min_cwnd,
-	.pkts_acked	= bictcp_acked,
 	.owner		= THIS_MODULE,
 	.name		= "bic",
 };
--- net-2.6.orig/net/ipv4/tcp_cong.c
+++ net-2.6/net/ipv4/tcp_cong.c
@@ -182,7 +182,7 @@ int tcp_set_congestion_control(struct so
  * SIGCOMM '88, p. 328.
  */
 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
-			 int flag)
+			 u32 pkts_acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
--- net-2.6.orig/net/ipv4/tcp_htcp.c
+++ net-2.6/net/ipv4/tcp_htcp.c
@@ -202,11 +202,14 @@ static u32 htcp_recalc_ssthresh(struct s
 }
 
 static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
-			    u32 in_flight, int data_acked)
+			    u32 in_flight, u32 pkts_acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
 
+	if (use_bandwidth_switch)
+		measure_achieved_throughput(sk, pkts_acked);
+
 	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
@@ -271,7 +274,6 @@ static struct tcp_congestion_ops htcp =
 	.cong_avoid	= htcp_cong_avoid,
 	.set_state	= htcp_state,
 	.undo_cwnd	= htcp_cwnd_undo,
-	.pkts_acked	= measure_achieved_throughput,
 	.owner		= THIS_MODULE,
 	.name		= "htcp",
 };
@@ -280,8 +282,7 @@
 static int __init htcp_register(void)
 {
 	BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
 	BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
-	if (!use_bandwidth_switch)
-		htcp.pkts_acked = NULL;
+
 	return tcp_register_congestion_control(&htcp);
 }
--- net-2.6.orig/net/ipv4/tcp_hybla.c
+++ net-2.6/net/ipv4/tcp_hybla.c
@@ -87,7 +87,7 @@ static inline u32 hybla_fraction(u32 odd
  *     o remember increments <1
  */
 static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
-			     u32 in_flight, int flag)
+			     u32 in_flight, u32 count)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct hybla *ca = inet_csk_ca(sk);
@@ -104,7 +104,7 @@ static void hybla_cong_avoid(struct sock
 		return;
 
 	if (!ca->hybla_en)
-		return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
+		return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, count);
 
 	if (ca->rho == 0)
 		hybla_recalc_param(sk);
--- net-2.6.orig/net/ipv4/tcp_input.c
+++ net-2.6/net/ipv4/tcp_input.c
@@ -1974,10 +1974,10 @@ static inline void tcp_ack_update_rtt(st
 }
 
 static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
-				  u32 in_flight, int good)
+				  u32 in_flight, u32 count)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good);
+	icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, count);
 	tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
 }
@@ -2060,7 +2060,7 @@ static inline u32 tcp_usrtt(const struct
 }
 
 /* Remove acknowledged frames from the retransmission queue.
 */
-static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
+static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, u32 *pkts_acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2068,7 +2068,6 @@ static int tcp_clean_rtx_queue(struct so
 	__u32 now = tcp_time_stamp;
 	int acked = 0;
 	__s32 seq_rtt = -1;
-	u32 pkts_acked = 0;
 	void (*rtt_sample)(struct sock *sk, u32 usrtt)
 		= icsk->icsk_ca_ops->rtt_sample;
@@ -2098,7 +2097,7 @@ static int tcp_clean_rtx_queue(struct so
 		 */
 		if (!(scb->flags & TCPCB_FLAG_SYN)) {
 			acked |= FLAG_DATA_ACKED;
-			++pkts_acked;
+			*pkts_acked += 1;
 		} else {
 			acked |= FLAG_SYN_ACKED;
 			tp->retrans_stamp = 0;
@@ -2138,9 +2137,6 @@ static int tcp_clean_rtx_queue(struct so
 	if (acked&FLAG_ACKED) {
 		tcp_ack_update_rtt(sk, acked, seq_rtt);
 		tcp_ack_packets_out(sk, tp);
-
-		if (icsk->icsk_ca_ops->pkts_acked)
-			icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
 	}
 
 #if FASTRETRANS_DEBUG > 0
@@ -2148,7 +2144,6 @@
 	BUG_TRAP((int)tp->lost_out >= 0);
 	BUG_TRAP((int)tp->retrans_out >= 0);
 	if (!tp->packets_out && tp->rx_opt.sack_ok) {
-		const struct inet_connection_sock *icsk = inet_csk(sk);
 		if (tp->lost_out) {
 			printk(KERN_DEBUG "Leak l=%u %d\n",
 			       tp->lost_out, icsk->icsk_ca_state);
@@ -2297,7 +2292,7 @@ static int tcp_ack(struct sock *sk, stru
 	u32 prior_snd_una = tp->snd_una;
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
-	u32 prior_in_flight;
+	u32 prior_in_flight, pkts_acked;
 	s32 seq_rtt;
 	int prior_packets;
@@ -2351,7 +2346,8 @@ static int tcp_ack(struct sock *sk, stru
 	prior_in_flight = tcp_packets_in_flight(tp);
 
 	/* See if we can take anything off of the retransmit queue. */
-	flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
+	pkts_acked = 0;
+	flag |= tcp_clean_rtx_queue(sk, &seq_rtt, &pkts_acked);
 
 	if (tp->frto_counter)
 		tcp_process_frto(sk, prior_snd_una);
@@ -2359,11 +2355,11 @@ static int tcp_ack(struct sock *sk, stru
 	if (tcp_ack_is_dubious(sk, flag)) {
 		/* Advanve CWND, if state allows this. */
 		if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
-			tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0);
+			tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, pkts_acked);
 		tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
 	} else {
 		if ((flag & FLAG_DATA_ACKED))
-			tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1);
+			tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, pkts_acked);
 	}
 
 	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
--- net-2.6.orig/net/ipv4/tcp_scalable.c
+++ net-2.6/net/ipv4/tcp_scalable.c
@@ -17,7 +17,7 @@
 #define TCP_SCALABLE_MD_SCALE 3
 
 static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
-				    u32 in_flight, int flag)
+				    u32 in_flight, u32 cnt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
--- net-2.6.orig/net/ipv4/tcp_vegas.c
+++ net-2.6/net/ipv4/tcp_vegas.c
@@ -163,13 +163,13 @@ static void tcp_vegas_cwnd_event(struct
 }
 
 static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
-				 u32 seq_rtt, u32 in_flight, int flag)
+				 u32 seq_rtt, u32 in_flight, u32 cnt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct vegas *vegas = inet_csk_ca(sk);
 
 	if (!vegas->doing_vegas_now)
-		return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag);
+		return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, cnt);
 
 	/* The key players are v_beg_snd_una and v_beg_snd_nxt.
 	 *
--- net-2.6.orig/net/ipv4/tcp_westwood.c
+++ net-2.6/net/ipv4/tcp_westwood.c
@@ -70,18 +70,6 @@ static inline void westwood_filter(struc
 }
 
 /*
- * @westwood_pkts_acked
- * Called after processing group of packets.
- * but all westwood needs is the last sample of srtt.
- */
-static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
-{
-	struct westwood *w = inet_csk_ca(sk);
-	if (cnt > 0)
-		w->rtt = tcp_sk(sk)->srtt >> 3;
-}
-
-/*
  * @westwood_update_window
  * It updates RTT evaluation window if it is the right moment to do
  * it. If so it calls filter for evaluating bandwidth.
@@ -231,14 +219,29 @@ static void tcp_westwood_info(struct soc
 }
 
 
+/*
+ * @westwood_cong_avoid
+ * Called after processing group of packets.
+ * but all westwood needs is the last sample of srtt.
+ */
+static void tcp_westwood_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
+				    u32 in_flight, u32 pkts_acked)
+{
+	if (pkts_acked > 0) {
+		struct westwood *w = inet_csk_ca(sk);
+		w->rtt = tcp_sk(sk)->srtt >> 3;
+	}
+	return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, pkts_acked);
+}
+
+
 static struct tcp_congestion_ops tcp_westwood = {
 	.init		= tcp_westwood_init,
 	.ssthresh	= tcp_reno_ssthresh,
-	.cong_avoid	= tcp_reno_cong_avoid,
+	.cong_avoid	= tcp_westwood_cong_avoid,
 	.min_cwnd	= tcp_westwood_cwnd_min,
 	.cwnd_event	= tcp_westwood_event,
 	.get_info	= tcp_westwood_info,
-	.pkts_acked	= tcp_westwood_pkts_acked,
 	.owner		= THIS_MODULE,
 	.name		= "westwood"

-- 
Stephen Hemminger <[EMAIL PROTECTED]>
OSDL http://developer.osdl.org/~shemminger