>>> diff --git a/net/ipv4/inet_connection_sock.c 
>>> b/net/ipv4/inet_connection_sock.c
>>> index 13ec7c3a9c49..fd45ed2fd985 100644
>>> --- a/net/ipv4/inet_connection_sock.c
>>> +++ b/net/ipv4/inet_connection_sock.c
>>> @@ -749,7 +749,7 @@ static void reqsk_timer_handler(struct timer_list *t)
>>>         inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
>>>  }
>>>
>>> -static void reqsk_queue_hash_req(struct request_sock *req,
>>> +static bool reqsk_queue_hash_req(struct request_sock *req,
>>>                                  unsigned long timeout)
>>>  {
>>>         req->num_retrans = 0;
>>> @@ -759,19 +759,27 @@ static void reqsk_queue_hash_req(struct request_sock 
>>> *req,
>>>         timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
>>>         mod_timer(&req->rsk_timer, jiffies + timeout);
>>>
>>> -       inet_ehash_insert(req_to_sk(req), NULL);
>>> +       if (!inet_ehash_insert(req_to_sk(req), NULL)) {
>>> +               if (timer_pending(&req->rsk_timer))
>>> +                       del_timer_sync(&req->rsk_timer);
>>> +               return false;
>>> +       }
>>>         /* before letting lookups find us, make sure all req fields
>>>          * are committed to memory and refcnt initialized.
>>>          */
>>>         smp_wmb();
>>>         refcount_set(&req->rsk_refcnt, 2 + 1);
>>> +       return true;
>>>  }
>>>
>>> -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock 
>>> *req,
>>> +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock 
>>> *req,
>>>                                    unsigned long timeout)
>>>  {
>>> -       reqsk_queue_hash_req(req, timeout);
>>> +       if (!reqsk_queue_hash_req(req, timeout))
>>> +               return false;
>>> +
>>>         inet_csk_reqsk_queue_added(sk);
>>> +       return true;
>>>  }
>>>  EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
>>>
>>> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
>>> index c4503073248b..b6a1b5334565 100644
>>> --- a/net/ipv4/inet_hashtables.c
>>> +++ b/net/ipv4/inet_hashtables.c
>>> @@ -477,6 +477,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock 
>>> *osk)
>>>         struct inet_ehash_bucket *head;
>>>         spinlock_t *lock;
>>>         bool ret = true;
>>> +       struct sock *reqsk = NULL;
>>>
>>>         WARN_ON_ONCE(!sk_unhashed(sk));
>>>
>>> @@ -486,6 +487,18 @@ bool inet_ehash_insert(struct sock *sk, struct sock 
>>> *osk)
>>>         lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
>>>
>>>         spin_lock(lock);
>>> +       if (!osk)
>>> +               reqsk = __inet_lookup_established(sock_net(sk), 
>>> &tcp_hashinfo,
>>> +                                                       sk->sk_daddr, 
>>> sk->sk_dport,
>>> +                                                       sk->sk_rcv_saddr, 
>>> sk->sk_num,
>>> +                                                       
>>> sk->sk_bound_dev_if, sk->sk_bound_dev_if);
>>> +       if (unlikely(reqsk)) {
>>
>> What reqsk would be a SYN_RECV socket, and not a ESTABLISH one (or a
>> TIME_WAIT ?)
> 
> It wouldn't be SYN_RECV,ESTABLISH or TIME_WAIT, just TCP_NEW_SYN_RECV.
> 
> When server receives the third handshake packet ACK, SYN_RECV sk will insert 
> to hash with osk(!= NULL).
> The looking up here just avoid to create two or more request sk with 
> TCP_NEW_SYN_RECV when receive syn packet.
> 


@Eric, for this issue I only want to check TCP_NEW_SYN_RECV sk, is it OK like 
below?
 +       if (!osk && sk->sk_state == TCP_NEW_SYN_RECV)
 +               reqsk = __inet_lookup_established(sock_net(sk), &tcp_hashinfo,
 +                                                       sk->sk_daddr, 
sk->sk_dport,
 +                                                       sk->sk_rcv_saddr, 
sk->sk_num,
 +                                                       sk->sk_bound_dev_if, 
sk->sk_bound_dev_if);
 +       if (unlikely(reqsk)) {




>>
>>> +               ret = false;
>>> +               reqsk_free(inet_reqsk(sk));
>>> +               spin_unlock(lock);
>>> +               return ret;
>>> +       }
>>> +
>>>         if (osk) {
>>
>> This test should have be a hint here : Sometime we _expect_ to have an
>> old socket (TIMEWAIT) and remove it
> I will check TIMEWAIT sk.
>>
>>
>>>                 WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
>>>                 ret = sk_nulls_del_node_init_rcu(osk);
>>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>>> index 38dfc308c0fb..358272394590 100644
>>> --- a/net/ipv4/tcp_input.c
>>> +++ b/net/ipv4/tcp_input.c
>>> @@ -6570,9 +6570,10 @@ int tcp_conn_request(struct request_sock_ops 
>>> *rsk_ops,
>>>                 sock_put(fastopen_sk);
>>>         } else {
>>>                 tcp_rsk(req)->tfo_listener = false;
>>> -               if (!want_cookie)
>>> -                       inet_csk_reqsk_queue_hash_add(sk, req,
>>> -                               tcp_timeout_init((struct sock *)req));
>>> +               if (!want_cookie && !inet_csk_reqsk_queue_hash_add(sk, req,
>>> +                                       tcp_timeout_init((struct sock 
>>> *)req)))
>>> +                       return 0;
>>> +
>>>                 af_ops->send_synack(sk, dst, &fl, req, &foc,
>>>                                     !want_cookie ? TCP_SYNACK_NORMAL :
>>>                                                    TCP_SYNACK_COOKIE);
>>> --
>>> 2.20.1
>>>
>>
>> I believe the proper fix is more complicated.
> yes, pretty complicated.
>>
>> Probably we need to move the locking in a less deeper location.


Currently, I find inet_ehash_insert is the most suitable location to do hash 
looking up,
because the sk's lock can be found from sk_hash, and there has already existed 
spin_lock code
In v1, I put the hash looking up in tcp_connect_request, there will be 
redundant lock to do looking up.


> 
>>
>> (Also a similar fix would be needed in IPv6)
> ok

I find IPv6 has the same call trace, so this fix seems good to IPv6?
tcp_v6_conn_request
        tcp_conn_request
                inet_csk_reqsk_queue_hash_add
                        reqsk_queue_hash_req
                                inet_ehash_insert
                                        
                

>>
>> Thanks.
>>
>> .
>>

Reply via email to